diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc60f7d..54d1aad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,23 @@ version line is kept in lock-step with the underlying SKaiNET engine
The format roughly follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.32.1] — 2026-06-26
+
+Fixes streaming detokenization — generated text no longer runs words together
+(previously `"the process"` was emitted as `"theprocess"`). Ships against engine **0.32.4**.
+
+### Fixed
+
+- **Per-token streaming decode preserves word-boundary spaces.** `SentencePieceSpecialTokens.decode(Int)`
+ and `UpstreamTokenizerAdapter.decode(Int)` now route through the engine's new `Tokenizer.decodeToken(id)`
+ (engine 0.32.4), which keeps each SentencePiece piece's leading space instead of stripping it per token
+ (the sequence-level `addSpacePrefix` strip is only correct once per sequence). Fixes output whose tokens
+ were correct but whose word spaces were missing in streaming generation (kllama, agent loops). Adds `SentencePieceSpecialTokensStreamingTest`.
+
+### Changed
+
+- **Engine pin `skainet 0.32.2 → 0.32.4`** (adds `Tokenizer.decodeToken`).
+
## [0.32.0] — 2026-06-25
Brings the real-GGUF **Llama** eager path up to the Gemma standard (packed
diff --git a/README.md b/README.md
index ffb998e..57b55f4 100644
--- a/README.md
+++ b/README.md
@@ -103,9 +103,10 @@ Honest status — see the project-status note at the top of this README.
## Current release
-The current release is **0.32.0** (against **SKaiNET 0.32.2**). It brings the
-real-GGUF **Llama** eager path up to the Gemma standard and **unblocks StableHLO/IREE
-export for Llama-family models**:
+The current release is **0.32.1** (against **SKaiNET 0.32.4**). It fixes streaming
+detokenization — per-token decode now keeps each word's leading space, so generated text
+no longer runs together (`"the process"` not `"theprocess"`). This release builds on the **0.32.0**
+real-GGUF **Llama** eager-path and StableHLO/IREE export work:
- The eager **`NATIVE_OPTIMIZED` path now works for Llama** (`Q4_K`/`Q6_K`): weights stay
packed and `LlamaNetworkLoader.fromGguf(NATIVE_OPTIMIZED) + OptimizedLLMRuntime` decodes
@@ -123,7 +124,7 @@ The recommended way to consume is via the BOM. It pins every published `skainet-
```kotlin
dependencies {
- implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.0"))
+ implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.1"))
// Versions resolved from the BOM:
implementation("sk.ainet.transformers:skainet-transformers-core")
@@ -201,6 +202,14 @@ try (KLlamaSession session = KLlamaJava.loadGGUF(modelPath, /* systemPrompt */ n
See `llm-test/llm-test-java/src/test/java/.../KLlamaJavaToolCallingTest.java` for a runnable reference.
+## What's new in 0.32.1
+
+- **Streaming detokenization keeps word spaces.** A generation loop decoding one token at a time
+ (`tokenizer.decode(tokenId)`) no longer runs words together. `SentencePieceSpecialTokens` and
+ `UpstreamTokenizerAdapter` route `decode(Int)` through engine 0.32.4's `Tokenizer.decodeToken`,
+ which preserves each SentencePiece piece's leading space (llama.cpp `token_to_piece` semantics).
+ Engine pin `0.32.2 → 0.32.4`.
+
## What's new in 0.32.0
- **Eager `NATIVE_OPTIMIZED` for real-GGUF Llama.** `LlamaNetworkLoader.fromGguf(NATIVE_OPTIMIZED)`
diff --git a/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc b/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc
index 530d47f..786839d 100644
--- a/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc
+++ b/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc
@@ -25,7 +25,7 @@ In your `build.gradle.kts`:
[source,kotlin]
----
dependencies {
- implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.0"))
+ implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.1"))
implementation("sk.ainet.transformers:skainet-transformers-runtime-kllama")
implementation("sk.ainet.transformers:skainet-transformers-agent")
@@ -41,7 +41,7 @@ Or in Maven (Maven needs the `-jvm` classifier suffix on platform artifacts):
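<groupId>sk.ainet.transformers</groupId>
<artifactId>skainet-transformers-bom</artifactId>
-<version>0.32.0</version>
+<version>0.32.1</version>
<type>pom</type>
<scope>import</scope>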
diff --git a/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc b/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc
index fbe5d0d..498d4e0 100644
--- a/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc
+++ b/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc
@@ -52,7 +52,7 @@ The pieces you need live in three modules:
[source,kotlin]
----
dependencies {
- implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.0"))
+ implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.1"))
implementation("sk.ainet.transformers:skainet-transformers-runtime-kllama")
implementation("sk.ainet.transformers:skainet-transformers-agent")
diff --git a/gradle.properties b/gradle.properties
index eb1c82f..beac568 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,5 +1,5 @@
GROUP=sk.ainet.transformers
-VERSION_NAME=0.32.0
+VERSION_NAME=0.32.1
POM_DESCRIPTION=SKaiNET-transformers
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index a14a71c..4cc57ed 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -1,5 +1,5 @@
[versions]
-skainet = "0.32.2"
+skainet = "0.32.4"
agp = "9.2.1"
jacksonDatabind = "2.22.0"
jsonSchemaValidator = "3.0.5"
diff --git a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt
index 3663e4b..1358b78 100644
--- a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt
+++ b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt
@@ -107,7 +107,9 @@ public class SentencePieceSpecialTokens(
override fun decode(token: Int): String {
val special = specialIdToString[token]
if (special != null) return special
- return base.decode(intArrayOf(token))
+ // Streaming single-token decode: keep the leading word-boundary space so
+ // generated tokens don't run together ("the process" not "theprocess").
+ return base.decodeToken(token)
}
private fun matchSpecialAt(text: String, from: Int): String? {
diff --git a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt
index 58440c1..389264d 100644
--- a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt
+++ b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt
@@ -33,5 +33,5 @@ internal class UpstreamTokenizerAdapter(
override fun decode(tokens: IntArray): String = delegate.decode(tokens)
- override fun decode(token: Int): String = delegate.decode(intArrayOf(token))
+ override fun decode(token: Int): String = delegate.decodeToken(token)
}
diff --git a/llm-core/src/commonTest/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokensStreamingTest.kt b/llm-core/src/commonTest/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokensStreamingTest.kt
new file mode 100644
index 0000000..62d75c2
--- /dev/null
+++ b/llm-core/src/commonTest/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokensStreamingTest.kt
@@ -0,0 +1,47 @@
+package sk.ainet.apps.llm.tokenizer
+
+import sk.ainet.io.tokenizer.SentencePieceTokenizer
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertFalse
+
+/**
+ * Streaming detokenization regression test. A generation loop appends one
+ * decoded token at a time (`response.append(tokenizer.decode(tokenId))`), and
+ * each SentencePiece piece carries its own leading word-boundary marker (`▁`).
+ * Per-token decode must therefore keep that space; stripping it per token runs
+ * words together (`"the process"` becomes `"theprocess"`). Batch decode is
+ * still expected to drop the single leading space of the whole sequence.
+ */
+class SentencePieceSpecialTokensStreamingTest {
+
+    private fun toy(): SentencePieceSpecialTokens {
+        // Minimal vocab: ids 0-2 are control tokens, ids 3-4 are ▁-prefixed word pieces.
+        val tokens = listOf("<unk>", "<s>", "</s>", "▁Hello", "▁world")
+        val scores = List(tokens.size) { 0.0f }
+        val base = SentencePieceTokenizer(
+            tokens = tokens,
+            scores = scores,
+            unknownTokenId = 0,
+            bosTokenId = 1,
+            eosTokenId = 2,
+            addSpacePrefix = true,
+        )
+        return SentencePieceSpecialTokens(base, specialTokens = emptyMap())
+    }
+
+    @Test
+    fun `streaming per-token decode preserves word spaces`() {
+        val tok = toy()
+        val ids = intArrayOf(3, 4) // ▁Hello, ▁world
+
+        val streamed = buildString { for (id in ids) append(tok.decode(id)) }
+        assertEquals(" Hello world", streamed)
+        assertFalse(streamed.contains("Helloworld"), "words must not run together")
+    }
+
+    @Test
+    fun `batch decode still strips the single leading space`() {
+        val tok = toy()
+        assertEquals("Hello world", tok.decode(intArrayOf(3, 4)))
+    }
+}