diff --git a/CHANGELOG.md b/CHANGELOG.md index fc60f7d..54d1aad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,23 @@ version line is kept in lock-step with the underlying SKaiNET engine The format roughly follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.32.1] — 2026-06-26 + +Fixes streaming detokenization — generated text no longer runs words together +(`"the process"` → `"theprocess"`). Ships against engine **0.32.4**. + +### Fixed + +- **Per-token streaming decode preserves word-boundary spaces.** `SentencePieceSpecialTokens.decode(Int)` + and `UpstreamTokenizerAdapter.decode(Int)` now route through the engine's new `Tokenizer.decodeToken(id)` + (engine 0.32.4), which keeps each SentencePiece piece's leading space instead of stripping it per token + (the sequence-level `addSpacePrefix` strip is only correct once per sequence). Fixes correct-but-spaceless + output in streaming generation (kllama, agent loops). Adds `SentencePieceSpecialTokensStreamingTest`. + +### Changed + +- **Engine pin `skainet 0.32.2 → 0.32.4`** (adds `Tokenizer.decodeToken`). + ## [0.32.0] — 2026-06-25 Brings the real-GGUF **Llama** eager path up to the Gemma standard (packed diff --git a/README.md b/README.md index ffb998e..57b55f4 100644 --- a/README.md +++ b/README.md @@ -103,9 +103,10 @@ Honest status — see the project-status note at the top of this README. ## Current release -The current release is **0.32.0** (against **SKaiNET 0.32.2**). It brings the -real-GGUF **Llama** eager path up to the Gemma standard and **unblocks StableHLO/IREE -export for Llama-family models**: +The current release is **0.32.1** (against **SKaiNET 0.32.4**). It fixes streaming +detokenization — per-token decode now keeps each word's leading space, so generated text +no longer runs together (`"the process"` not `"theprocess"`). 
On top of the **0.32.0** +real-GGUF **Llama** eager + StableHLO/IREE export work: - The eager **`NATIVE_OPTIMIZED` path now works for Llama** (`Q4_K`/`Q6_K`): weights stay packed and `LlamaNetworkLoader.fromGguf(NATIVE_OPTIMIZED) + OptimizedLLMRuntime` decodes @@ -123,7 +124,7 @@ The recommended way to consume is via the BOM. It pins every published `skainet- ```kotlin dependencies { - implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.0")) + implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.1")) // Versions resolved from the BOM: implementation("sk.ainet.transformers:skainet-transformers-core") @@ -201,6 +202,14 @@ try (KLlamaSession session = KLlamaJava.loadGGUF(modelPath, /* systemPrompt */ n See `llm-test/llm-test-java/src/test/java/.../KLlamaJavaToolCallingTest.java` for a runnable reference. +## What's new in 0.32.1 + +- **Streaming detokenization keeps word spaces.** A generation loop decoding one token at a time + (`tokenizer.decode(tokenId)`) no longer runs words together. `SentencePieceSpecialTokens` and + `UpstreamTokenizerAdapter` route `decode(Int)` through engine 0.32.4's `Tokenizer.decodeToken`, + which preserves each SentencePiece piece's leading space (llama.cpp `token_to_piece` semantics). + Engine pin `0.32.2 → 0.32.4`. 
+ ## What's new in 0.32.0 - **Eager `NATIVE_OPTIMIZED` for real-GGUF Llama.** `LlamaNetworkLoader.fromGguf(NATIVE_OPTIMIZED)` diff --git a/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc b/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc index 530d47f..786839d 100644 --- a/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc +++ b/docs/modules/ROOT/pages/tutorials/getting-started-java.adoc @@ -25,7 +25,7 @@ In your `build.gradle.kts`: [source,kotlin] ---- dependencies { - implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.0")) + implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.1")) implementation("sk.ainet.transformers:skainet-transformers-runtime-kllama") implementation("sk.ainet.transformers:skainet-transformers-agent") @@ -41,7 +41,7 @@ Or in Maven (Maven needs the `-jvm` classifier suffix on platform artifacts): <dependency> <groupId>sk.ainet.transformers</groupId> <artifactId>skainet-transformers-bom</artifactId> - <version>0.32.0</version> + <version>0.32.1</version> <type>pom</type> <scope>import</scope> </dependency> diff --git a/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc b/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc index fbe5d0d..498d4e0 100644 --- a/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc +++ b/docs/modules/ROOT/pages/tutorials/llama3-tool-calling.adoc @@ -52,7 +52,7 @@ The pieces you need live in three modules: [source,kotlin] ---- dependencies { - implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.0")) + implementation(platform("sk.ainet.transformers:skainet-transformers-bom:0.32.1")) implementation("sk.ainet.transformers:skainet-transformers-runtime-kllama") implementation("sk.ainet.transformers:skainet-transformers-agent") diff --git a/gradle.properties b/gradle.properties index eb1c82f..beac568 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ GROUP=sk.ainet.transformers -VERSION_NAME=0.32.0 +VERSION_NAME=0.32.1 POM_DESCRIPTION=SKaiNET-transformers diff --git a/gradle/libs.versions.toml 
b/gradle/libs.versions.toml index a14a71c..4cc57ed 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,5 +1,5 @@ [versions] -skainet = "0.32.2" +skainet = "0.32.4" agp = "9.2.1" jacksonDatabind = "2.22.0" jsonSchemaValidator = "3.0.5" diff --git a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt index 3663e4b..1358b78 100644 --- a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt +++ b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokens.kt @@ -107,7 +107,9 @@ public class SentencePieceSpecialTokens( override fun decode(token: Int): String { val special = specialIdToString[token] if (special != null) return special - return base.decode(intArrayOf(token)) + // Streaming single-token decode: decodeToken keeps the piece's leading word-boundary + // space; decode(intArrayOf(token)) stripped it, fusing words ("the process" -> "theprocess"). + return base.decodeToken(token) } private fun matchSpecialAt(text: String, from: Int): String? 
{ diff --git a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt index 58440c1..389264d 100644 --- a/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt +++ b/llm-core/src/commonMain/kotlin/sk/ainet/apps/llm/tokenizer/UpstreamTokenizerAdapter.kt @@ -33,5 +33,5 @@ internal class UpstreamTokenizerAdapter( override fun decode(tokens: IntArray): String = delegate.decode(tokens) - override fun decode(token: Int): String = delegate.decode(intArrayOf(token)) + override fun decode(token: Int): String = delegate.decodeToken(token) } diff --git a/llm-core/src/commonTest/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokensStreamingTest.kt b/llm-core/src/commonTest/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokensStreamingTest.kt new file mode 100644 index 0000000..62d75c2 --- /dev/null +++ b/llm-core/src/commonTest/kotlin/sk/ainet/apps/llm/tokenizer/SentencePieceSpecialTokensStreamingTest.kt @@ -0,0 +1,47 @@ +package sk.ainet.apps.llm.tokenizer + +import sk.ainet.io.tokenizer.SentencePieceTokenizer +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFalse + +/** + * Streaming detokenization regression: a generation loop appends one decoded + * token at a time (`response.append(tokenizer.decode(tokenId))`). Each + * SentencePiece piece carries its own leading word-boundary space, so + * per-token decode must NOT strip it — otherwise words run together + * (`"the process"` → `"theprocess"`). See `SentencePieceTokenizer.decodeToken`. + */ +class SentencePieceSpecialTokensStreamingTest { + + private fun toy(): SentencePieceSpecialTokens { + // Minimal vocab: control tokens + two ▁-prefixed word pieces. 
+ val tokens = listOf("<unk>", "<s>", "</s>", "▁Hello", "▁world") + val scores = List(tokens.size) { 0.0f } + val base = SentencePieceTokenizer( + tokens = tokens, + scores = scores, + unknownTokenId = 0, + bosTokenId = 1, + eosTokenId = 2, + addSpacePrefix = true, + ) + return SentencePieceSpecialTokens(base, specialTokens = emptyMap()) + } + + @Test + fun `streaming per-token decode preserves word spaces`() { + val tok = toy() + val ids = intArrayOf(3, 4) // ▁Hello, ▁world + + val streamed = buildString { for (id in ids) append(tok.decode(id)) } + assertEquals(" Hello world", streamed) + assertFalse(streamed.contains("Helloworld"), "words must not run together") + } + + @Test + fun `batch decode still strips the single leading space`() { + val tok = toy() + assertEquals("Hello world", tok.decode(intArrayOf(3, 4))) + } +}