From cab9fa658f628ced75d4e22bd799701bce259011 Mon Sep 17 00:00:00 2001 From: Marvin Hagemeister Date: Wed, 6 May 2026 10:09:28 +0200 Subject: [PATCH 1/2] fix: potential memory leak due to growing string table Also make encoding faster. --- src/adapter/protocol/string-table.test.ts | 53 ++++++++++++++++++----- src/adapter/protocol/string-table.ts | 48 +++++++++++++++++--- 2 files changed, 85 insertions(+), 16 deletions(-) diff --git a/src/adapter/protocol/string-table.test.ts b/src/adapter/protocol/string-table.test.ts index dd9b178a..53ab1c96 100644 --- a/src/adapter/protocol/string-table.test.ts +++ b/src/adapter/protocol/string-table.test.ts @@ -1,4 +1,9 @@ -import { parseTable, flushTable } from "./string-table"; +import { + ENCODE_CACHE_LIMIT, + encode, + parseTable, + flushTable, +} from "./string-table"; import { expect } from "chai"; describe("StringTable", () => { @@ -9,15 +14,7 @@ describe("StringTable", () => { ["foo", 2], ]); expect(flushTable(table)).to.deep.equal([ - 8, - 3, - 97, - 98, - 99, - 3, - 102, - 111, - 111, + 8, 3, 97, 98, 99, 3, 102, 111, 111, ]); }); }); @@ -33,4 +30,40 @@ describe("StringTable", () => { expect(parseTable(data)).to.deep.equal(["abc", "foo"]); }); }); + + describe("encode", () => { + it("should preserve NUL characters", () => { + expect(parseTable([2, 1, ...encode("\0")])).to.deep.equal(["\0"]); + }); + + it("should reuse cached strings", () => { + const encoded = encode("cached-string"); + expect(encode("cached-string")).to.equal(encoded); + }); + + it("should evict the least recently used string", () => { + const prefix = "evict-lru"; + const encoded = encode(prefix); + + for (let i = 0; i < ENCODE_CACHE_LIMIT; i++) { + encode(`${prefix}-${i}`); + } + + expect(encode(prefix)).to.not.equal(encoded); + }); + + it("should refresh a string when it is read from the cache", () => { + const prefix = "refresh-lru"; + const encoded = encode(prefix); + + for (let i = 0; i < ENCODE_CACHE_LIMIT - 1; i++) { + encode(`${prefix}-${i}`); + } + + expect(encode(prefix)).to.equal(encoded); + encode(`${prefix}-overflow`); + + expect(encode(prefix)).to.equal(encoded); + }); + }); }); diff --git a/src/adapter/protocol/string-table.ts b/src/adapter/protocol/string-table.ts index c534d5f5..54ca7329 100644 --- a/src/adapter/protocol/string-table.ts +++ b/src/adapter/protocol/string-table.ts @@ -20,19 +20,55 @@ export function getStringId(table: StringTable, input: string): number { return table.get(input)!; } -// TODO: Use a proper LRU cache? -const encoded = new Map(); +export const ENCODE_CACHE_LIMIT = 1000; -const toCodePoint = (s: string) => s.codePointAt(0) || 124; // "|"" symbol; +class LRUCache { + private cache = new Map(); + + constructor(private limit: number) {} + + get(key: K): V | undefined { + const value = this.cache.get(key); + if (value === undefined) return undefined; + + this.cache.delete(key); + this.cache.set(key, value); + return value; + } + + set(key: K, value: V) { + if (this.cache.has(key)) { + this.cache.delete(key); + } + + this.cache.set(key, value); + + if (this.cache.size > this.limit) { + const oldest = this.cache.keys().next().value; + if (oldest !== undefined) { + this.cache.delete(oldest); + } + } + } +} + +const encoded = new LRUCache(ENCODE_CACHE_LIMIT); /** * Convert a string to an array of codepoints */ export function encode(input: string): number[] { - if (!encoded.has(input)) { - encoded.set(input, input.split("").map(toCodePoint)); + const cached = encoded.get(input); + if (cached !== undefined) { + return cached; + } + + const value = new Array(input.length); + for (let i = 0; i < input.length; i++) { + value[i] = input.charCodeAt(i); } - return encoded.get(input)!; + encoded.set(input, value); + return value; } /** From 4476d9dafecd4cb27e1396667349d862d7a6754c Mon Sep 17 00:00:00 2001 From: Marvin Hagemeister Date: Wed, 6 May 2026 10:16:07 +0200 Subject: [PATCH 2/2] chore: lru tests --- src/adapter/protocol/string-table.test.ts | 55 +++++++++++++++++++++++ src/adapter/protocol/string-table.ts | 2 +- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/adapter/protocol/string-table.test.ts b/src/adapter/protocol/string-table.test.ts index 53ab1c96..3dd1f042 100644 --- a/src/adapter/protocol/string-table.test.ts +++ b/src/adapter/protocol/string-table.test.ts @@ -1,5 +1,6 @@ import { ENCODE_CACHE_LIMIT, + LRUCache, encode, parseTable, flushTable, @@ -31,6 +32,60 @@ describe("StringTable", () => { }); }); + describe("LRUCache", () => { + it("should return undefined for missing entries", () => { + const cache = new LRUCache(2); + + expect(cache.get("missing")).to.equal(undefined); + }); + + it("should store and read entries", () => { + const cache = new LRUCache(2); + + cache.set("a", 1); + + expect(cache.get("a")).to.equal(1); + }); + + it("should evict the oldest entry when the limit is exceeded", () => { + const cache = new LRUCache(2); + + cache.set("a", 1); + cache.set("b", 2); + cache.set("c", 3); + + expect(cache.get("a")).to.equal(undefined); + expect(cache.get("b")).to.equal(2); + expect(cache.get("c")).to.equal(3); + }); + + it("should refresh recency when an entry is read", () => { + const cache = new LRUCache(2); + + cache.set("a", 1); + cache.set("b", 2); + expect(cache.get("a")).to.equal(1); + cache.set("c", 3); + + expect(cache.get("a")).to.equal(1); + expect(cache.get("b")).to.equal(undefined); + expect(cache.get("c")).to.equal(3); + }); + + it("should refresh recency when an existing entry is written", () => { + const cache = new LRUCache(2); + + cache.set("a", 1); + cache.set("b", 2); + cache.set("a", 3); + cache.set("c", 4); + + expect(cache.get("a")).to.equal(3); + expect(cache.get("b")).to.equal(undefined); + expect(cache.get("c")).to.equal(4); + }); + }); + describe("encode", () => { it("should preserve NUL characters", () => { expect(parseTable([2, 1, ...encode("\0")])).to.deep.equal(["\0"]); diff --git a/src/adapter/protocol/string-table.ts b/src/adapter/protocol/string-table.ts index 54ca7329..1d620c0b 100644 --- a/src/adapter/protocol/string-table.ts +++ b/src/adapter/protocol/string-table.ts @@ -22,7 +22,7 @@ export function getStringId(table: StringTable, input: string): number { export const ENCODE_CACHE_LIMIT = 1000; -class LRUCache { +export class LRUCache { private cache = new Map(); constructor(private limit: number) {}