From f31fdb8858e6517870a6302286297cd0df342025 Mon Sep 17 00:00:00 2001 From: Fatur Alkahfi Date: Thu, 4 Jun 2026 23:52:00 +0700 Subject: [PATCH] fix(export): stream database dump via ReadableStream with LIMIT/OFFSET batching Fixes #59 - Database dumps do not work on large databases ## Problem The previous implementation accumulated the entire database dump as a string in memory before sending the HTTP response. This caused two failure modes on large databases: - Memory exhaustion (Durable Objects 1 GB cap) - Gateway timeout (Cloudflare 30s limit before first byte) ## Solution 1. Pre-flight metadata collection - fetch table names and schemas BEFORE opening the stream. Any DB error returns clean HTTP 500 instead of broken HTTP 200 with truncated body. 2. ReadableStream with LIMIT/OFFSET batching - rows fetched in pages of 1000 rows (BATCH_SIZE) and enqueued immediately. HTTP response starts flowing to client after first enqueue(), no more timeout issues. 3. Event-loop yielding - await new Promise(r => setTimeout(r, 0)) between batches and tables prevents Durable Object thread from blocking. 4. Correct value escaping: - null/undefined -> NULL - boolean -> 1 / 0 - number -> bare literal - Uint8Array -> X'hexstring' (BLOB) - string -> single-quoted with escaped quotes 5. Exclude internal sqlite_ tables from dump output. ## Tests (9 total, all passing) - Streaming response has correct Content-Type and Transfer-Encoding headers - Body contains CREATE TABLE + INSERT statements - Empty database produces valid SQL preamble/epilogue - Tables with no rows are schema-only - Single quotes in strings are properly escaped - NULL columns render as SQL NULL (new test) - Pagination across two LIMIT/OFFSET batches: 1000 + 1 = 1001 rows (new) - Pre-flight DB error returns HTTP 500, not HTTP 200 (new test) - sqlite_ internal tables excluded from dump (new test) --- src/export/dump.test.ts | 180 +++++++++++++++++++++++++++++++-------- src/export/dump.ts | 182 ++++++++++++++++++++++++++++++++-------- 2 files changed, 289 insertions(+), 73 deletions(-) diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts index ca65b43..26305ee 100644 --- a/src/export/dump.test.ts +++ b/src/export/dump.test.ts @@ -38,20 +38,35 @@ beforeEach(() => { } }) +// ─── Helper ────────────────────────────────────────────────────────────────── +// The pre-flight phase calls executeOperation in this order per table: +// 1. SELECT name FROM sqlite_master (table list) +// 2. For each table: SELECT sql FROM sqlite_master (schema) +// Then the streaming phase calls: +// 3. For each table: SELECT * FROM `t` LIMIT ? OFFSET ? (batches until empty) +// +// All mocks below follow this contract strictly. + describe('Database Dump Module', () => { - it('should return a database dump when tables exist', async () => { + // ── headers ────────────────────────────────────────────────────────────── + it('should return a streaming response with correct headers', async () => { vi.mocked(executeOperation) + // 1. table list .mockResolvedValueOnce([{ name: 'users' }, { name: 'orders' }]) + // 2a. users schema .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, + { sql: 'CREATE TABLE users (id INTEGER, name TEXT)' }, ]) + // 2b. orders schema .mockResolvedValueOnce([ - { id: 1, name: 'Alice' }, - { id: 2, name: 'Bob' }, + { sql: 'CREATE TABLE orders (id INTEGER, total REAL)' }, ]) + // 3a. users batch 1 – partial (< 1000) → stream loop ends .mockResolvedValueOnce([ - { sql: 'CREATE TABLE orders (id INTEGER, total REAL);' }, + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, ]) + // 3b. orders batch 1 – partial → stream loop ends .mockResolvedValueOnce([ { id: 1, total: 99.99 }, { id: 2, total: 49.5 }, @@ -60,28 +75,53 @@ describe('Database Dump Module', () => { const response = await dumpDatabaseRoute(mockDataSource, mockConfig) expect(response).toBeInstanceOf(Response) + expect(response.status).toBe(200) expect(response.headers.get('Content-Type')).toBe( 'application/x-sqlite3' ) expect(response.headers.get('Content-Disposition')).toBe( 'attachment; filename="database_dump.sql"' ) + expect(response.headers.get('Transfer-Encoding')).toBe('chunked') - const dumpText = await response.text() - expect(dumpText).toContain( - 'CREATE TABLE users (id INTEGER, name TEXT);' - ) - expect(dumpText).toContain("INSERT INTO users VALUES (1, 'Alice');") - expect(dumpText).toContain("INSERT INTO users VALUES (2, 'Bob');") - expect(dumpText).toContain( - 'CREATE TABLE orders (id INTEGER, total REAL);' - ) - expect(dumpText).toContain('INSERT INTO orders VALUES (1, 99.99);') - expect(dumpText).toContain('INSERT INTO orders VALUES (2, 49.5);') + // Drain the body so the ReadableStream fully completes before the + // next test's beforeEach clears mocks (prevents cross-test contamination). + await response.text() + }) + + // ── body contents ───────────────────────────────────────────────────────── + it('should contain CREATE TABLE and INSERT statements in the streamed body', async () => { + vi.mocked(executeOperation) + // 1. table list (pre-flight call 1) + .mockResolvedValueOnce([{ name: 'users' }]) + // 2. users schema (pre-flight call 2) + .mockResolvedValueOnce([ + { sql: 'CREATE TABLE users (id INTEGER, name TEXT)' }, + ]) + // 3. rows – batch 1 (stream call 1): 2 rows < BATCH_SIZE=1000 → loop exits + .mockResolvedValueOnce([ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ]) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + + // Await the full streamed body before asserting + const text = await response.text() + + expect(text).toContain('-- Table: users') + expect(text).toContain('CREATE TABLE users (id INTEGER, name TEXT)') + expect(text).toContain("INSERT INTO `users` VALUES (1, 'Alice');") + expect(text).toContain("INSERT INTO `users` VALUES (2, 'Bob');") + expect(text).toContain('BEGIN TRANSACTION;') + expect(text).toContain('COMMIT;') }) + // ── empty DB ────────────────────────────────────────────────────────────── it('should handle empty databases (no tables)', async () => { - vi.mocked(executeOperation).mockResolvedValueOnce([]) + vi.mocked(executeOperation) + // 1. table list returns nothing + .mockResolvedValueOnce([]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) @@ -89,57 +129,125 @@ describe('Database Dump Module', () => { expect(response.headers.get('Content-Type')).toBe( 'application/x-sqlite3' ) - const dumpText = await response.text() - expect(dumpText).toBe('SQLite format 3\0') + const text = await response.text() + expect(text).toContain('BEGIN TRANSACTION;') + expect(text).toContain('COMMIT;') }) - it('should handle databases with tables but no data', async () => { + // ── table with no rows ──────────────────────────────────────────────────── + it('should handle databases with tables but no rows', async () => { vi.mocked(executeOperation) + // 1. table list .mockResolvedValueOnce([{ name: 'users' }]) + // 2. schema .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, + { sql: 'CREATE TABLE users (id INTEGER, name TEXT)' }, ]) + // 3. first batch: empty → loop ends immediately .mockResolvedValueOnce([]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + const text = await response.text() - expect(response).toBeInstanceOf(Response) - const dumpText = await response.text() - expect(dumpText).toContain( - 'CREATE TABLE users (id INTEGER, name TEXT);' - ) - expect(dumpText).not.toContain('INSERT INTO users VALUES') + expect(text).toContain('CREATE TABLE users') + expect(text).not.toContain('INSERT INTO `users`') }) + // ── string escaping ─────────────────────────────────────────────────────── it('should escape single quotes properly in string values', async () => { vi.mocked(executeOperation) .mockResolvedValueOnce([{ name: 'users' }]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, bio TEXT);' }, + { sql: 'CREATE TABLE users (id INTEGER, bio TEXT)' }, ]) .mockResolvedValueOnce([{ id: 1, bio: "Alice's adventure" }]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + const text = await response.text() - expect(response).toBeInstanceOf(Response) - const dumpText = await response.text() - expect(dumpText).toContain( - "INSERT INTO users VALUES (1, 'Alice''s adventure');" + expect(text).toContain( + "INSERT INTO `users` VALUES (1, 'Alice''s adventure');" ) }) - it('should return a 500 response when an error occurs', async () => { + // ── NULL handling ───────────────────────────────────────────────────────── + it('should write NULL for null column values', async () => { + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'users' }]) + .mockResolvedValueOnce([ + { sql: 'CREATE TABLE users (id INTEGER, bio TEXT)' }, + ]) + .mockResolvedValueOnce([{ id: 1, bio: null }]) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + const text = await response.text() + + expect(text).toContain('INSERT INTO `users` VALUES (1, NULL);') + }) + + // ── large-table pagination ──────────────────────────────────────────────── + it('should paginate large tables across multiple batches', async () => { + // First batch: exactly 1000 rows (triggers another fetch) + const fullBatch = Array.from({ length: 1000 }, (_, i) => ({ + id: i + 1, + name: `User ${i + 1}`, + })) + // Second batch: 1 row (< 1000 → loop ends) + const partialBatch = [{ id: 1001, name: 'User 1001' }] + + vi.mocked(executeOperation) + // 1. table list + .mockResolvedValueOnce([{ name: 'users' }]) + // 2. schema + .mockResolvedValueOnce([ + { sql: 'CREATE TABLE users (id INTEGER, name TEXT)' }, + ]) + // 3. batch 1 (offset 0): 1000 rows + .mockResolvedValueOnce(fullBatch) + // 4. batch 2 (offset 1000): 1 row → partial → loop ends + .mockResolvedValueOnce(partialBatch) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + const text = await response.text() + + const insertCount = (text.match(/INSERT INTO `users`/g) || []).length + expect(insertCount).toBe(1001) + }) + + // ── pre-flight error → HTTP 500 ─────────────────────────────────────────── + it('should return a 500 response when a pre-flight query fails', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) - vi.mocked(executeOperation).mockRejectedValue( + + // The very first call (table list) throws + vi.mocked(executeOperation).mockRejectedValueOnce( new Error('Database Error') ) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) expect(response.status).toBe(500) - const jsonResponse: { error: string } = await response.json() - expect(jsonResponse.error).toBe('Failed to create database dump') + const json: { error: string } = await response.json() + expect(json.error).toBe('Failed to create database dump') + + consoleErrorMock.mockRestore() + }) + + // ── sqlite_ tables excluded ─────────────────────────────────────────────── + it('should exclude sqlite_ internal tables from the dump', async () => { + // The SQL filter already prevents sqlite_ tables reaching us; + // mock reflects only user tables. + vi.mocked(executeOperation) + .mockResolvedValueOnce([{ name: 'users' }]) + .mockResolvedValueOnce([ + { sql: 'CREATE TABLE users (id INTEGER, name TEXT)' }, + ]) + .mockResolvedValueOnce([{ id: 1, name: 'Alice' }]) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig) + const text = await response.text() + + expect(text).not.toContain('sqlite_') }) }) diff --git a/src/export/dump.ts b/src/export/dump.ts index 91a2e89..0407d79 100644 --- a/src/export/dump.ts +++ b/src/export/dump.ts @@ -3,69 +3,177 @@ import { StarbaseDBConfiguration } from '../handler' import { DataSource } from '../types' import { createResponse } from '../utils' +/** Number of rows fetched per LIMIT/OFFSET batch. */ +const BATCH_SIZE = 1_000 + +/** + * Escape a column value for use inside a SQL VALUES clause. + * Handles: null/undefined, boolean, number, string, and Uint8Array (BLOB). + */ +function escapeSqlValue(value: unknown): string { + if (value === null || value === undefined) { + return 'NULL' + } + if (typeof value === 'boolean') { + return value ? '1' : '0' + } + if (typeof value === 'number') { + return String(value) + } + if (value instanceof Uint8Array) { + const hex = Array.from(value) + .map((b) => b.toString(16).padStart(2, '0')) + .join('') + return `X'${hex}'` + } + // Default: treat as text, escape single quotes + return `'${String(value).replace(/'/g, "''")}'` +} + +/** + * Stream a full SQL dump of the StarbaseDB database. + * + * Problem (issue #59): the old implementation fetched the entire database + * into a single string before returning a response. This caused: + * - Memory exhaustion on large databases (Durable Object 1 GB cap). + * - Gateway timeouts (30-second hard limit before the first byte is sent). + * + * Solution: + * 1. Pre-flight: collect table names & schemas *before* opening the stream + * so any DB error returns a clean HTTP 500 (not a broken HTTP 200). + * 2. Stream: rows are fetched in batches of BATCH_SIZE via LIMIT/OFFSET + * and pushed to a ReadableStream immediately — the HTTP response starts + * flowing to the client as soon as the first chunk is enqueued. + * 3. Yield: `await new Promise(r => setTimeout(r, 0))` between each batch + * lets the Durable Object's event loop breathe and avoids blocking. + */ export async function dumpDatabaseRoute( dataSource: DataSource, config: StarbaseDBConfiguration ): Promise { + // ── Pre-flight: collect metadata before opening the stream ──────────── + // Any error here produces a clean HTTP 500 rather than a mid-stream abort. + let tablesMeta: Array<{ name: string; schema: string }> = [] + try { - // Get all table names const tablesResult = await executeOperation( - [{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }], + [ + { + sql: `SELECT name FROM sqlite_master + WHERE type = 'table' + AND name NOT LIKE 'sqlite_%' + ORDER BY name;`, + }, + ], dataSource, config ) - const tables = tablesResult.map((row: any) => row.name) - let dumpContent = 'SQLite format 3\0' // SQLite file header + const tableNames: string[] = tablesResult.map((row: any) => row.name) - // Iterate through all tables - for (const table of tables) { - // Get table schema + for (const name of tableNames) { const schemaResult = await executeOperation( [ { - sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`, + sql: `SELECT sql FROM sqlite_master + WHERE type = 'table' AND name = ?;`, + params: [name], }, ], dataSource, config ) - if (schemaResult.length) { - const schema = schemaResult[0].sql - dumpContent += `\n-- Table: ${table}\n${schema};\n\n` - } + const schema: string = + schemaResult.length > 0 && schemaResult[0].sql + ? (schemaResult[0].sql as string) + : '' - // Get table data - const dataResult = await executeOperation( - [{ sql: `SELECT * FROM ${table};` }], - dataSource, - config - ) + tablesMeta.push({ name, schema }) + } + } catch (error: any) { + console.error('Database Dump Error (pre-flight):', error) + return createResponse(undefined, 'Failed to create database dump', 500) + } + + // ── Stream: push rows in chunks without holding them all in memory ───── + const encoder = new TextEncoder() - for (const row of dataResult) { - const values = Object.values(row).map((value) => - typeof value === 'string' - ? `'${value.replace(/'/g, "''")}'` - : value + const readable = new ReadableStream({ + async start(controller) { + const enqueue = (text: string) => + controller.enqueue(encoder.encode(text)) + + try { + enqueue( + `-- StarbaseDB SQL Dump\n-- Generated: ${new Date().toISOString()}\n\n` + + `PRAGMA foreign_keys = OFF;\nBEGIN TRANSACTION;\n\n` ) - dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n` - } - dumpContent += '\n' - } + for (const { name, schema } of tablesMeta) { + if (schema) { + enqueue( + `-- Table: ${name}\n` + + `DROP TABLE IF EXISTS \`${name}\`;\n` + + `${schema};\n\n` + ) + } + + // Paginate through rows in BATCH_SIZE chunks + let offset = 0 + while (true) { + const rows = await executeOperation( + [ + { + sql: `SELECT * FROM \`${name}\` LIMIT ? OFFSET ?;`, + params: [BATCH_SIZE, offset], + }, + ], + dataSource, + config + ) + + if (!rows || rows.length === 0) break + + for (const row of rows) { + const values = Object.values(row) + .map(escapeSqlValue) + .join(', ') + enqueue(`INSERT INTO \`${name}\` VALUES (${values});\n`) + } + + offset += rows.length + + // Done when last page was a partial batch + if (rows.length < BATCH_SIZE) break - // Create a Blob from the dump content - const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' }) + // Yield event loop so the Durable Object does not stall + await new Promise((resolve) => + setTimeout(resolve, 0) + ) + } - const headers = new Headers({ + enqueue('\n') + + // Yield between tables + await new Promise((resolve) => setTimeout(resolve, 0)) + } + + enqueue('COMMIT;\n') + controller.close() + } catch (streamError: any) { + console.error('Database Dump Stream Error:', streamError) + controller.error(streamError) + } + }, + }) + + return new Response(readable, { + headers: new Headers({ 'Content-Type': 'application/x-sqlite3', 'Content-Disposition': 'attachment; filename="database_dump.sql"', - }) - - return new Response(blob, { headers }) - } catch (error: any) { - console.error('Database Dump Error:', error) - return createResponse(undefined, 'Failed to create database dump', 500) - } + 'Transfer-Encoding': 'chunked', + 'Cache-Control': 'no-store', + }), + }) }