Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,9 @@ What it skips out of the box:
- **Anything in your `.gitignore`** — honored in git repos via git, and in
non-git projects by reading `.gitignore` directly (root and nested).
- **Files larger than 1 MB** — generated bundles, minified JS, vendored blobs.
Override the threshold (in bytes) with `CODEGRAPH_MAX_FILE_SIZE` for repos with
legitimately large hand-written sources — e.g. `CODEGRAPH_MAX_FILE_SIZE=5242880`
for 5 MB. Fractional values are rounded down to whole bytes. Invalid or
non-positive values fall back to the 1 MB default.

To keep something else out, add it to `.gitignore`. To pull a default-excluded
directory back **in** (say you really do want a vendored dependency indexed),
Expand Down
34 changes: 34 additions & 0 deletions __tests__/resolve-max-file-size.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { afterEach, describe, expect, it } from 'vitest';
import { resolveMaxFileSize } from '../src/extraction';

/** Name of the environment variable that overrides the size threshold. */
const ENV_KEY = 'CODEGRAPH_MAX_FILE_SIZE';

/** One mebibyte, in bytes. */
const MIB = 1024 * 1024;

/** Threshold expected whenever no usable override is present. */
const FALLBACK_BYTES = MIB;

/** Raw override strings that must be rejected in favour of the default. */
const REJECTED_VALUES = ['', 'not-a-number', '0', '-1', 'NaN', 'Infinity'];

describe('resolveMaxFileSize', () => {
  // Every case sets the variable itself; clear it afterwards so one case's
  // override never leaks into the next.
  afterEach(() => {
    delete process.env[ENV_KEY];
  });

  it('falls back to the 1 MB default when the env var is unset', () => {
    delete process.env[ENV_KEY];

    const resolved = resolveMaxFileSize();

    expect(resolved).toBe(FALLBACK_BYTES);
  });

  it('honours a valid positive override', () => {
    const fiveMib = 5 * MIB;
    process.env[ENV_KEY] = String(fiveMib);

    const resolved = resolveMaxFileSize();

    expect(resolved).toBe(fiveMib);
  });

  it('floors fractional byte counts', () => {
    process.env[ENV_KEY] = '2097152.9';

    const resolved = resolveMaxFileSize();

    expect(resolved).toBe(2 * MIB);
  });

  it.each(REJECTED_VALUES)(
    'falls back to the default for invalid value %j',
    (candidate) => {
      process.env[ENV_KEY] = candidate;

      const resolved = resolveMaxFileSize();

      expect(resolved).toBe(FALLBACK_BYTES);
    },
  );
});
18 changes: 17 additions & 1 deletion src/extraction/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,23 @@ export function hashContent(content: string): string {
* vendored blobs blow the WASM heap and the worker-recycle budget for no useful
* symbols. 1 MB covers essentially all hand-written source.
*/
const MAX_FILE_SIZE = 1024 * 1024;
const DEFAULT_MAX_FILE_SIZE = 1024 * 1024;

/**
 * Resolve the max-file-size threshold (bytes), allowing an override via the
 * `CODEGRAPH_MAX_FILE_SIZE` environment variable for repos with legitimately
 * large hand-written sources.
 *
 * The raw value is converted with `Number()` and rounded down to a whole byte
 * count. The 1 MB default is used when the variable is unset or empty, when it
 * is not a finite number, or when the rounded value is smaller than one byte
 * (zero, negative, or a fraction such as `0.5`).
 *
 * @returns A positive integer number of bytes.
 */
export function resolveMaxFileSize(): number {
  const raw = process.env.CODEGRAPH_MAX_FILE_SIZE;
  if (raw === undefined || raw === '') return DEFAULT_MAX_FILE_SIZE;

  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return DEFAULT_MAX_FILE_SIZE;

  // Floor BEFORE the positivity check: a value in (0, 1) is "positive" but
  // floors to 0, which would otherwise yield a zero-byte threshold.
  const bytes = Math.floor(parsed);
  return bytes >= 1 ? bytes : DEFAULT_MAX_FILE_SIZE;
}

// Resolved once, when this module is first evaluated.
const MAX_FILE_SIZE = resolveMaxFileSize();

/**
* Directory names that are dependency, build, cache, or tooling output across the
Expand Down