Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Fixes

- C++ `class Foo;` forward declarations are no longer indexed as classes, so a heavily used type is no longer buried under phantom copies of itself. A forward declaration is just a promise that a type exists — it has no body, members, or base classes — but CodeGraph still minted a full `class` node for each one. In a large C++/Unreal-Engine codebase a hot class such as `APXCharacter` is forward-declared (`class APXCharacter;`) at the top of dozens of headers, so the graph ended up with dozens of bodiless `APXCharacter` nodes competing with the single real definition; `codegraph_explore` then returned a spray of forward-declaration sites — and picked one as the blast-radius representative — while the actual definition (with its members and callers) was crowded out of the results. Bodiless class specifiers are now skipped, exactly as bodiless structs (#831) and enums already were, so only the real definition is indexed. The skip is gated to C/C++, where a bodiless class is always a forward declaration; languages in which a bodiless class is a complete definition (Kotlin `class Empty`, Scala) are unaffected. Thanks @luoyxy for the report and fix.

## [1.1.6] - 2026-06-30

Expand Down
51 changes: 51 additions & 0 deletions __tests__/cpp-forward-decl.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
* C++ forward-declaration extraction.
*
* A `class Foo;` forward declaration parses as a bodiless `class_specifier`.
* It is NOT a definition, so it must not mint a `class` node — otherwise every
* forward decl repeated across dozens of headers creates a phantom `class Foo`
* that competes with, and in `codegraph_explore` results MASKS, the single real
* definition (structs and enums already skip their bodiless forms). Languages
* where a bodiless class IS a definition (Kotlin `class Empty`, Scala) must be
* unaffected — the skip is gated on the C/C++ extractor's `skipBodilessClass`.
*/
import { describe, it, expect, beforeAll } from 'vitest';
import { extractFromSource } from '../src/extraction';
import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';

/**
 * One-time suite setup: initialise the grammar runtime, then load every
 * grammar, so the extraction calls in the tests below run after both steps
 * have completed.
 */
async function prepareGrammars(): Promise<void> {
  await initGrammars();
  await loadAllGrammars();
}

beforeAll(prepareGrammars);

describe('C++ forward-declaration handling', () => {
  // Shorthand for running the extractor over an in-memory C++ source file.
  const parseCpp = (fileName: string, code: string) => extractFromSource(fileName, code, 'cpp', []);

  it('does NOT emit phantom class nodes for forward declarations', () => {
    // Two bodiless `class X;` declarations and nothing else.
    const { nodes } = parseCpp('Fwd.h', `class APXCharacter;\nclass UFoo;\n`);
    const classNodes = nodes.filter((node) => node.kind === 'class');
    expect(classNodes.length).toBe(0);
  });

  it('still emits a class node for a real definition', () => {
    // A class with a body must still be indexed under its own name.
    const { nodes } = parseCpp('Bar.h', `class Bar {\npublic:\n void doThing();\n};\n`);
    const classNames = nodes.filter((node) => node.kind === 'class').map((node) => node.name);
    expect(classNames).toContain('Bar');
  });

  it('keeps only the real definition when a fwd decl precedes it', () => {
    const header = `class APXCharacter;\n\nclass APXCharacter {\npublic:\n void run() {}\n};\n`;
    const { nodes } = parseCpp('Mix.h', header);

    // Exactly one class node may carry the name, despite two `class APXCharacter` sites.
    const definitions = nodes.filter((node) => node.kind === 'class' && node.name === 'APXCharacter');
    expect(definitions.length).toBe(1);

    // The survivor is the definition: its inline member method is extracted.
    const runMethods = nodes.filter((node) => node.kind === 'method' && node.name === 'run');
    expect(runMethods.length).toBe(1);
  });

  it('templated forward declaration is skipped too', () => {
    // A bodiless class declaration wrapped in a template header.
    const { nodes } = parseCpp('T.h', `template<typename T> class TFoo;\n`);
    const classNodes = nodes.filter((node) => node.kind === 'class');
    expect(classNodes.length).toBe(0);
  });

  it('Kotlin bodiless class remains a real definition (no regression)', () => {
    // Kotlin input: both the bodiless `Empty` and the bodied `WithBody` must be indexed.
    const { nodes } = extractFromSource('K.kt', `class Empty\nclass WithBody { fun f() {} }\n`, 'kotlin', []);
    const kotlinClassNames = nodes.filter((node) => node.kind === 'class').map((node) => node.name);
    expect(kotlinClassNames).toEqual(expect.arrayContaining(['Empty', 'WithBody']));
  });
});
5 changes: 5 additions & 0 deletions src/extraction/languages/c-cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,11 @@ export const cppExtractor: LanguageExtractor = {
// Recover macro-annotated class/struct definitions (`class MYMODULE_API Foo : Base`)
// that tree-sitter otherwise misparses into a phantom function (#1061/#946).
preParse: blankCppExportMacros,
// A bodiless `class_specifier` in C++ is a forward declaration (`class Foo;`)
// or an elaborated type reference, never a definition — skip it so repeated
// forward decls across headers don't mint phantom class nodes that mask the
// real definition (matches the bodiless skip structs/enums already get).
skipBodilessClass: true,
functionTypes: ['function_definition'],
classTypes: ['class_specifier'],
methodTypes: ['function_definition'],
Expand Down
12 changes: 12 additions & 0 deletions src/extraction/tree-sitter-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,18 @@ export interface LanguageExtractor {
methodsAreTopLevel?: boolean;
/** NodeKind to use for interface-like declarations (Rust: 'trait'). Default: 'interface' */
interfaceKind?: NodeKind;
/**
* When true, a class node with no body is a forward declaration / elaborated
* type reference — NOT a definition — and is skipped, mirroring the bodiless
* skip already applied to structs (#831) and enums. In C++ a `class Foo;`
* forward declaration parses as a bodiless `class_specifier`; repeated across
* dozens of headers it mints one phantom `class Foo` node per header that
* competes with — and in `codegraph_explore` results MASKS — the single real
* definition (the phantom, bodiless nodes crowd out the one that carries the
* members and callers). Off by default because some languages (Kotlin `class
* Empty`, Scala) treat a bodiless class as a complete definition. C/C++-only.
*/
skipBodilessClass?: boolean;

// --- New hooks ---

Expand Down
14 changes: 12 additions & 2 deletions src/extraction/tree-sitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1528,6 +1528,18 @@ export class TreeSitterExtractor {
private extractClass(node: SyntaxNode, kind: NodeKind = 'class'): void {
if (!this.extractor) return;

// Resolve the body once up front so a bodiless class can be skipped before a
// node is minted (mirrors extractStruct/extractEnum). In C++ a `class Foo;`
// forward declaration is a bodiless `class_specifier`; without this guard
// every such forward decl — repeated across dozens of headers — mints a
// phantom `class Foo` node that competes with and MASKS the single real
// definition in query results (the real one carries the members/callers).
// Gated on `skipBodilessClass` (C/C++ only) because Kotlin/Scala treat a
// bodiless class as a complete definition.
let body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
?? getChildByField(node, this.extractor.bodyField);
if (!body && this.extractor.skipBodilessClass) return;

const name = extractName(node, this.source, this.extractor);
const docstring = getPrecedingDocstring(node, this.source);
const visibility = this.extractor.getVisibility?.(node);
Expand All @@ -1551,8 +1563,6 @@ export class TreeSitterExtractor {

// Push to stack and visit body
this.nodeStack.push(classNode.id);
let body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
?? getChildByField(node, this.extractor.bodyField);
if (!body) body = node;

// Visit all children for methods and properties
Expand Down