diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 927ecfef1..c578261de 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -11,7 +11,7 @@ import * as os from 'os'; import { CodeGraph } from '../src'; import { extractFromSource, scanDirectory, buildDefaultIgnore } from '../src/extraction'; import { detectLanguage, isLanguageSupported, getSupportedLanguages, initGrammars, loadAllGrammars, isSourceFile } from '../src/extraction/grammars'; -import { stripCppTemplateArgs } from '../src/extraction/languages/c-cpp'; +import { stripCppTemplateArgs, blankCppExportMacros } from '../src/extraction/languages/c-cpp'; import { normalizePath } from '../src/utils'; beforeAll(async () => { @@ -2665,13 +2665,16 @@ std::unique_ptr makeWidget() { return nullptr; } }); }); - describe('C++ macro-prefixed class/struct misparse (#946)', () => { - // An export/visibility macro before the class name plus a base clause - // (`class MACRO Name : public Base { … }`) makes tree-sitter read `class - // MACRO` as an elaborated type and the whole declaration as a - // function_definition named after the class, spanning the entire body — a - // phantom `function` that polluted callers/impact/blast-radius. It's dropped. - it('does not mint a phantom function for a macro-annotated class that inherits', () => { + describe('C++ macro-prefixed class/struct recovery (#946)', () => { + // An export/visibility macro before the class name (`class MACRO Name : public + // Base { … }`, the UE `MYMODULE_API` convention) makes tree-sitter — which has + // no preprocessor — read `class MACRO` as an elaborated type and the whole + // declaration as a function_definition named after the class. The class node + // and its base/`extends` edge were lost (the phantom function was dropped), + // so inheritance-based queries returned nothing for these types. 
`preParse` + // now blanks the macro with equal-length spaces, so the class parses normally + // and keeps both its node and `extends` edge. + it('recovers the class node AND its extends edge for a macro-annotated class', () => { const code = `#pragma once #define MAPCORE_EXPORT __attribute__((visibility("default"))) @@ -2693,17 +2696,30 @@ public: expect(detectLanguage('provider.h', code)).toBe('cpp'); const result = extractFromSource('provider.h', code); - // The misparse used to surface as `function | LocalDataProvider` spanning - // the whole class body — a false caller in the graph. It's gone now. + // The macro-annotated type is now a real class node, not a phantom function + // spanning the whole body. + const local = result.nodes.find((n) => n.name === 'LocalDataProvider'); + expect(local?.kind).toBe('class'); expect( result.nodes.find((n) => n.name === 'LocalDataProvider' && n.kind === 'function') ).toBeUndefined(); // The sibling class without the macro is unaffected — still a class. - expect(result.nodes.find((n) => n.name === 'DataProvider')?.kind).toBe('class'); + const base = result.nodes.find((n) => n.name === 'DataProvider'); + expect(base?.kind).toBe('class'); + + // The whole point of #946: the base/`extends` edge survives, so subclass / + // type-hierarchy / inheritance-impact queries work for UE-style classes. 
+ const extendsRef = result.unresolvedReferences.find( + (r) => + r.referenceKind === 'extends' && + r.referenceName === 'DataProvider' && + r.fromNodeId === local?.id + ); + expect(extendsRef, 'macro-annotated class should carry its `extends Base` edge').toBeDefined(); }); - it('drops the struct variant too, without dropping a genuine class', () => { + it('recovers the struct variant too, without disturbing a genuine class', () => { const code = ` #define API __declspec(dllexport) struct API Widget : public Base { int x; }; @@ -2711,15 +2727,61 @@ class Plain : public Base { public: int y; }; `; const result = extractFromSource('widget.cpp', code); - // `struct MACRO Name : Base { … }` misparses the same way — no phantom function. + // `struct MACRO Name : Base { … }` is recovered into a real struct node + // with its `extends` edge — not a phantom function, and not dropped. + const widget = result.nodes.find((n) => n.name === 'Widget'); + expect(widget?.kind).toBe('struct'); expect( result.nodes.find((n) => n.name === 'Widget' && n.kind === 'function') ).toBeUndefined(); - - // A normal class with a base clause and no macro must still be a class — the - // drop is precise, not a blanket "class with inheritance" filter. + expect( + result.unresolvedReferences.some( + (r) => + r.referenceKind === 'extends' && + r.referenceName === 'Base' && + r.fromNodeId === widget?.id + ) + ).toBe(true); + + // A normal class with a base clause and no macro is untouched — still a class. expect(result.nodes.find((n) => n.name === 'Plain')?.kind).toBe('class'); }); + + it('blankCppExportMacros only touches the macro token, preserving every byte offset', () => { + // Replacement is equal-length spaces on the same line, so length and the + // offset of every other symbol stay exactly as they were. 
+ const before = 'class MYGAME_API UFoo : public UObject {};'; + const after = blankCppExportMacros(before); + expect(after.length).toBe(before.length); // equal-length → offsets preserved + expect(after.includes('MYGAME_API')).toBe(false); // macro blanked + expect(after.indexOf('UFoo')).toBe(before.indexOf('UFoo')); // name offset unchanged + expect(after).toMatch(/^class +UFoo : public UObject \{\};$/); // only spaces where the macro was + + // struct variant (body only, no base clause) and a `final` specifier are + // both recognized as definition headers and stripped. + const structOut = blankCppExportMacros('struct CORE_API Bar { int x; };'); + expect(structOut.includes('CORE_API')).toBe(false); + expect(structOut).toMatch(/^struct +Bar \{ int x; \};$/); + + const finalOut = blankCppExportMacros('class LIB_API Baz final : public Q {};'); + expect(finalOut.includes('LIB_API')).toBe(false); + expect(finalOut).toMatch(/^class +Baz final : public Q \{\};$/); + }); + + it('blankCppExportMacros leaves valid (non-macro) declarations alone', () => { + // One identifier after `class` → no macro to strip. + expect(blankCppExportMacros('class Foo : public Bar {};')).toBe('class Foo : public Bar {};'); + // ALL-CAPS *class name* (not a macro) with a base clause — the type name is + // followed directly by `:`, so the two-identifier rule doesn't fire. + expect(blankCppExportMacros('class FOO : public Bar {};')).toBe('class FOO : public Bar {};'); + // Template parameter `class T` and `enum class` keyword usage are untouched. 
/**
 * Blank a leading export-annotation macro in a `class`/`struct` *definition*
 * header so tree-sitter parses the type normally. Runs as `preParse`, before
 * the grammar sees the source.
 *
 * Unreal Engine — and many C++ libraries — annotate every exported type with an
 * ALL-CAPS macro between the `class`/`struct` keyword and the type name:
 *
 *     class MYMODULE_API UMyComponent : public UActorComponent { ... };
 *
 * tree-sitter has no preprocessor, so it reads `class MYMODULE_API` as an
 * elaborated type specifier and the rest as a function — the whole declaration
 * surfaces as a `function_definition` that {@link isMacroMisparsedTypeDecl}
 * then drops as unrecoverable (#946). Both the class node AND its base
 * (`extends`) edge are lost.
 *
 * The macro token is replaced with EQUAL-LENGTH spaces, so the returned string
 * has the same length as the input and every other character keeps its offset
 * (and therefore its line/column).
 *
 * Scope is deliberately tight so valid code is left alone:
 *  - the macro must be ALL-CAPS (>= 2 chars: `[A-Z][A-Z0-9_]+`);
 *  - it must be followed by whitespace and a type name, optionally qualified
 *    (`Outer::Inner`);
 *  - the type name must NOT be the contextual keyword `final`. In
 *    `class URL final : public Base {}` the ALL-CAPS token is the class name
 *    and `final` is its specifier — blanking `URL` would leave a class
 *    literally named `final`;
 *  - the type name must be followed by `final`, a base clause (a single `:`,
 *    never the scope operator `::`) or the body (`{`). Rejecting `::` keeps
 *    out-of-line member functions with an elaborated return type, e.g.
 *    `struct FILE_INFO Reader::info() { ... }`, untouched.
 *
 * So `class Foo {` (one identifier), `template <class T>`, `enum class E {`,
 * and an ALL-CAPS class name with no macro (`class FOO : public Bar`,
 * `class FOO final {`) are all returned unchanged.
 *
 * Known limitation: the match is purely lexical. A brace-initialised variable
 * declared with an elaborated ALL-CAPS type (`struct POINT p{1, 2};`) has the
 * same token shape as `struct API Widget{ ... };` and is blanked as well.
 * Matches inside comments and string literals are blanked too.
 *
 * Function-like export macros (`class MACRO(x) Name`) and macros containing
 * lowercase letters are not matched here.
 *
 * @param source Raw C/C++ source text.
 * @returns `source` with each matched macro token replaced by spaces; the very
 *   same string when it contains neither `class` nor `struct`.
 */
export function blankCppExportMacros(source: string): string {
  // Cheap bail-out: without either keyword the regex cannot match.
  if (!source.includes('class') && !source.includes('struct')) return source;

  // 1: keyword   2: whitespace after it   3: the ALL-CAPS macro token.
  // Lookahead (nothing past the macro is consumed or rewritten):
  //   \s+(?!final\b)IDENT            type name, which must not be `final`
  //   (?:\s*::\s*IDENT)*             optional qualification (Outer::Inner)
  //   \s*(?:final\b|:(?!:)|\{)       `final`, a base clause, or the body
  const macroInTypeHeader =
    /\b(class|struct)(\s+)([A-Z][A-Z0-9_]+)(?=\s+(?!final\b)[A-Za-z_]\w*(?:\s*::\s*[A-Za-z_]\w*)*\s*(?:final\b|:(?!:)|\{))/g;

  return source.replace(
    macroInTypeHeader,
    (_m, kw: string, ws: string, macro: string) => kw + ws + ' '.repeat(macro.length)
  );
}