diff --git a/CHANGELOG.md b/CHANGELOG.md index 629173953..64849b15c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### New Features + +- CodeGraph now indexes **Fortran** (`.f90`, `.f95`, `.f03`, `.f08`, `.f18`, and legacy fixed-form `.f`, `.for`, `.f77`, `.ftn`). It extracts modules and programs, subroutines and functions (including `CONTAINS`-block procedures), derived types with their components, generic interfaces, `PARAMETER` constants, and `use` imports — wired with `calls`, `imports`, `contains`, and `extends` edges so `callers`, `callees`, `impact`, and `explore` work across Fortran codebases. Modern free-form is the primary target; legacy fixed-form is recognized but parses less robustly. +- Fortran type-bound procedures are first-class: `PROCEDURE :: Integrate => CpgIntegrate` bindings (including `GENERIC` aliases and `DEFERRED` bindings) become real methods on their derived type, `CALL obj%method()` call sites link to the right binding via the receiver's declared `CLASS(...)`/`TYPE(...)`, and polymorphic dispatch through an abstract base is bridged to every extending type's override — so `callers`, `callees`, and `explore` follow Fortran's object-oriented call chains end-to-end instead of stopping at the base type. +- Fortran name matching is case-insensitive, matching the language: a routine declared as `subroutine foo` and called as `CALL FOO()` resolves to the same symbol, and array accesses are no longer mistaken for function calls when they share a name with a variable. 
+ ## [1.2.0] - 2026-07-02 diff --git a/README.md b/README.md index 999f352ba..d1c4e40ab 100644 --- a/README.md +++ b/README.md @@ -244,7 +244,7 @@ The reliable, universal payoff is **surgical context and speed**: CodeGraph coll | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 | | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes | | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config | -| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, R, Svelte, Vue, Astro, Liquid, Pascal/Delphi | +| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, R, Fortran, Svelte, Vue, Astro, Liquid, Pascal/Delphi | | **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 17 frameworks | | **Mixed iOS / React Native / Expo** | Closes cross-language flows that static parsing misses: Swift ↔ ObjC bridging, React Native legacy bridge + TurboModules + Fabric view components, native → JS event emitters, Expo Modules | | **100% Local** | No data leaves your machine. No API keys. No external services. 
SQLite database only | @@ -714,6 +714,7 @@ is written): | Lua | `.lua` | Full support (functions, methods with receivers, local variables, `require` imports, call edges) | | R | `.R` `.r` | Full support (functions in every assignment form, S4/R5/R6 classes with methods, `library`/`require` imports, `source()` file references, call edges) | | Luau | `.luau` | Full support (everything in Lua, plus `type`/`export type` aliases, typed signatures, and Roblox instance-path `require`) | +| Fortran | `.f90`, `.f95`, `.f03`, `.f08`, `.f18`, `.f`, `.for`, `.f77`, `.ftn` | Full support (modules/programs, subroutines & functions, derived types with components, type-bound procedures, generic interfaces, `PARAMETER` constants, `use` imports, `call`/function-reference edges, `extends`). Modern free-form is primary; legacy fixed-form is mapped but parses less robustly | ## Measured cross-file coverage diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 3500c5e07..403779cb5 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -7901,3 +7901,233 @@ GeomPoint <- ggproto("GeomPoint", Geom, }); }); }); + +describe('Fortran Extraction', () => { + const SAMPLE = ` +module geometry_mod + implicit none + real, parameter :: PI = 3.14159_8 + + type :: point + real :: x + real :: y + end type point + + type, extends(point) :: point3d + real :: z + end type point3d + + interface area + module procedure circle_area + end interface area + +contains + + function circle_area(r) result(a) + real, intent(in) :: r + real :: a + a = PI * r * r + end function circle_area + + subroutine make_point(p, x, y) + type(point), intent(out) :: p + real, intent(in) :: x, y + p%x = x + p%y = y + call log_point(p) + end subroutine make_point + + subroutine log_point(p) + type(point), intent(in) :: p + print *, p%x, p%y + end subroutine log_point +end module geometry_mod + +program main + use geometry_mod + implicit none + type(point) :: p + call make_point(p, 
1.0, 2.0) +end program main +`; + + describe('Language detection', () => { + it('should detect Fortran files (free-form and legacy fixed-form)', () => { + expect(detectLanguage('solver.f90')).toBe('fortran'); + expect(detectLanguage('mod_geometry.F90')).toBe('fortran'); + expect(detectLanguage('legacy.f')).toBe('fortran'); + expect(detectLanguage('legacy.for')).toBe('fortran'); + }); + + it('should report Fortran as supported', () => { + expect(isLanguageSupported('fortran')).toBe(true); + expect(getSupportedLanguages()).toContain('fortran'); + }); + }); + + describe('Symbol extraction', () => { + let result: ReturnType; + beforeAll(() => { + result = extractFromSource('geometry.f90', SAMPLE); + }); + const byKind = (kind: string) => result.nodes.filter((n) => n.kind === kind).map((n) => n.name); + + it('should extract modules and programs as module nodes', () => { + const modules = byKind('module'); + expect(modules).toContain('geometry_mod'); + expect(modules).toContain('main'); + expect(result.nodes.find((n) => n.name === 'geometry_mod')?.language).toBe('fortran'); + }); + + it('should extract subroutines and functions as functions', () => { + const funcs = byKind('function'); + expect(funcs).toContain('circle_area'); + expect(funcs).toContain('make_point'); + expect(funcs).toContain('log_point'); + const circle = result.nodes.find((n) => n.name === 'circle_area' && n.kind === 'function'); + expect(circle?.qualifiedName).toBe('geometry_mod::circle_area'); + expect(circle?.signature).toContain('circle_area(r)'); + }); + + it('should extract derived types as structs with fields', () => { + expect(byKind('struct')).toEqual(expect.arrayContaining(['point', 'point3d'])); + const fields = byKind('field'); + expect(fields).toEqual(expect.arrayContaining(['x', 'y', 'z'])); + const x = result.nodes.find((n) => n.name === 'x' && n.kind === 'field'); + expect(x?.qualifiedName).toBe('geometry_mod::point::x'); + }); + + it('should extract PARAMETER declarations as 
constants', () => { + expect(byKind('constant')).toContain('PI'); + }); + + it('should extract named (generic) interfaces', () => { + expect(byKind('interface')).toContain('area'); + }); + }); + + describe('Reference extraction', () => { + let refs: NonNullable['unresolvedReferences']>; + beforeAll(() => { + refs = extractFromSource('geometry.f90', SAMPLE).unresolvedReferences ?? []; + }); + + it('should record `use` statements as import references', () => { + const imports = refs.filter((r) => r.referenceKind === 'imports').map((r) => r.referenceName); + expect(imports).toContain('geometry_mod'); + }); + + it('should record CALL statements as call references', () => { + const calls = refs.filter((r) => r.referenceKind === 'calls').map((r) => r.referenceName); + expect(calls).toContain('make_point'); + expect(calls).toContain('log_point'); + }); + + it('should record type EXTENDS as an extends reference', () => { + const ext = refs.filter((r) => r.referenceKind === 'extends').map((r) => r.referenceName); + expect(ext).toContain('point'); + }); + }); + + describe('Type-bound procedures and member calls', () => { + const TBP_SAMPLE = ` +module engine_mod + implicit none + + type, abstract :: base_engine_t + contains + procedure, pass :: Integrate => BaseIntegrate + procedure :: GetName + procedure(step_iface), deferred :: Step + generic :: Run => Integrate + end type base_engine_t + + type, extends(base_engine_t) :: cpg_engine_t + contains + procedure :: Integrate => CpgIntegrate + end type cpg_engine_t + +contains + + subroutine BaseIntegrate(this) + class(base_engine_t), intent(inout) :: this + call this%GetName() + end subroutine BaseIntegrate + + subroutine CpgIntegrate(this) + class(cpg_engine_t), intent(inout) :: this + end subroutine CpgIntegrate + + function GetName(this) result(name) + class(base_engine_t), intent(in) :: this + character(32) :: name + name = "base" + end function GetName + + subroutine driver(eng, holder) + class(base_engine_t), 
intent(inout) :: eng + type(cpg_engine_t) :: holder + real :: y + call eng%Integrate() + call holder%sub%Execute(1) + call eng%GetName() + y = eng%fn(2.0) + end subroutine driver +end module engine_mod +`; + let result: ReturnType; + beforeAll(() => { + result = extractFromSource('engine_mod.f90', TBP_SAMPLE); + }); + + it('should extract bindings as method nodes scoped under the derived type', () => { + const methods = result.nodes.filter((n) => n.kind === 'method'); + const names = methods.map((n) => n.name); + expect(names).toEqual( + expect.arrayContaining(['Integrate', 'GetName', 'Step', 'Run']) + ); + const integrate = methods.find( + (n) => n.name === 'Integrate' && n.qualifiedName.includes('base_engine_t') + ); + expect(integrate?.qualifiedName).toBe('engine_mod::base_engine_t::Integrate'); + // The override on the extending type is a distinct method node + expect( + methods.some( + (n) => n.name === 'Integrate' && n.qualifiedName.includes('cpg_engine_t') + ) + ).toBe(true); + }); + + it('should link each binding to its implementation via a calls reference', () => { + const refs = result.unresolvedReferences ?? 
[]; + const methodIds = new Map( + result.nodes.filter((n) => n.kind === 'method').map((n) => [n.id, n]) + ); + const bindingRefs = refs.filter( + (r) => r.referenceKind === 'calls' && methodIds.has(r.fromNodeId) + ); + const byName = bindingRefs.map((r) => r.referenceName); + expect(byName).toContain('BaseIntegrate'); // explicit => target + expect(byName).toContain('CpgIntegrate'); + expect(byName).toContain('GetName'); // bare binding: impl shares the name + expect(byName).toContain('Integrate'); // GENERIC :: Run => Integrate + // DEFERRED bindings have no implementation to reference + const step = result.nodes.find((n) => n.kind === 'method' && n.name === 'Step'); + expect(bindingRefs.some((r) => r.fromNodeId === step?.id)).toBe(false); + }); + + it('should normalize %-member calls to receiver.method references', () => { + const calls = (result.unresolvedReferences ?? []) + .filter((r) => r.referenceKind === 'calls') + .map((r) => r.referenceName); + expect(calls).toContain('eng.Integrate'); // CALL eng%Integrate() + expect(calls).toContain('eng.GetName'); + expect(calls).toContain('sub.Execute'); // chained holder%sub%Execute → component receiver + expect(calls).toContain('eng.fn'); // function-form member call in expression + // this/self receivers are kept (declared dummies → typed resolution) + expect(calls).toContain('this.GetName'); + // No raw '%' name should survive extraction + expect(calls.some((c) => c.includes('%'))).toBe(false); + }); + }); +}); diff --git a/__tests__/integration/fortran-pipeline.test.ts b/__tests__/integration/fortran-pipeline.test.ts new file mode 100644 index 000000000..782e7a896 --- /dev/null +++ b/__tests__/integration/fortran-pipeline.test.ts @@ -0,0 +1,171 @@ +/** + * Fortran end-to-end pipeline integration tests + * + * Covers the Fortran-specific resolution semantics that unit tests can't: + * - cross-module CALL through `use ..., only:` resolves to a call edge + * - case-insensitive resolution (declared lowercase, called 
UPPERCASE) + * - free subroutine declared via an anonymous INTERFACE block + * - type-bound procedure calls (`CALL obj%Run()`) resolving onto the + * receiver's declared type binding, the binding→implementation edge, + * and the fortran-override dispatch bridge along EXTENDS edges + * - array indexing NOT producing call edges to variables + */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import CodeGraph from '../../src/index'; + +function createTempDir(prefix = 'codegraph-fortran-'): string { + return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); +} + +function writeProject(root: string): void { + const src = path.join(root, 'src'); + fs.mkdirSync(src, { recursive: true }); + + fs.writeFileSync( + path.join(src, 'mod_engine.f90'), + `module mod_engine + implicit none + + type, abstract :: base_engine_t + contains + procedure, pass :: Integrate => BaseIntegrate + end type base_engine_t + + type, extends(base_engine_t) :: cpg_engine_t + contains + procedure :: Integrate => CpgIntegrate + end type cpg_engine_t + +contains + + subroutine BaseIntegrate(this) + class(base_engine_t), intent(inout) :: this + end subroutine BaseIntegrate + + subroutine CpgIntegrate(this) + class(cpg_engine_t), intent(inout) :: this + call helper_lowercase() + end subroutine CpgIntegrate + + subroutine helper_lowercase() + end subroutine helper_lowercase +end module mod_engine +` + ); + + fs.writeFileSync( + path.join(src, 'free_sub.f90'), + `subroutine free_standalone() +end subroutine free_standalone +` + ); + + fs.writeFileSync( + path.join(src, 'driver.f90'), + `module mod_driver + use mod_engine, only: base_engine_t, cpg_engine_t + implicit none + real :: shared_array(10) +contains + subroutine drive(eng) + class(base_engine_t), intent(inout) :: eng + integer :: i + interface + subroutine free_standalone() + end subroutine free_standalone + end interface + call 
eng%Integrate() + call HELPER_LOWERCASE() + call free_standalone() + do i = 1, 10 + shared_array(i) = shared_array(i) + 1.0 + end do + end subroutine drive +end module mod_driver +` + ); +} + +describe('Integration: Fortran pipeline', () => { + let tempDir: string; + let cg: CodeGraph; + + beforeAll(async () => { + tempDir = createTempDir(); + writeProject(tempDir); + cg = await CodeGraph.init(tempDir); + await cg.indexAll(); + }); + + afterAll(() => { + cg?.close(); + if (tempDir && fs.existsSync(tempDir)) { + fs.rmSync(tempDir, { recursive: true, force: true }); + } + }); + + function edgesBetween(sourceName: string, targetName: string) { + const queries = (cg as unknown as { queries: import('../../src/db/queries').QueryBuilder }).queries; + const sources = queries.getNodesByLowerName(sourceName.toLowerCase()); + const targets = new Set( + queries.getNodesByLowerName(targetName.toLowerCase()).map((n) => n.id) + ); + return sources.flatMap((s) => + queries.getOutgoingEdges(s.id, ['calls']).filter((e) => targets.has(e.target)) + ); + } + + it('resolves case-mismatched calls (declared lowercase, called UPPERCASE)', () => { + expect(edgesBetween('drive', 'helper_lowercase').length).toBeGreaterThan(0); + }); + + it('resolves a call to a free subroutine declared via an anonymous INTERFACE block', () => { + expect(edgesBetween('drive', 'free_standalone').length).toBeGreaterThan(0); + }); + + it('resolves CALL obj%Integrate() onto the declared type binding (method node)', () => { + const queries = (cg as unknown as { queries: import('../../src/db/queries').QueryBuilder }).queries; + const baseBinding = queries + .getNodesByLowerName('integrate') + .find((n) => n.kind === 'method' && n.qualifiedName.includes('base_engine_t')); + expect(baseBinding).toBeDefined(); + const drive = queries.getNodesByLowerName('drive').find((n) => n.kind === 'function'); + expect(drive).toBeDefined(); + const callEdges = queries + .getOutgoingEdges(drive!.id, ['calls']) + .filter((e) => 
e.target === baseBinding!.id); + expect(callEdges.length).toBeGreaterThan(0); + }); + + it('links the binding to its implementation subroutine', () => { + expect(edgesBetween('integrate', 'cpgintegrate').length).toBeGreaterThan(0); + expect(edgesBetween('integrate', 'baseintegrate').length).toBeGreaterThan(0); + }); + + it('bridges polymorphic dispatch: base binding → extending type override', () => { + const queries = (cg as unknown as { queries: import('../../src/db/queries').QueryBuilder }).queries; + const methods = queries.getNodesByLowerName('integrate').filter((n) => n.kind === 'method'); + const base = methods.find((n) => n.qualifiedName.includes('base_engine_t')); + const override = methods.find((n) => n.qualifiedName.includes('cpg_engine_t')); + expect(base).toBeDefined(); + expect(override).toBeDefined(); + const bridge = queries + .getOutgoingEdges(base!.id, ['calls']) + .filter((e) => e.target === override!.id); + expect(bridge.length).toBeGreaterThan(0); + expect(bridge[0]!.metadata?.synthesizedBy).toBe('fortran-override'); + }); + + it('does not create call edges from array indexing to variables', () => { + const queries = (cg as unknown as { queries: import('../../src/db/queries').QueryBuilder }).queries; + const arr = queries.getNodesByLowerName('shared_array'); + for (const n of arr) { + const incoming = queries.getIncomingEdges(n.id, ['calls']); + expect(incoming.length).toBe(0); + } + }); +}); diff --git a/site/src/content/docs/reference/languages.md b/site/src/content/docs/reference/languages.md index 0c5587773..e7055477c 100644 --- a/site/src/content/docs/reference/languages.md +++ b/site/src/content/docs/reference/languages.md @@ -31,3 +31,4 @@ Language support is automatic from the file extension — there's nothing to con | Lua | `.lua` | Full support (functions, methods, locals, `require` imports, call edges) | | R | `.R`, `.r` | Full support (functions, S4/R5/R6 classes with methods, `library`/`require` imports, `source()` file references, 
call edges) | | Luau | `.luau` | Full support (Lua, plus typed signatures, `type` aliases, Roblox `require`) | +| Fortran | `.f90`, `.f95`, `.f03`, `.f08`, `.f18`, `.f`, `.for`, `.f77`, `.ftn` | Full support (modules, subroutines & functions, derived types, type-bound procedures, generic interfaces, `use` imports, call edges) | diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index 1b15996c0..1e2359ff8 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -39,6 +39,7 @@ const WASM_GRAMMAR_FILES: Record = { r: 'tree-sitter-r.wasm', luau: 'tree-sitter-luau.wasm', objc: 'tree-sitter-objc.wasm', + fortran: 'tree-sitter-fortran.wasm', }; /** @@ -108,6 +109,18 @@ export const EXTENSION_MAP: Record = { '.luau': 'luau', '.m': 'objc', '.mm': 'objc', + // Fortran: modern free-form (.f90/.f95/.f03/.f08/.f18) and legacy fixed-form + // (.f/.for/.f77/.ftn) plus preprocessor (.fpp). Keys are lowercased on lookup. + '.f90': 'fortran', + '.f95': 'fortran', + '.f03': 'fortran', + '.f08': 'fortran', + '.f18': 'fortran', + '.f': 'fortran', + '.for': 'fortran', + '.ftn': 'fortran', + '.f77': 'fortran', + '.fpp': 'fortran', // XML: file-level tracking; the MyBatis extractor matches `` // shape and emits SQL-statement nodes (other XML returns empty). '.xml': 'xml', @@ -221,7 +234,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise null()`). +const DECLARATOR_TYPES = new Set([ + 'identifier', + 'init_declarator', + 'sized_declarator', + 'data_declarator', + 'pointer_init_declarator', + 'coarray_declarator', +]); + +// Header / terminator statements that should not be walked as body content. 
/**
 * Header / terminator statement node types. They delimit a program unit,
 * procedure or derived type and are never walked as body content.
 */
const NON_BODY_STATEMENTS = new Set<string>([
  'subroutine_statement',
  'function_statement',
  'module_statement',
  'submodule_statement',
  'program_statement',
  'derived_type_statement',
  'end_subroutine_statement',
  'end_function_statement',
  'end_module_statement',
  'end_submodule_statement',
  'end_program_statement',
  'end_type_statement',
]);

/**
 * A plain identifier. Used to reject GENERIC bindings whose name is an
 * operator/assignment spec rather than something callable as `obj%name()`.
 * Hoisted so it is not rebuilt for every generic_statement.
 */
const PLAIN_IDENTIFIER = /^[A-Za-z_]\w*$/;

/**
 * Read the symbol name from a header `*_statement` node.
 *
 * @param stmt   Header statement, or null when the unit has none.
 * @param source Full source text the node offsets refer to.
 * @returns The declared name, or undefined when it cannot be found.
 */
function readStatementName(stmt: SyntaxNode | null, source: string): string | undefined {
  if (!stmt) return undefined;
  // subroutine_statement / function_statement expose `name` as a field…
  const field = getChildByField(stmt, 'name');
  if (field) return getNodeText(field, source);
  // …module/program/interface statements carry it as a child of type `name`.
  const child = stmt.namedChildren.find((c: SyntaxNode) => c.type === 'name');
  return child ? getNodeText(child, source) : undefined;
}

/**
 * Find the header statement child of a program-unit node.
 *
 * @param node Program unit, procedure or derived-type definition.
 * @param type Node type of the header statement to look for.
 * @returns The first named child of that type, or null.
 */
function headerStatement(node: SyntaxNode, type: string): SyntaxNode | null {
  return node.namedChildren.find((c: SyntaxNode) => c.type === type) ?? null;
}

/**
 * Resolve the declared name of a single declarator node.
 *
 * A bare `identifier` is the name itself. A declarator with a `left` field
 * (init_declarator, `x = ...`) is resolved through that field, recursively.
 * Anything else falls back to its first `identifier` child.
 */
function declaratorName(decl: SyntaxNode, source: string): string | undefined {
  if (decl.type === 'identifier') return getNodeText(decl, source);
  const left = getChildByField(decl, 'left');
  if (left) return declaratorName(left, source);
  const id = decl.namedChildren.find((c: SyntaxNode) => c.type === 'identifier');
  return id ? getNodeText(id, source) : undefined;
}

/** All names declared by a variable_declaration (handles `integer :: a, b`). */
function declaredNames(varDecl: SyntaxNode, source: string): string[] {
  return varDecl.namedChildren
    .filter((c: SyntaxNode) => DECLARATOR_TYPES.has(c.type))
    .map((c: SyntaxNode) => declaratorName(c, source))
    .filter((n): n is string => !!n);
}

/** True when a variable_declaration carries the PARAMETER attribute (a constant). */
function isParameterDecl(varDecl: SyntaxNode, source: string): boolean {
  return varDecl.namedChildren.some(
    (c: SyntaxNode) =>
      c.type === 'type_qualifier' &&
      getNodeText(c, source).trim().toLowerCase() === 'parameter'
  );
}

/**
 * True when a procedure_statement carries the given attribute (DEFERRED, PASS…).
 *
 * The comparison is case-insensitive on both sides. An attribute written with
 * an argument is also matched on its keyword alone, so `attr = 'PASS'` matches
 * `pass(this)`. An exact match on the whole attribute text is still accepted.
 * NOTE(review): this assumes the `procedure_attribute` node text includes the
 * parenthesised argument of `PASS(arg)` — confirm against the grammar.
 *
 * @param stmt   The procedure_statement to inspect.
 * @param source Full source text the node offsets refer to.
 * @param attr   Attribute keyword to look for, in any letter case.
 */
function hasProcAttribute(stmt: SyntaxNode, source: string, attr: string): boolean {
  const wanted = attr.trim().toUpperCase();
  return stmt.namedChildren.some((c: SyntaxNode) => {
    if (c.type !== 'procedure_attribute') return false;
    const text = getNodeText(c, source).trim().toUpperCase();
    if (text === wanted) return true;
    const paren = text.indexOf('(');
    return paren !== -1 && text.slice(0, paren).trim() === wanted;
  });
}

export const fortranExtractor: LanguageExtractor = {
  // All structural extraction happens in visitNode; the declarative type lists
  // stay empty so the core never re-dispatches a Fortran definition node.
  functionTypes: [],
  classTypes: [],
  methodTypes: [],
  interfaceTypes: [],
  structTypes: [],
  enumTypes: [],
  typeAliasTypes: [],
  importTypes: ['use_statement'],
  callTypes: ['subroutine_call', 'call_expression'],
  variableTypes: ['variable_declaration'],

  nameField: 'name',
  bodyField: 'body',
  paramsField: 'parameters',

  // `use module_name [, only: ...]` → an import node + an `imports` edge.
  extractImport: (node, source) => {
    const moduleName = node.namedChildren.find((c: SyntaxNode) => c.type === 'module_name');
    if (!moduleName) return null;
    return {
      moduleName: getNodeText(moduleName, source).trim(),
      // Collapse whitespace so a multi-line `use` reads as a single line.
      signature: getNodeText(node, source).replace(/\s+/g, ' ').trim(),
    };
  },

  // Returns true when the node was fully handled here (the caller must not
  // process it again) and false to leave it to the default traversal.
  visitNode: (node, ctx) => {
    const source = ctx.source;

    // --- Program units: module, submodule, program → module-kind container ---
    if (node.type === 'module' || node.type === 'submodule' || node.type === 'program') {
      const name = readStatementName(headerStatement(node, `${node.type}_statement`), source);
      const created = name ? ctx.createNode('module', name, node) : null;
      if (created) ctx.pushScope(created.id);
      for (const child of node.namedChildren) {
        if (child.type === 'variable_declaration') {
          // Module/program-level constants & variables.
          const kind = isParameterDecl(child, source) ? 'constant' : 'variable';
          for (const vname of declaredNames(child, source)) ctx.createNode(kind, vname, child);
        } else if (!NON_BODY_STATEMENTS.has(child.type)) {
          ctx.visitNode(child); // procedures (CONTAINS), derived types, interfaces, use
        }
      }
      if (created) ctx.popScope();
      return true;
    }

    // --- Subroutines & functions → function-kind symbols ---
    if (node.type === 'subroutine' || node.type === 'function') {
      const header = headerStatement(node, `${node.type}_statement`);
      const name = readStatementName(header, source);
      const signature = header ? getNodeText(header, source).replace(/\s+/g, ' ').trim() : undefined;
      const created = name ? ctx.createNode('function', name, node, { signature }) : null;
      if (created) ctx.pushScope(created.id);
      for (const child of node.namedChildren) {
        // Skip local variable declarations (noise) and the header/end lines;
        // walk statements (for calls) and internal_procedures (nested defs).
        if (child.type === 'variable_declaration' || NON_BODY_STATEMENTS.has(child.type)) continue;
        ctx.visitNode(child);
      }
      if (created) ctx.popScope();
      return true;
    }

    // --- Derived types (F90+): TYPE :: name ... END TYPE → struct ---
    if (node.type === 'derived_type_definition') {
      const header = headerStatement(node, 'derived_type_statement');
      const typeName = header?.namedChildren.find((c: SyntaxNode) => c.type === 'type_name');
      const name = typeName ? getNodeText(typeName, source) : undefined;
      const created = name ? ctx.createNode('struct', name, node) : null;
      if (!created) return true;

      // `type, extends(parent) :: name` → extends edge to the parent type.
      const base = header ? getChildByField(header, 'base') : null;
      const baseId = base?.namedChildren.find((c: SyntaxNode) => c.type === 'identifier');
      if (baseId) {
        ctx.addUnresolvedReference({
          fromNodeId: created.id,
          referenceName: getNodeText(baseId, source),
          referenceKind: 'extends',
          line: baseId.startPosition.row + 1,
          column: baseId.startPosition.column,
        });
      }

      // Emit the `calls` reference from a binding's method node to the
      // procedure named by `target`, positioned at `anchor`. Shared by the
      // explicit (`a => b`), bare and GENERIC binding forms.
      const linkToImplementation = (
        method: NonNullable<ReturnType<typeof ctx.createNode>>,
        target: SyntaxNode,
        anchor: SyntaxNode
      ): void => {
        ctx.addUnresolvedReference({
          fromNodeId: method.id,
          referenceName: getNodeText(target, source),
          referenceKind: 'calls',
          line: anchor.startPosition.row + 1,
          column: anchor.startPosition.column,
        });
      };

      ctx.pushScope(created.id);
      for (const child of node.namedChildren) {
        if (child.type === 'variable_declaration') {
          for (const fname of declaredNames(child, source)) ctx.createNode('field', fname, child);
          continue;
        }
        if (child.type !== 'derived_type_procedures') {
          if (!NON_BODY_STATEMENTS.has(child.type)) ctx.visitNode(child);
          continue;
        }

        // Type-bound procedures: `PROCEDURE, PASS :: Integrate => CpgIntegrate`
        // becomes a method node created while the struct scope is pushed, plus
        // a calls ref from the method to its implementation subroutine so flow
        // traversal continues into the impl. DEFERRED bindings get the method
        // node only; dispatch to overrides is bridged by the fortran-override
        // synthesizer along extends edges.
        for (const entry of child.namedChildren) {
          if (entry.type === 'procedure_statement') {
            const deferred = hasProcAttribute(entry, source, 'DEFERRED');
            for (const d of entry.namedChildren) {
              if (d.type === 'binding') {
                // `Integrate => CpgIntegrate`: binding_name is the callable
                // name, method_name the implementation.
                const bn = d.namedChildren.find((c: SyntaxNode) => c.type === 'binding_name');
                const impl = d.namedChildren.find((c: SyntaxNode) => c.type === 'method_name');
                const bname = bn ? getNodeText(bn, source) : undefined;
                if (!bname) continue;
                const m = ctx.createNode('method', bname, entry);
                if (m && impl && !deferred) linkToImplementation(m, impl, d);
              } else if (d.type === 'method_name') {
                // Bare binding `PROCEDURE :: GetName`: implementation shares
                // the binding name.
                const m = ctx.createNode('method', getNodeText(d, source), entry);
                if (m && !deferred) linkToImplementation(m, d, d);
              }
            }
          } else if (entry.type === 'generic_statement') {
            // `GENERIC :: Run => Integrate, IntegrateEx` — a dispatch alias.
            // operator(+)/assignment(=) generics carry no plain identifier
            // and are skipped.
            const bl = entry.namedChildren.find((c: SyntaxNode) => c.type === 'binding_list');
            if (!bl) continue;
            const bn = bl.namedChildren.find((c: SyntaxNode) => c.type === 'binding_name');
            const gname = bn ? getNodeText(bn, source) : undefined;
            if (!gname || !PLAIN_IDENTIFIER.test(gname)) continue;
            const m = ctx.createNode('method', gname, entry);
            if (!m) continue;
            for (const spec of bl.namedChildren) {
              if (spec.type === 'method_name') linkToImplementation(m, spec, spec);
            }
          }
          // final_statement (destructors) intentionally skipped — not
          // callable as obj%name().
        }
      }
      ctx.popScope();
      return true;
    }

    // --- Interface blocks. Only named (generic/operator) interfaces become
    //     nodes; anonymous explicit-interface blocks are signature-only noise. ---
    if (node.type === 'interface') {
      const name = readStatementName(headerStatement(node, 'interface_statement'), source);
      if (name) ctx.createNode('interface', name, node);
      return true; // do not descend into declaration signatures
    }

    return false;
  },
};
+ } else if (func.type === 'derived_type_member_expression') { + // Fortran member call: `CALL obj%method()` / `x = obj%fn(y)`. + // Normalize the `%` separator to `.` so the resolver's + // receiver.method matchers (pre-filter split, matchMethodCall + // regex, receiver-type inference) all apply. The raw text fallback + // below would emit "obj%method", which no matcher can split. + // Shape: derived_type_member_expression{base, type_member} where a + // chained receiver (`this%sub%Execute`) nests another + // derived_type_member_expression on the left — unwrap to the + // immediate component (`sub.Execute`), mirroring the Java + // `this.field.method()` unwrap above. + const members = func.namedChildren.filter((c: SyntaxNode) => c.type === 'type_member'); + const methodName = members.length > 0 + ? getNodeText(members[members.length - 1]!, this.source) + : ''; + if (methodName) { + const base = func.namedChild(0); + let receiverName = ''; + if (base?.type === 'identifier') { + receiverName = getNodeText(base, this.source); + } else if (base?.type === 'derived_type_member_expression') { + const innerMembers = base.namedChildren.filter((c: SyntaxNode) => c.type === 'type_member'); + if (innerMembers.length > 0) { + receiverName = getNodeText(innerMembers[innerMembers.length - 1]!, this.source); + } + } + // Unlike other languages, `this`/`self` receivers are NOT + // stripped: Fortran's passed-object dummy is explicitly declared + // (`CLASS(base_t) :: this`), so the resolver's receiver-type + // inference turns `this.Step` into a precise binding match — + // the most reliable signal available for intra-type calls. The + // matcher falls back to bare-name resolution when inference + // fails. 
+ if (receiverName) { + calleeName = `${receiverName}.${methodName}`; + } else { + calleeName = methodName; + } + } } else if (func.type === 'scoped_identifier' || func.type === 'scoped_call_expression') { // Scoped call: Module::function() calleeName = getNodeText(func, this.source); diff --git a/src/extraction/wasm/tree-sitter-fortran.wasm b/src/extraction/wasm/tree-sitter-fortran.wasm new file mode 100755 index 000000000..ec884e463 Binary files /dev/null and b/src/extraction/wasm/tree-sitter-fortran.wasm differ diff --git a/src/resolution/callback-synthesizer.ts b/src/resolution/callback-synthesizer.ts index 187c81f25..1ea3d9c1b 100644 --- a/src/resolution/callback-synthesizer.ts +++ b/src/resolution/callback-synthesizer.ts @@ -465,6 +465,57 @@ function cppOverrideEdges(queries: QueryBuilder): Edge[] { return edges; } +/** + * Phase 4d: Fortran type-bound dispatch. A call through a polymorphic + * `CLASS(base_t)` receiver (`CALL obj%Integrate()`) dispatches at runtime to + * the `EXTENDS(base_t)` subtype's overriding binding — Fortran's vtable + * indirection, no static call edge — so a flow stops at the abstract base + * binding. Bridge it like cpp-override: for each Fortran derived type + * (kind='struct') that extends a base, link each base method → the subtype + * method of the same name. Names compare case-insensitively because Fortran + * identifiers are case-insensitive (`Integrate` overrides `integrate`). + * Over-approximation accepted (reachability-correct); capped per type and + * gated to Fortran. 
+ */ +function fortranOverrideEdges(queries: QueryBuilder): Edge[] { + const edges: Edge[] = []; + const seen = new Set(); + const methodsOf = (typeId: string): Node[] => + queries + .getOutgoingEdges(typeId, ['contains']) + .map((e) => queries.getNodeById(e.target)) + .filter((n): n is Node => !!n && n.kind === 'method'); + for (const cls of queries.getNodesByKind('struct')) { + if (cls.language !== 'fortran') continue; + const subMethods = methodsOf(cls.id); + if (subMethods.length === 0) continue; + for (const ext of queries.getOutgoingEdges(cls.id, ['extends'])) { + const base = queries.getNodeById(ext.target); + if (!base || base.language !== 'fortran' || base.id === cls.id) continue; + const baseMethods = new Map(methodsOf(base.id).map((m) => [m.name.toLowerCase(), m])); + let added = 0; + for (const m of subMethods) { + if (added >= MAX_CALLBACKS_PER_CHANNEL) break; + const bm = baseMethods.get(m.name.toLowerCase()); + if (!bm || bm.id === m.id) continue; + const key = `${bm.id}>${m.id}`; + if (seen.has(key)) continue; + seen.add(key); + edges.push({ + source: bm.id, + target: m.id, + kind: 'calls', + line: bm.startLine, + provenance: 'heuristic', + metadata: { synthesizedBy: 'fortran-override', via: m.name, registeredAt: `${m.filePath}:${m.startLine}` }, + }); + added++; + } + } + } + return edges; +} + /** * Phase 5.5: interface / abstract dispatch (Java, Kotlin). 
A call through an * injected interface (`@Autowired FooService svc; svc.list()`) or an abstract @@ -2709,6 +2760,7 @@ export async function synthesizeCallbackEdges(queries: QueryBuilder, ctx: Resolu const pascalEdges = pascalFormEdges(ctx); await yieldToLoop(); const flutterEdges = flutterBuildEdges(queries, ctx); await yieldToLoop(); const cppEdges = cppOverrideEdges(queries); await yieldToLoop(); + const fortranEdges = fortranOverrideEdges(queries); await yieldToLoop(); const ifaceEdges = interfaceOverrideEdges(queries); await yieldToLoop(); const kotlinExpectActual = kotlinExpectActualEdges(queries); await yieldToLoop(); const goGrpcEdges = goGrpcStubImplEdges(queries); await yieldToLoop(); @@ -2744,6 +2796,7 @@ export async function synthesizeCallbackEdges(queries: QueryBuilder, ctx: Resolu ...pascalEdges, ...flutterEdges, ...cppEdges, + ...fortranEdges, ...ifaceEdges, ...kotlinExpectActual, ...goGrpcEdges, diff --git a/src/resolution/index.ts b/src/resolution/index.ts index cd93addfc..d0de1fd87 100644 --- a/src/resolution/index.ts +++ b/src/resolution/index.ts @@ -16,7 +16,7 @@ import { FrameworkResolver, ImportMapping, } from './types'; -import { matchReference, matchFunctionRef, matchDottedCallChain, matchScopedCallChain, sameLanguageFamily, crossesKnownFamily } from './name-matcher'; +import { matchReference, matchFunctionRef, matchDottedCallChain, matchScopedCallChain, sameLanguageFamily, crossesKnownFamily, CASE_INSENSITIVE_LANGS } from './name-matcher'; import { resolveViaImport, resolveJvmImport, extractImportMappings, extractReExports, loadCppIncludeDirs, isPhpIncludePathRef } from './import-resolver'; import { detectFrameworks } from './frameworks'; import { synthesizeCallbackEdges } from './callback-synthesizer'; @@ -228,6 +228,7 @@ export class ReferenceResolver { private lowerNameCache: LRUCache; // lower(name) → nodes cache private qualifiedNameCache: LRUCache; // qualified_name → nodes cache private knownNames: Set | null = null; // all known 
symbol names for fast pre-filtering + private knownNamesLower: Set | null = null; // lowercased names, for case-insensitive languages private knownFiles: Set | null = null; private cachesWarmed = false; // tsconfig/jsconfig path-alias map. `undefined` = not yet computed, @@ -310,6 +311,12 @@ export class ReferenceResolver { // Cache all distinct symbol names for fast pre-filtering (just strings, not full nodes) this.knownNames = new Set(this.queries.getAllNodeNames()); + // Case-insensitive languages (Fortran) declare in one case and call in + // another; a case-sensitive pre-filter would drop those refs before any + // matcher runs. Built unconditionally — it's just strings. + this.knownNamesLower = new Set(); + for (const n of this.knownNames) this.knownNamesLower.add(n.toLowerCase()); + this.cachesWarmed = true; } @@ -325,6 +332,7 @@ export class ReferenceResolver { this.lowerNameCache.clear(); this.qualifiedNameCache.clear(); this.knownNames = null; + this.knownNamesLower = null; this.knownFiles = null; this.cachesWarmed = false; } @@ -586,18 +594,28 @@ export class ReferenceResolver { * Uses the pre-built knownNames set to skip expensive resolution * for names that definitely don't exist as symbols. */ - private hasAnyPossibleMatch(name: string): boolean { + private hasAnyPossibleMatch(name: string, language?: string): boolean { if (!this.knownNames) return true; // no pre-filter available + // For case-insensitive languages (Fortran) the membership tests also + // consult the lowercased name set — Fortran declares `subroutine foo` + // and calls `CALL FOO()` interchangeably, and the case-sensitive set + // would reject the ref before any matcher could see it. + const folded = language !== undefined && CASE_INSENSITIVE_LANGS.has(language) + ? 
this.knownNamesLower + : null; + const known = (n: string): boolean => + this.knownNames!.has(n) || (folded !== null && folded.has(n.toLowerCase())); + // Direct name match - if (this.knownNames.has(name)) return true; + if (known(name)) return true; // For qualified names like "obj.method" or "Class::method", check the parts const dotIdx = name.indexOf('.'); if (dotIdx > 0) { const receiver = name.substring(0, dotIdx); const member = name.substring(dotIdx + 1); - if (this.knownNames.has(receiver) || this.knownNames.has(member)) return true; + if (known(receiver) || known(member)) return true; // Also check capitalized receiver (instance-method resolution) const capitalized = receiver.charAt(0).toUpperCase() + receiver.slice(1); if (this.knownNames.has(capitalized)) return true; @@ -607,14 +625,14 @@ export class ReferenceResolver { const lastDot = name.lastIndexOf('.'); if (lastDot > dotIdx) { const tail = name.substring(lastDot + 1); - if (tail && this.knownNames.has(tail)) return true; + if (tail && known(tail)) return true; } } const colonIdx = name.indexOf('::'); if (colonIdx > 0) { const receiver = name.substring(0, colonIdx); const member = name.substring(colonIdx + 2); - if (this.knownNames.has(receiver) || this.knownNames.has(member)) return true; + if (known(receiver) || known(member)) return true; // Multi-segment path `a::b::c` (a Rust/C++ module call like // `database::profiles::find`) — the only segment that names a symbol is // the last (`c`); `member` above is `b::c`, which never matches a node @@ -688,7 +706,7 @@ export class ReferenceResolver { // from './auth'`) intentionally call a name that has no // declaration anywhere — only the renamed upstream symbol does. 
if ( - !this.hasAnyPossibleMatch(ref.referenceName) && + !this.hasAnyPossibleMatch(ref.referenceName, ref.language) && !this.matchesAnyImport(ref) && !this.frameworks.some((f) => f.claimsReference?.(ref.referenceName)) ) { diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts index 228ac9f40..5d2655d74 100644 --- a/src/resolution/name-matcher.ts +++ b/src/resolution/name-matcher.ts @@ -7,6 +7,64 @@ import { Language, Node } from '../types'; import { UnresolvedRef, ResolvedRef, ResolutionContext } from './types'; +/** + * Languages whose identifiers are case-insensitive: a symbol declared as + * `subroutine foo` is legally referenced as `CALL FOO()`. Matchers fall back + * to the lowercase name index when the case-sensitive lookup comes up empty. + */ +export const CASE_INSENSITIVE_LANGS = new Set(['fortran']); + +/** + * Exact-name lookup with a case-insensitive fallback for languages where + * identifier case is not significant. Behavior is unchanged for every other + * language. The fallback also fires when the exact-case result contains no + * same-language node — in a mixed repo (Fortran + Python tooling) a + * case-mismatched Fortran call must not be captured by a same-spelling + * symbol from another language while the real (lowercased) target exists. + */ +function getNodesByNameCI( + name: string, + ref: UnresolvedRef, + context: ResolutionContext +): Node[] { + const exact = context.getNodesByName(name); + if (!CASE_INSENSITIVE_LANGS.has(ref.language)) return exact; + if (exact.some((n) => n.language === ref.language)) return exact; + const lower = context.getNodesByLowerName(name.toLowerCase()); + return lower.length > 0 ? lower : exact; +} + +/** + * True when the reference originates from a method node — for Fortran, + * that means a type-bound-procedure binding ref (`PROCEDURE :: Name [=> Impl]`) + * rather than an ordinary call site. Uses the per-file node cache, so the + * lookup is cheap. 
+ */ +function refFromMethodNode(ref: UnresolvedRef, context: ResolutionContext): boolean { + const inFile = context.getNodesInFile(ref.filePath); + const from = inFile.find((n) => n.id === ref.fromNodeId); + return from?.kind === 'method'; +} + +/** + * Node kinds that can never be the target of a Fortran `calls` reference. + * Fortran's grammar cannot distinguish `a(i)` array indexing from `f(i)` + * function calls, so extraction emits a `calls` ref for every parenthesized + * identifier — letting those resolve to variables/fields would flood the + * graph with array-subscript edges. Struct stays callable (constructor + * `base_t(...)`, promoted to `instantiates` downstream), and interface stays + * callable (Fortran generic interfaces are call targets). + */ +const FORTRAN_NON_CALLABLE_KINDS = new Set([ + 'variable', 'constant', 'field', 'property', 'parameter', + 'enum', 'enum_member', 'import', 'file', 'namespace', 'type_alias', +]); + +function filterFortranCallable(candidates: Node[], ref: UnresolvedRef): Node[] { + if (ref.language !== 'fortran' || ref.referenceKind !== 'calls') return candidates; + return candidates.filter((n) => !FORTRAN_NON_CALLABLE_KINDS.has(n.kind)); +} + /** * Ceiling on how many same-named definitions a FUZZY name-match strategy will * score. A name defined more times than this is "ubiquitous" — a method/symbol @@ -353,8 +411,10 @@ export function matchByExactName( // unresolved import refs each scored K same-named import candidates through // findBestMatch — O(K²) per package, the dominant cost of "Resolving refs" on // large import-heavy (front-end + back-end) repos (#915). 
- const candidates = applyLanguageGate(context.getNodesByName(ref.referenceName), ref) - .filter((n) => n.kind !== 'import'); + const candidates = filterFortranCallable( + applyLanguageGate(getNodesByNameCI(ref.referenceName, ref, context), ref), + ref + ).filter((n) => n.kind !== 'import'); if (candidates.length === 0) { return null; @@ -362,6 +422,17 @@ export function matchByExactName( // If only one match, use it — but penalize cross-language matches if (candidates.length === 1) { + // A Fortran TBP binding ref (`PROCEDURE :: plainq` whose implementation + // isn't indexed) must not resolve to its own method node — a self-loop + // is worse than no edge. Ordinary recursion (function → itself) is NOT + // affected: its ref originates from a function node. + if ( + candidates[0]!.id === ref.fromNodeId && + CASE_INSENSITIVE_LANGS.has(ref.language) && + refFromMethodNode(ref, context) + ) { + return null; + } const isCrossLanguage = candidates[0]!.language !== ref.language; return { original: ref, @@ -441,8 +512,7 @@ export function matchByQualifiedName( const parts = ref.referenceName.split(/[:.]/); const lastName = parts[parts.length - 1]; if (lastName) { - const partialCandidates = context - .getNodesByName(lastName) + const partialCandidates = getNodesByNameCI(lastName, ref, context) .filter((candidate) => candidate.qualifiedName.endsWith(ref.referenceName)); const chosen = preferCallSiteFile(partialCandidates, ref.filePath)[0]; if (chosen) { @@ -503,14 +573,18 @@ export function resolveMethodOnType( // in-class (`class Foo { int bar() { ... } }`) or out-of-line in a separate // file (`int Foo::bar() { ... }` in foo.cpp while class Foo is in foo.hpp). // The previous same-file approach missed the latter — the typical C++ layout. - const methodCandidates = context.getNodesByName(methodName); + // Case-insensitive languages compare the qualified suffix case-folded. 
+ const ci = CASE_INSENSITIVE_LANGS.has(ref.language); + const methodCandidates = getNodesByNameCI(methodName, ref, context); const want = `${typeName}::${methodName}`; + const wantLower = want.toLowerCase(); const matches: Node[] = []; for (const m of methodCandidates) { if (m.kind !== 'method') continue; if (m.language !== ref.language) continue; - const qn = m.qualifiedName; - if (qn === want || qn.endsWith(`::${want}`)) { + const qn = ci ? m.qualifiedName.toLowerCase() : m.qualifiedName; + const target = ci ? wantLower : want; + if (qn === target || qn.endsWith(`::${target}`)) { matches.push(m); } } @@ -1136,6 +1210,14 @@ function localReceiverTypePatterns(language: Language, r: string): RegExp[] { new RegExp(`\\b${r}\\b\\s*:\\s*([A-Z][\\w]*)`), // var lg: TLogger / param lg: TLogger new RegExp(`\\b${r}\\b\\s*:=\\s*([A-Z][\\w.]*)\\.Create\\b`), // lg := TLogger.Create ]; + case 'fortran': + return [ + // CLASS(engine_t) :: eng / TYPE(engine_t), INTENT(INOUT) :: eng, other — + // the receiver (incl. the passed-object dummy `this`/`self`) anywhere in + // the `::` declarator list. Case-insensitive to match the language; + // `[^!]` keeps the match out of trailing `!` comments. + new RegExp(`^\\s*(?:class|type)\\s*\\(\\s*([A-Za-z_]\\w*)\\s*\\)[^!]*?::(?:[^!]*?[\\s,])?${r}\\b`, 'i'), + ]; default: return []; } @@ -1295,13 +1377,24 @@ export function matchMethodCall( } } + // Fortran passed-object receiver (`this%Step()` / `self%Init()`, normalized + // to `this.Step` at extraction) whose CLASS declaration the shared scan + // above couldn't see: resolve the bare binding name — receiver-name + // word-overlap heuristics are meaningless for `this`. The original ref is + // preserved so the resolved row matches the persisted unresolved_refs + // triple on cleanup. + if (ref.language === 'fortran' && dotMatch && /^(?:this|self)$/i.test(objectOrClass!)) { + const bare = matchByExactName({ ...ref, referenceName: methodName! }, context); + return bare ? 
{ ...bare, original: ref } : null; + } + // Strategy 1: Direct class name match (existing logic). When the receiver // names a class that exists in several files (`Logger.log()` / `Logger::log()` // with a `Logger` in both `a/` and `b/`), try the class in the call site's // own file first — otherwise the first-indexed class wins and a call in `b/` // resolves to `a/`'s method (#1079). const classCandidates = preferCallSiteFile( - context.getNodesByName(objectOrClass!), + getNodesByNameCI(objectOrClass!, ref, context), ref.filePath, ); @@ -1366,7 +1459,7 @@ export function matchMethodCall( // name similarity with the containing class. Handles abbreviated variable // names like permissionEngine → PermissionRuleEngine. if (methodName) { - const methodCandidates = context.getNodesByName(methodName!); + const methodCandidates = getNodesByNameCI(methodName!, ref, context); // Ubiquitous-method ceiling (#999): a method name re-declared across a // vendored theme/SDK (Metronic's `init`/`update`/… on every widget) yields // K candidates that receiver-word overlap can't reliably disambiguate — @@ -1376,8 +1469,12 @@ export function matchMethodCall( if (methodCandidates.length > AMBIGUOUS_NAME_CEILING) { return null; } + const ciLang = CASE_INSENSITIVE_LANGS.has(ref.language); const methods = methodCandidates.filter( - (n) => n.kind === 'method' && n.name === methodName + (n) => + n.kind === 'method' && + (n.name === methodName || + (ciLang && n.name.toLowerCase() === methodName!.toLowerCase())) ); // Filter to same-language candidates first @@ -1484,7 +1581,7 @@ function computePathProximity(filePath1: string, filePath2: string): number { function findBestMatch( ref: UnresolvedRef, candidates: Node[], - _context: ResolutionContext + context: ResolutionContext ): Node | null { // Prioritization rules: // 1. 
Same file > different file @@ -1573,6 +1670,21 @@ function findBestMatch( score += Math.max(0, 20 - distance / 10); } + // Penalize a Fortran TBP binding ref (`PROCEDURE :: GetName` — method + // node referencing the implementation subroutine of the same name) + // resolving back to its own method node purely on line-distance. Gated + // to method-origin Fortran refs: an unconditional penalty was shown to + // flip ordinary recursion edges in other languages whenever the + // recursive function shares its name with another same-file symbol + // (the 30-point penalty exceeds the 20-point line-proximity margin). + if ( + candidate.id === ref.fromNodeId && + CASE_INSENSITIVE_LANGS.has(ref.language) && + refFromMethodNode(ref, context) + ) { + score -= 30; + } + if (score > bestScore) { bestScore = score; bestNode = candidate; diff --git a/src/types.ts b/src/types.ts index a3122bf9a..99f417b9a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -91,6 +91,7 @@ export const LANGUAGES = [ 'luau', 'objc', 'r', + 'fortran', 'yaml', 'twig', 'xml',