From 4fd4ed17a1d366fac1e86061d549923e33ff7ba6 Mon Sep 17 00:00:00 2001
From: Sander Muller
Date: Wed, 24 Jun 2026 19:25:53 +0200
Subject: [PATCH] Memoize file contents by path in CachedParser to skip redundant reads

CachedParser::parseFile() read the whole file via FileReader::read() on
every call, before the content-keyed node cache. The same file is parsed
many times (a trait file once per class that uses it - on Tempest, 73 498
parseFile calls for 2 327 distinct files, 96.8% redundant reads; the
256-entry content cache thrashes on hot traits), so the read is repeated
even when nothing changed.

Memoize the contents by path, validated by mtime and size, and skip the
re-read while both are unchanged. clearstatcache() before the stat keeps
the check fresh in long-running processes (PHPStan Pro, fixer worker)
where a file may be edited between calls.

mtime only has one-second resolution, so contents are memoized only once
the file's mtime is more than a second old: a file written within the
current second could be rewritten again without its mtime changing, and
serving the memo then would hide the edit.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
Note: the blob ids on the "index" lines are those of the original revision
of this patch and were not recomputed after the review changes.

 src/Parser/CachedParser.php               | 28 +++++++++-
 tests/PHPStan/Parser/CachedParserTest.php | 64 +++++++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/src/Parser/CachedParser.php b/src/Parser/CachedParser.php
index 400c21bf5ad..fafc160728b 100644
--- a/src/Parser/CachedParser.php
+++ b/src/Parser/CachedParser.php
@@ -5,6 +5,10 @@
 use PhpParser\Node;
 use PHPStan\File\FileReader;
 use function array_slice;
+use function clearstatcache;
+use function filemtime;
+use function filesize;
+use function time;
 
 final class CachedParser implements Parser
 {
@@ -17,6 +21,9 @@ final class CachedParser implements Parser
 	/** @var array<string, true> */
 	private array $parsedByString = [];
 
+	/** @var array<string, array{int, int, string}> path => [mtime, size, source code] */
+	private array $cachedSourceByFile = [];
+
 	public function __construct(
 		private Parser $originalParser,
 		private int $cachedNodesByStringCountMax,
@@ -40,7 +47,26 @@ public function parseFile(string $file): array
 			--$this->cachedNodesByStringCount;
 		}
 
-		$sourceCode = FileReader::read($file);
+		// parseFile is called once per class using a trait, so the same file is read many times.
+		// Memoize the contents by path and reuse them while the file's mtime and size are unchanged.
+		// clearstatcache makes the stat fresh in long-running processes (PHPStan Pro, fixer worker).
+		clearstatcache(true, $file);
+		$mtime = @filemtime($file);
+		$size = @filesize($file);
+		$memo = $this->cachedSourceByFile[$file] ?? null;
+		if ($memo !== null && $memo[0] === $mtime && $memo[1] === $size) {
+			$sourceCode = $memo[2];
+		} else {
+			$sourceCode = FileReader::read($file);
+			// mtime has one-second resolution: a file modified within the current second could be
+			// rewritten again without its mtime changing, so only memoize once the mtime has aged.
+			if ($mtime !== false && $size !== false && $mtime < time() - 1) {
+				$this->cachedSourceByFile[$file] = [$mtime, $size, $sourceCode];
+			} else {
+				unset($this->cachedSourceByFile[$file]);
+			}
+		}
+
 		if (!isset($this->cachedNodesByString[$sourceCode]) || isset($this->parsedByString[$sourceCode])) {
 			$this->cachedNodesByString[$sourceCode] = $this->originalParser->parseFile($file);
 			$this->cachedNodesByStringCount++;
diff --git a/tests/PHPStan/Parser/CachedParserTest.php b/tests/PHPStan/Parser/CachedParserTest.php
index df3097d3cb9..bc4ee1bbab3 100644
--- a/tests/PHPStan/Parser/CachedParserTest.php
+++ b/tests/PHPStan/Parser/CachedParserTest.php
@@ -5,12 +5,19 @@
 use Generator;
 use PhpParser\Node;
 use PhpParser\Node\Stmt\Namespace_;
+use PhpParser\Node\Stmt\Nop;
 use PHPStan\BetterReflection\Reflection\ExprCacheHelper;
 use PHPStan\File\FileHelper;
 use PHPStan\File\FileReader;
 use PHPStan\Testing\PHPStanTestCase;
 use PHPUnit\Framework\Attributes\DataProvider;
 use PHPUnit\Framework\MockObject\Stub;
+use function file_put_contents;
+use function sys_get_temp_dir;
+use function time;
+use function touch;
+use function uniqid;
+use function unlink;
 
 class CachedParserTest extends PHPStanTestCase
 {
@@ -171,4 +178,61 @@ public function testWithExprCacheHelper(): void
 		$this->assertSame(['startLine' => 1, 'startTokenPos' => 35, 'startFilePos' => 137, 'endLine' => 10, 'endTokenPos' => 35, 'endFilePos' => 143, 'kind' => 1, 'rawValue' => "'hello'"], $reImported->getAttributes());
 	}
 
+	public function testParseFileSkipsReadingUnchangedFileAndRereadsAfterChange(): void
+	{
+		$parser = new CachedParser($this->getContentEchoingParserStub(), 500);
+		$path = sys_get_temp_dir() . '/phpstan-cached-parser-' . uniqid() . '.php';
+		$baseTime = time() - 20;
+
+		try {
+			file_put_contents($path, 'first contents');
+			touch($path, $baseTime);
+			$this->assertSame('first contents', $parser->parseFile($path)[0]->getAttribute('content'));
+
+			// Same mtime and same size: the memoized contents are returned without re-reading.
+			file_put_contents($path, 'FIRST CONTENTS');
+			touch($path, $baseTime);
+			$this->assertSame('first contents', $parser->parseFile($path)[0]->getAttribute('content'));
+
+			// Same mtime but a different size invalidates the memo, so the file is read again.
+			file_put_contents($path, 'second contents');
+			touch($path, $baseTime);
+			$this->assertSame('second contents', $parser->parseFile($path)[0]->getAttribute('content'));
+
+			// Same size but a newer mtime invalidates the memo, so the file is read again.
+			file_put_contents($path, 'newest contents');
+			touch($path, $baseTime + 10);
+			$this->assertSame('newest contents', $parser->parseFile($path)[0]->getAttribute('content'));
+		} finally {
+			@unlink($path);
+		}
+	}
+
+	public function testParseFileRereadsFileModifiedWithinTheSameSecond(): void
+	{
+		$parser = new CachedParser($this->getContentEchoingParserStub(), 500);
+		$path = sys_get_temp_dir() . '/phpstan-cached-parser-' . uniqid() . '.php';
+
+		try {
+			file_put_contents($path, 'first contents');
+			$this->assertSame('first contents', $parser->parseFile($path)[0]->getAttribute('content'));
+
+			// Same size and (almost certainly) the same mtime second: the edit must still be picked up.
+			file_put_contents($path, 'FIRST CONTENTS');
+			$this->assertSame('FIRST CONTENTS', $parser->parseFile($path)[0]->getAttribute('content'));
+		} finally {
+			@unlink($path);
+		}
+	}
+
+	private function getContentEchoingParserStub(): Parser&Stub
+	{
+		$mock = $this->createStub(Parser::class);
+		$mock->method('parseFile')->willReturnCallback(
+			static fn (string $file): array => [new Nop(['content' => FileReader::read($file)])],
+		);
+
+		return $mock;
+	}
+
 }