Skip to content

Commit 0510c03

Browse files
authored
fix: prevent stack overflow in codebase indexing for large projects (#7712)
1 parent e559ac6 commit 0510c03

File tree

2 files changed

+307
-5
lines changed

2 files changed

+307
-5
lines changed
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
import * as path from "path"
2+
import * as fs from "fs"
3+
import { listFiles } from "../list-files"
4+
5+
// Mock ripgrep
6+
vi.mock("../../ripgrep", () => ({
7+
getBinPath: vi.fn().mockResolvedValue("/mock/path/to/rg"),
8+
}))
9+
10+
// Mock vscode
11+
vi.mock("vscode", () => ({
12+
env: {
13+
appRoot: "/mock/app/root",
14+
},
15+
}))
16+
17+
// Mock child_process
18+
vi.mock("child_process", () => ({
19+
spawn: vi.fn(),
20+
}))
21+
22+
vi.mock("../../../utils/path", () => ({
23+
arePathsEqual: vi.fn().mockReturnValue(false),
24+
}))
25+
26+
import * as childProcess from "child_process"
27+
28+
describe("listFiles limit handling for large projects", () => {
29+
beforeEach(() => {
30+
vi.clearAllMocks()
31+
})
32+
33+
it("should prevent stack overflow when scanning large projects", async () => {
34+
// This test verifies that the fix prevents the "Maximum call stack size exceeded" error
35+
// by ensuring the function completes successfully even with a very large directory structure
36+
37+
const createMockDirEntry = (name: string) =>
38+
({
39+
name,
40+
isDirectory: () => true,
41+
isSymbolicLink: () => false,
42+
isFile: () => false,
43+
}) as any
44+
45+
const mockReaddir = vi.fn()
46+
vi.mocked(fs.promises).readdir = mockReaddir
47+
48+
// Simulate a project with 200k+ items (as mentioned in the issue)
49+
// Create a broad directory tree that would cause stack overflow without proper limits
50+
let callCount = 0
51+
const maxDepth = 100 // Simulate deep nesting
52+
mockReaddir.mockImplementation(async () => {
53+
callCount++
54+
if (callCount > maxDepth) {
55+
return []
56+
}
57+
// Return many subdirectories at each level to simulate a large codebase
58+
return Array(50)
59+
.fill(null)
60+
.map((_, i) => createMockDirEntry(`dir${callCount}_${i}`))
61+
})
62+
63+
// Mock ripgrep to return many files
64+
const mockSpawn = vi.mocked(childProcess.spawn)
65+
const mockProcess = {
66+
stdout: {
67+
on: vi.fn((event, callback) => {
68+
if (event === "data") {
69+
// Return many files to simulate large project
70+
const files =
71+
Array(10000)
72+
.fill(null)
73+
.map((_, i) => `file${i}.ts`)
74+
.join("\n") + "\n"
75+
setTimeout(() => callback(files), 10)
76+
}
77+
}),
78+
},
79+
stderr: {
80+
on: vi.fn(),
81+
},
82+
on: vi.fn((event, callback) => {
83+
if (event === "close") {
84+
setTimeout(() => callback(0), 20)
85+
}
86+
}),
87+
kill: vi.fn(),
88+
}
89+
mockSpawn.mockReturnValue(mockProcess as any)
90+
91+
// Call listFiles with a limit that would be used in code indexing
92+
const limit = 50_000 // MAX_LIST_FILES_LIMIT_CODE_INDEX value
93+
94+
// The key test: this should complete without throwing a stack overflow error
95+
let error: Error | null = null
96+
let results: string[] = []
97+
let limitReached = false
98+
99+
try {
100+
const startTime = Date.now()
101+
const [res, didHitLimit] = await listFiles("/test/large-project", true, limit)
102+
const endTime = Date.now()
103+
104+
results = res
105+
limitReached = didHitLimit
106+
107+
// Should complete in reasonable time
108+
expect(endTime - startTime).toBeLessThan(10000) // 10 seconds max
109+
} catch (e) {
110+
error = e as Error
111+
}
112+
113+
// Main assertion: no stack overflow error should occur
114+
expect(error).toBeNull()
115+
116+
// The function should return valid results
117+
expect(results).toBeDefined()
118+
expect(Array.isArray(results)).toBe(true)
119+
120+
// The limit should be respected
121+
expect(results.length).toBeLessThanOrEqual(limit)
122+
123+
// Directory scanning should respect the overall limit
124+
const directories = results.filter((r) => r.endsWith("/"))
125+
const files = results.filter((r) => !r.endsWith("/"))
126+
expect(directories.length + files.length).toBeLessThanOrEqual(limit)
127+
})
128+
129+
it("should terminate early when directory limit is reached", async () => {
130+
const createMockDirEntry = (name: string) =>
131+
({
132+
name,
133+
isDirectory: () => true,
134+
isSymbolicLink: () => false,
135+
isFile: () => false,
136+
}) as any
137+
138+
const mockReaddir = vi.fn()
139+
vi.mocked(fs.promises).readdir = mockReaddir
140+
141+
// Mock directory structure
142+
let directoriesScanned = 0
143+
mockReaddir.mockImplementation(async (dirPath: string) => {
144+
directoriesScanned++
145+
146+
// Root directory has many subdirectories
147+
if (directoriesScanned === 1) {
148+
return Array(100)
149+
.fill(null)
150+
.map((_, i) => createMockDirEntry(`subdir${i}`))
151+
}
152+
153+
// Each subdirectory has more subdirectories
154+
return Array(50)
155+
.fill(null)
156+
.map((_, i) => createMockDirEntry(`nested${directoriesScanned}_${i}`))
157+
})
158+
159+
// Mock ripgrep to return no files (focus on directory scanning)
160+
const mockSpawn = vi.mocked(childProcess.spawn)
161+
const mockProcess = {
162+
stdout: {
163+
on: vi.fn((event, callback) => {
164+
if (event === "data") {
165+
setTimeout(() => callback(""), 10)
166+
}
167+
}),
168+
},
169+
stderr: {
170+
on: vi.fn(),
171+
},
172+
on: vi.fn((event, callback) => {
173+
if (event === "close") {
174+
setTimeout(() => callback(0), 20)
175+
}
176+
}),
177+
kill: vi.fn(),
178+
}
179+
mockSpawn.mockReturnValue(mockProcess as any)
180+
181+
// Call listFiles with a small limit
182+
const limit = 10
183+
const [results, limitReached] = await listFiles("/test/project", true, limit)
184+
185+
// Verify results respect the limit
186+
expect(results.length).toBeLessThanOrEqual(limit)
187+
expect(limitReached).toBe(true)
188+
189+
// Verify that directory scanning terminated early
190+
// Without the fix, this would scan thousands of directories
191+
// With the fix, it should stop after reaching the limit
192+
const directories = results.filter((r) => r.endsWith("/"))
193+
expect(directories.length).toBeLessThanOrEqual(limit)
194+
195+
// The number of readdir calls should be proportional to the limit, not unbounded
196+
// This ensures we're not scanning the entire tree before applying the limit
197+
expect(directoriesScanned).toBeLessThan(limit * 10) // Allow some overhead but not excessive
198+
})
199+
200+
it("should handle zero limit gracefully", async () => {
201+
// This test is already in the original spec but let's ensure it works with our changes
202+
const [results, limitReached] = await listFiles("/test/path", true, 0)
203+
204+
expect(results).toEqual([])
205+
expect(limitReached).toBe(false)
206+
207+
// No filesystem operations should occur with zero limit
208+
expect(fs.promises.readdir).not.toHaveBeenCalled()
209+
expect(childProcess.spawn).not.toHaveBeenCalled()
210+
})
211+
212+
it("should correctly distribute limit between files and directories", async () => {
213+
const createMockDirEntry = (name: string) =>
214+
({
215+
name,
216+
isDirectory: () => true,
217+
isSymbolicLink: () => false,
218+
isFile: () => false,
219+
}) as any
220+
221+
const mockReaddir = vi.fn()
222+
vi.mocked(fs.promises).readdir = mockReaddir
223+
224+
// Mock directory with some subdirectories
225+
mockReaddir
226+
.mockResolvedValueOnce([createMockDirEntry("dir1"), createMockDirEntry("dir2"), createMockDirEntry("dir3")])
227+
.mockResolvedValue([]) // Empty subdirectories
228+
229+
// Mock ripgrep to return files
230+
const mockSpawn = vi.mocked(childProcess.spawn)
231+
const mockProcess = {
232+
stdout: {
233+
on: vi.fn((event, callback) => {
234+
if (event === "data") {
235+
// Return 8 files
236+
const files =
237+
Array(8)
238+
.fill(null)
239+
.map((_, i) => `file${i}.ts`)
240+
.join("\n") + "\n"
241+
setTimeout(() => callback(files), 10)
242+
}
243+
}),
244+
},
245+
stderr: {
246+
on: vi.fn(),
247+
},
248+
on: vi.fn((event, callback) => {
249+
if (event === "close") {
250+
setTimeout(() => callback(0), 20)
251+
}
252+
}),
253+
kill: vi.fn(),
254+
}
255+
mockSpawn.mockReturnValue(mockProcess as any)
256+
257+
// Call with limit of 10
258+
const [results, limitReached] = await listFiles("/test/project", true, 10)
259+
260+
// Should include both files and directories up to the limit
261+
expect(results.length).toBe(10)
262+
expect(limitReached).toBe(true)
263+
264+
const files = results.filter((r) => !r.endsWith("/"))
265+
const directories = results.filter((r) => r.endsWith("/"))
266+
267+
// Should have both files and directories
268+
expect(files.length).toBeGreaterThan(0)
269+
expect(directories.length).toBeGreaterThan(0)
270+
271+
// Total should equal the limit
272+
expect(files.length + directories.length).toBe(10)
273+
})
274+
})

src/services/glob/list-files.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,20 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb
5050
// For non-recursive, use the existing approach
5151
const files = await listFilesWithRipgrep(rgPath, dirPath, false, limit)
5252
const ignoreInstance = await createIgnoreInstance(dirPath)
53-
const directories = await listFilteredDirectories(dirPath, false, ignoreInstance)
53+
// Calculate remaining limit for directories
54+
const remainingLimit = Math.max(0, limit - files.length)
55+
const directories = await listFilteredDirectories(dirPath, false, ignoreInstance, remainingLimit)
5456
return formatAndCombineResults(files, directories, limit)
5557
}
5658

5759
// For recursive mode, use the original approach but ensure first-level directories are included
5860
const files = await listFilesWithRipgrep(rgPath, dirPath, true, limit)
5961
const ignoreInstance = await createIgnoreInstance(dirPath)
60-
const directories = await listFilteredDirectories(dirPath, true, ignoreInstance)
62+
// Calculate remaining limit for directories
63+
const remainingLimit = Math.max(0, limit - files.length)
64+
const directories = await listFilteredDirectories(dirPath, true, ignoreInstance, remainingLimit)
6165

62-
// Combine and check if we hit the limit
66+
// Combine and check if we hit the limits
6367
const [results, limitReached] = formatAndCombineResults(files, directories, limit)
6468

6569
// If we hit the limit, ensure all first-level directories are included
@@ -384,9 +388,12 @@ async function listFilteredDirectories(
384388
dirPath: string,
385389
recursive: boolean,
386390
ignoreInstance: ReturnType<typeof ignore>,
391+
limit?: number,
387392
): Promise<string[]> {
388393
const absolutePath = path.resolve(dirPath)
389394
const directories: string[] = []
395+
let dirCount = 0
396+
const effectiveLimit = limit ?? Number.MAX_SAFE_INTEGER
390397

391398
// For environment details generation, we don't want to treat the root as a "target"
392399
// if we're doing a general recursive scan, as this would include hidden directories
@@ -401,13 +408,23 @@ async function listFilteredDirectories(
401408
ignoreInstance,
402409
}
403410

404-
async function scanDirectory(currentPath: string, context: ScanContext): Promise<void> {
411+
async function scanDirectory(currentPath: string, context: ScanContext): Promise<boolean> {
412+
// Check if we've reached the limit
413+
if (dirCount >= effectiveLimit) {
414+
return true // Signal that limit was reached
415+
}
416+
405417
try {
406418
// List all entries in the current directory
407419
const entries = await fs.promises.readdir(currentPath, { withFileTypes: true })
408420

409421
// Filter for directories only, excluding symbolic links to prevent circular traversal
410422
for (const entry of entries) {
423+
// Check limit before processing each directory
424+
if (dirCount >= effectiveLimit) {
425+
return true
426+
}
427+
411428
if (entry.isDirectory() && !entry.isSymbolicLink()) {
412429
const dirName = entry.name
413430
const fullDirPath = path.join(currentPath, dirName)
@@ -425,6 +442,12 @@ async function listFilteredDirectories(
425442
// fullDirPath is already absolute since it's built with path.join from absolutePath
426443
const formattedPath = fullDirPath.endsWith("/") ? fullDirPath : `${fullDirPath}/`
427444
directories.push(formattedPath)
445+
dirCount++
446+
447+
// Check if we've reached the limit after adding
448+
if (dirCount >= effectiveLimit) {
449+
return true
450+
}
428451
}
429452

430453
// If recursive mode and not a ignored directory, scan subdirectories
@@ -461,14 +484,19 @@ async function listFilteredDirectories(
461484
isTargetDir: false,
462485
insideExplicitHiddenTarget: newInsideExplicitHiddenTarget,
463486
}
464-
await scanDirectory(fullDirPath, newContext)
487+
const limitReached = await scanDirectory(fullDirPath, newContext)
488+
if (limitReached) {
489+
return true
490+
}
465491
}
466492
}
467493
}
468494
} catch (err) {
469495
// Continue if we can't read a directory
470496
console.warn(`Could not read directory ${currentPath}: ${err}`)
471497
}
498+
499+
return false // Limit not reached
472500
}
473501

474502
// Start scanning from the root directory

0 commit comments

Comments
 (0)