Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions src/services/glob/__tests__/list-files-limit.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
import * as path from "path"
import * as fs from "fs"
import { listFiles } from "../list-files"

// Mock ripgrep
vi.mock("../../ripgrep", () => ({
getBinPath: vi.fn().mockResolvedValue("/mock/path/to/rg"),
}))

// Mock vscode
vi.mock("vscode", () => ({
env: {
appRoot: "/mock/app/root",
},
}))

// Mock child_process
vi.mock("child_process", () => ({
spawn: vi.fn(),
}))

vi.mock("../../../utils/path", () => ({
arePathsEqual: vi.fn().mockReturnValue(false),
}))

import * as childProcess from "child_process"

describe("listFiles limit handling for large projects", () => {
beforeEach(() => {
vi.clearAllMocks()
})

it("should prevent stack overflow when scanning large projects", async () => {
// This test verifies that the fix prevents the "Maximum call stack size exceeded" error
// by ensuring the function completes successfully even with a very large directory structure

const createMockDirEntry = (name: string) =>
({
name,
isDirectory: () => true,
isSymbolicLink: () => false,
isFile: () => false,
}) as any

const mockReaddir = vi.fn()
vi.mocked(fs.promises).readdir = mockReaddir

// Simulate a project with 200k+ items (as mentioned in the issue)
// Create a broad directory tree that would cause stack overflow without proper limits
let callCount = 0
const maxDepth = 100 // Simulate deep nesting
mockReaddir.mockImplementation(async () => {
callCount++
if (callCount > maxDepth) {
return []
}
// Return many subdirectories at each level to simulate a large codebase
return Array(50)
.fill(null)
.map((_, i) => createMockDirEntry(`dir${callCount}_${i}`))
})

// Mock ripgrep to return many files
const mockSpawn = vi.mocked(childProcess.spawn)
const mockProcess = {
stdout: {
on: vi.fn((event, callback) => {
if (event === "data") {
// Return many files to simulate large project
const files =
Array(10000)
.fill(null)
.map((_, i) => `file${i}.ts`)
.join("\n") + "\n"
setTimeout(() => callback(files), 10)
}
}),
},
stderr: {
on: vi.fn(),
},
on: vi.fn((event, callback) => {
if (event === "close") {
setTimeout(() => callback(0), 20)
}
}),
kill: vi.fn(),
}
mockSpawn.mockReturnValue(mockProcess as any)

// Call listFiles with a limit that would be used in code indexing
const limit = 50_000 // MAX_LIST_FILES_LIMIT_CODE_INDEX value

// The key test: this should complete without throwing a stack overflow error
let error: Error | null = null
let results: string[] = []
let limitReached = false

try {
const startTime = Date.now()
const [res, didHitLimit] = await listFiles("/test/large-project", true, limit)
const endTime = Date.now()

results = res
limitReached = didHitLimit

// Should complete in reasonable time
expect(endTime - startTime).toBeLessThan(10000) // 10 seconds max
} catch (e) {
error = e as Error
}

// Main assertion: no stack overflow error should occur
expect(error).toBeNull()

// The function should return valid results
expect(results).toBeDefined()
expect(Array.isArray(results)).toBe(true)

// The limit should be respected
expect(results.length).toBeLessThanOrEqual(limit)

// Directory scanning should respect the overall limit
const directories = results.filter((r) => r.endsWith("/"))
const files = results.filter((r) => !r.endsWith("/"))
expect(directories.length + files.length).toBeLessThanOrEqual(limit)
})

it("should terminate early when directory limit is reached", async () => {
const createMockDirEntry = (name: string) =>
({
name,
isDirectory: () => true,
isSymbolicLink: () => false,
isFile: () => false,
}) as any

const mockReaddir = vi.fn()
vi.mocked(fs.promises).readdir = mockReaddir

// Mock directory structure
let directoriesScanned = 0
mockReaddir.mockImplementation(async (dirPath: string) => {
directoriesScanned++

// Root directory has many subdirectories
if (directoriesScanned === 1) {
return Array(100)
.fill(null)
.map((_, i) => createMockDirEntry(`subdir${i}`))
}

// Each subdirectory has more subdirectories
return Array(50)
.fill(null)
.map((_, i) => createMockDirEntry(`nested${directoriesScanned}_${i}`))
})

// Mock ripgrep to return no files (focus on directory scanning)
const mockSpawn = vi.mocked(childProcess.spawn)
const mockProcess = {
stdout: {
on: vi.fn((event, callback) => {
if (event === "data") {
setTimeout(() => callback(""), 10)
}
}),
},
stderr: {
on: vi.fn(),
},
on: vi.fn((event, callback) => {
if (event === "close") {
setTimeout(() => callback(0), 20)
}
}),
kill: vi.fn(),
}
mockSpawn.mockReturnValue(mockProcess as any)

// Call listFiles with a small limit
const limit = 10
const [results, limitReached] = await listFiles("/test/project", true, limit)

// Verify results respect the limit
expect(results.length).toBeLessThanOrEqual(limit)
expect(limitReached).toBe(true)

// Verify that directory scanning terminated early
// Without the fix, this would scan thousands of directories
// With the fix, it should stop after reaching the limit
const directories = results.filter((r) => r.endsWith("/"))
expect(directories.length).toBeLessThanOrEqual(limit)

// The number of readdir calls should be proportional to the limit, not unbounded
// This ensures we're not scanning the entire tree before applying the limit
expect(directoriesScanned).toBeLessThan(limit * 10) // Allow some overhead but not excessive
})

it("should handle zero limit gracefully", async () => {
// This test is already in the original spec but let's ensure it works with our changes
const [results, limitReached] = await listFiles("/test/path", true, 0)

expect(results).toEqual([])
expect(limitReached).toBe(false)

// No filesystem operations should occur with zero limit
expect(fs.promises.readdir).not.toHaveBeenCalled()
expect(childProcess.spawn).not.toHaveBeenCalled()
})

it("should correctly distribute limit between files and directories", async () => {
const createMockDirEntry = (name: string) =>
({
name,
isDirectory: () => true,
isSymbolicLink: () => false,
isFile: () => false,
}) as any

const mockReaddir = vi.fn()
vi.mocked(fs.promises).readdir = mockReaddir

// Mock directory with some subdirectories
mockReaddir
.mockResolvedValueOnce([createMockDirEntry("dir1"), createMockDirEntry("dir2"), createMockDirEntry("dir3")])
.mockResolvedValue([]) // Empty subdirectories

// Mock ripgrep to return files
const mockSpawn = vi.mocked(childProcess.spawn)
const mockProcess = {
stdout: {
on: vi.fn((event, callback) => {
if (event === "data") {
// Return 8 files
const files =
Array(8)
.fill(null)
.map((_, i) => `file${i}.ts`)
.join("\n") + "\n"
setTimeout(() => callback(files), 10)
}
}),
},
stderr: {
on: vi.fn(),
},
on: vi.fn((event, callback) => {
if (event === "close") {
setTimeout(() => callback(0), 20)
}
}),
kill: vi.fn(),
}
mockSpawn.mockReturnValue(mockProcess as any)

// Call with limit of 10
const [results, limitReached] = await listFiles("/test/project", true, 10)

// Should include both files and directories up to the limit
expect(results.length).toBe(10)
expect(limitReached).toBe(true)

const files = results.filter((r) => !r.endsWith("/"))
const directories = results.filter((r) => r.endsWith("/"))

// Should have both files and directories
expect(files.length).toBeGreaterThan(0)
expect(directories.length).toBeGreaterThan(0)

// Total should equal the limit
expect(files.length + directories.length).toBe(10)
})
})
38 changes: 33 additions & 5 deletions src/services/glob/list-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,20 @@ export async function listFiles(dirPath: string, recursive: boolean, limit: numb
// For non-recursive, use the existing approach
const files = await listFilesWithRipgrep(rgPath, dirPath, false, limit)
const ignoreInstance = await createIgnoreInstance(dirPath)
const directories = await listFilteredDirectories(dirPath, false, ignoreInstance)
// Calculate remaining limit for directories
const remainingLimit = Math.max(0, limit - files.length)
const directories = await listFilteredDirectories(dirPath, false, ignoreInstance, remainingLimit)
return formatAndCombineResults(files, directories, limit)
}

// For recursive mode, use the original approach but ensure first-level directories are included
const files = await listFilesWithRipgrep(rgPath, dirPath, true, limit)
const ignoreInstance = await createIgnoreInstance(dirPath)
const directories = await listFilteredDirectories(dirPath, true, ignoreInstance)
// Calculate remaining limit for directories
const remainingLimit = Math.max(0, limit - files.length)
const directories = await listFilteredDirectories(dirPath, true, ignoreInstance, remainingLimit)

// Combine and check if we hit the limit
// Combine and check if we hit the limits
const [results, limitReached] = formatAndCombineResults(files, directories, limit)

// If we hit the limit, ensure all first-level directories are included
Expand Down Expand Up @@ -384,9 +388,12 @@ async function listFilteredDirectories(
dirPath: string,
recursive: boolean,
ignoreInstance: ReturnType<typeof ignore>,
limit?: number,
): Promise<string[]> {
const absolutePath = path.resolve(dirPath)
const directories: string[] = []
let dirCount = 0
const effectiveLimit = limit ?? Number.MAX_SAFE_INTEGER

// For environment details generation, we don't want to treat the root as a "target"
// if we're doing a general recursive scan, as this would include hidden directories
Expand All @@ -401,13 +408,23 @@ async function listFilteredDirectories(
ignoreInstance,
}

async function scanDirectory(currentPath: string, context: ScanContext): Promise<void> {
async function scanDirectory(currentPath: string, context: ScanContext): Promise<boolean> {
// Check if we've reached the limit
if (dirCount >= effectiveLimit) {
return true // Signal that limit was reached
}

try {
// List all entries in the current directory
const entries = await fs.promises.readdir(currentPath, { withFileTypes: true })

// Filter for directories only, excluding symbolic links to prevent circular traversal
for (const entry of entries) {
// Check limit before processing each directory
if (dirCount >= effectiveLimit) {
return true
}

if (entry.isDirectory() && !entry.isSymbolicLink()) {
const dirName = entry.name
const fullDirPath = path.join(currentPath, dirName)
Expand All @@ -425,6 +442,12 @@ async function listFilteredDirectories(
// fullDirPath is already absolute since it's built with path.join from absolutePath
const formattedPath = fullDirPath.endsWith("/") ? fullDirPath : `${fullDirPath}/`
directories.push(formattedPath)
dirCount++

// Check if we've reached the limit after adding
if (dirCount >= effectiveLimit) {
return true
}
}

// If recursive mode and not a ignored directory, scan subdirectories
Expand Down Expand Up @@ -461,14 +484,19 @@ async function listFilteredDirectories(
isTargetDir: false,
insideExplicitHiddenTarget: newInsideExplicitHiddenTarget,
}
await scanDirectory(fullDirPath, newContext)
const limitReached = await scanDirectory(fullDirPath, newContext)
if (limitReached) {
return true
}
}
}
}
} catch (err) {
// Continue if we can't read a directory
console.warn(`Could not read directory ${currentPath}: ${err}`)
}

return false // Limit not reached
}

// Start scanning from the root directory
Expand Down
Loading