Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions tsdb/engine/tsm1/compact_common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package tsm1_test

import (
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
"time"
)

type TestLevelResults struct {
level1Groups []tsm1.PlannedCompactionGroup
level2Groups []tsm1.PlannedCompactionGroup
level3Groups []tsm1.PlannedCompactionGroup
level4Groups []tsm1.PlannedCompactionGroup
level5Groups []tsm1.PlannedCompactionGroup
}

type TestEnginePlanCompactionsRunner struct {
name string
files []tsm1.ExtFileStat
defaultBlockCount int // Default block count if member of files has FirstBlockCount of 0.
// This is specifically used to adjust the modification time
// so we can simulate the passage of time in tests
testShardTime time.Duration
// Each result is for the different plantypes
expectedResult func() TestLevelResults
}
173 changes: 173 additions & 0 deletions tsdb/engine/tsm1/compact_property_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package tsm1_test

import (
"errors"
"fmt"
"sort"

"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)

// CompactionProperty represents a property that compaction groups should satisfy
type CompactionProperty struct {
Name string
Description string
Validator func(allFiles []string, groups []tsm1.CompactionGroup) error
}

// AdjacentFileProperty validates that compaction groups don't create gaps
var AdjacentFileProperty = CompactionProperty{
Name: "No Gaps Rule",
Description: "Files should not have gaps within the same compaction level - if files A and C are in the same level group, any file B between them should also be in a group at the same level or higher",
Validator: validateNoGaps,
}

// parseFileName uses the existing TSM file parsing functionality
func parseFileName(filename string) (generation int, sequence int, err error) {
return tsm1.DefaultParseFileName(filename)
}

// fileInfo holds parsed file information for sorting
type fileInfo struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for the wrapping struct?

filename string
generation int
sequence int
}

// validateNoGaps checks that there are no gaps within the same compaction level
// A gap occurs when files A and C are in the same level group, but file B (between A and C)
// is in a different level group, creating a "hole" in the sequence
func validateNoGaps(allFiles []string, groups []tsm1.CompactionGroup) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the algorithm in here is strange; why did you write it this way? I believe it works but it seems more complicated than it needs to be.

It also sorts the files that are in the input CompactionGroups... which hides errors of files being out of order in the group (yes this should get caught elsewhere too).

I don't understand why the algorithm isn't:

  1. parse the master file list filenames to get gens and seqs
  2. sort by generation and then seq within generation into a "sorted master slice"
  3. invert slice to a map[filename]->index in sorted master list
  4. for each group, for each file in the group check if file[i] has an index in the master map that is -1 from file[i+1] in the master map.

This will fail if the group's files aren't sorted or if the group's files aren't adjacent.

e.g.
sorted master files list -> index in sorted slice

0-5 -> 0
1-5 -> 1
2-4 -> 2
3-4 -> 3
4-4 -> 4
5-2 -> 5
6-1 -> 6
7-1 -> 7

Any group has to be a strict subset.

Group0: <-- PASS
1-5 -> 1
2-4 -> 2

Group1:
3-4 -> 3
4-4 -> 4
6-1 -> 6 <--- failure. index 6 is not 4+1. Adjacency failure.

Group2:
6-1 -> 6
5-2 -> 5 <--- failure. index 5 is not 6+1; group file ordering failure.

For the Group1, it'd be easy to print the files in between because you have the index 4 and index 6, so you print the file at index 5 e.g.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh nice. I just noticed that @davidby-influx proposed exactly the same algorithm 6 hours ago in his comment.

// Parse all files and sort them by generation, then by sequence
var fileInfos []fileInfo
for _, file := range allFiles {
gen, seq, err := parseFileName(file)
if err != nil {
return fmt.Errorf("failed to parse file %s: %v", file, err)
}
fileInfos = append(fileInfos, fileInfo{
filename: file,
generation: gen,
sequence: seq,
})
}

// Sort by generation first, then by sequence
sort.Slice(fileInfos, func(i, j int) bool {
if fileInfos[i].generation != fileInfos[j].generation {
return fileInfos[i].generation < fileInfos[j].generation
}
return fileInfos[i].sequence < fileInfos[j].sequence
})

// Create a map of filename to group index
fileToGroup := make(map[string]int)
for groupIdx, group := range groups {
for _, file := range group {
fileToGroup[file] = groupIdx
}
}

// Group files by their compaction group
groupToFiles := make(map[int][]fileInfo)
for _, info := range fileInfos {
if groupIdx, inGroup := fileToGroup[info.filename]; inGroup {
groupToFiles[groupIdx] = append(groupToFiles[groupIdx], info)
}
}

// For each group, check if there are gaps in the file sequence
for groupIdx, filesInGroup := range groupToFiles {
if len(filesInGroup) < 2 {
continue // No gaps possible with less than 2 files
}

// Check for gaps between files in this group
for i := 0; i < len(filesInGroup)-1; i++ {
current := filesInGroup[i]
next := filesInGroup[i+1]

// Find all files between current and next in the sorted sequence
currentPos := findFilePosition(current, fileInfos)
nextPos := findFilePosition(next, fileInfos)

// Check if there are files between current and next that are in different groups
for pos := currentPos + 1; pos < nextPos; pos++ {
betweenFile := fileInfos[pos]
betweenGroup, inGroup := fileToGroup[betweenFile.filename]

if inGroup && betweenGroup != groupIdx {
return fmt.Errorf("gap detected: files %s and %s are in group %d, but file %s (between them) is in group %d",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expected results: No Gaps Rule violation: gap detected: files 04-04.tsm and 09-04.tsm are in group 1, but file 05-03.tsm (between them) is in group 0

This error message is tough as the group indices don't tell me anything about which level group it came from. I think it should know which level group its talking about.

also: "no gaps rule" is odd description of the property. Maybe its me: the rule is that tsm files within a group must be adjacent in the master list of files. "Adjacency rule" seems more accurate. a "gap" sounds like a missing tsm file instead of one in the wrong place.

Copy link
Contributor

@philjb philjb Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also think the validation should tell me if the problem is in the expected results (aka I made a bad test case) or in the output of the compaction planner (the compaction planner is wrong). I know if the expected results are correct and valid and the compaction planner produces a bad plan, the test case will still fail, but more information is always helpful.

nvm -- i see that it does this!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, it'd be nice if all instances of the issues were reported instead of stopping at the first adjacency rule violation.

current.filename, next.filename, groupIdx, betweenFile.filename, betweenGroup)
}
}
}
}

return nil
}

// findFilePosition returns the position of a file in the sorted fileInfos slice
func findFilePosition(target fileInfo, fileInfos []fileInfo) int {
for i, info := range fileInfos {
if info.filename == target.filename {
return i
}
}
return -1
}

// ValidateCompactionProperties validates that compaction results satisfy all properties
func ValidateCompactionProperties(allFiles []string, results TestLevelResults, properties ...CompactionProperty) error {
var errs []error

// Collect all compaction groups from results
var allGroups []tsm1.CompactionGroup
allGroups = append(allGroups, extractGroups(results.level1Groups)...)
allGroups = append(allGroups, extractGroups(results.level2Groups)...)
allGroups = append(allGroups, extractGroups(results.level3Groups)...)
allGroups = append(allGroups, extractGroups(results.level4Groups)...)
allGroups = append(allGroups, extractGroups(results.level5Groups)...)

// Validate each property
for _, property := range properties {
if err := property.Validator(allFiles, allGroups); err != nil {
errs = append(errs, fmt.Errorf("%s violation: %v", property.Name, err))
}
}

return errors.Join(errs...)
}

// extractGroups extracts CompactionGroup from PlannedCompactionGroup
func extractGroups(plannedGroups []tsm1.PlannedCompactionGroup) []tsm1.CompactionGroup {
var groups []tsm1.CompactionGroup
for _, planned := range plannedGroups {
groups = append(groups, planned.Group)
}
return groups
}

// ValidateTestCase validates both expected results and actual planner output
func ValidateTestCase(testCase TestEnginePlanCompactionsRunner, actualResults TestLevelResults) error {
var errs []error

// Extract all filenames from test case
var allFiles []string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since you're going to touch this for the comment change, please preallocate this since you know how big it will be. ci pays for allocations too.

for _, file := range testCase.files {
allFiles = append(allFiles, file.Path)
}

// Validate expected results
expectedResults := testCase.expectedResult()
if expectedErr := ValidateCompactionProperties(allFiles, expectedResults, AdjacentFileProperty); expectedErr != nil {
errs = append(errs, fmt.Errorf("expected results: %v", expectedErr))
}

// Validate actual results
if actualErr := ValidateCompactionProperties(allFiles, actualResults, AdjacentFileProperty); actualErr != nil {
errs = append(errs, fmt.Errorf("actual results: %v", actualErr))
}

return errors.Join(errs...)
}
Loading