-
Notifications
You must be signed in to change notification settings - Fork 3.6k
feat: Adds property based testing framework for compaction #26783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
2a73c97
b708182
8d241d9
c14a416
a149a65
f3498c0
5ace584
061ade9
36e8106
0b7c018
1320743
817317e
98d0c6a
912765e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package tsm1_test | ||
|
||
import ( | ||
"github.com/influxdata/influxdb/tsdb/engine/tsm1" | ||
"time" | ||
) | ||
|
||
type TestLevelResults struct { | ||
level1Groups []tsm1.PlannedCompactionGroup | ||
level2Groups []tsm1.PlannedCompactionGroup | ||
level3Groups []tsm1.PlannedCompactionGroup | ||
level4Groups []tsm1.PlannedCompactionGroup | ||
level5Groups []tsm1.PlannedCompactionGroup | ||
} | ||
|
||
type TestEnginePlanCompactionsRunner struct { | ||
name string | ||
files []tsm1.ExtFileStat | ||
defaultBlockCount int // Default block count if member of files has FirstBlockCount of 0. | ||
// This is specifically used to adjust the modification time | ||
// so we can simulate the passage of time in tests | ||
testShardTime time.Duration | ||
// Each result is for the different plantypes | ||
expectedResult func() TestLevelResults | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
package tsm1_test | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"sort" | ||
|
||
"github.com/influxdata/influxdb/tsdb/engine/tsm1" | ||
) | ||
|
||
// CompactionProperty represents a property that compaction groups should satisfy | ||
type CompactionProperty struct { | ||
Name string | ||
Description string | ||
Validator func(allFiles []string, groups []tsm1.CompactionGroup) error | ||
} | ||
|
||
// AdjacentFileProperty validates that compaction groups don't create gaps | ||
var AdjacentFileProperty = CompactionProperty{ | ||
Name: "No Gaps Rule", | ||
Description: "Files should not have gaps within the same compaction level - if files A and C are in the same level group, any file B between them should also be in a group at the same level or higher", | ||
Validator: validateNoGaps, | ||
} | ||
|
||
// parseFileName uses the existing TSM file parsing functionality | ||
func parseFileName(filename string) (generation int, sequence int, err error) { | ||
return tsm1.DefaultParseFileName(filename) | ||
} | ||
|
||
// fileInfo holds parsed file information for sorting | ||
type fileInfo struct { | ||
filename string | ||
generation int | ||
sequence int | ||
} | ||
|
||
// validateNoGaps checks that there are no gaps within the same compaction level | ||
// A gap occurs when files A and C are in the same level group, but file B (between A and C) | ||
// is in a different level group, creating a "hole" in the sequence | ||
func validateNoGaps(allFiles []string, groups []tsm1.CompactionGroup) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the algorithm in here is strange; why did you write it this way? I believe it works but it seems more complicated than it needs to be. It also sorts the files that are in the input CompactionGroups... which hides errors of files being out of order in the group (yes this should get caught elsewhere too). I don't understand why the algorithm isn't:
This will fail if the group's files aren't sorted or if the group's files aren't adjacent. e.g.
Any group has to be a strict subset. Group0: <-- PASS Group1: Group2: For the Group1, it'd be easy to print the files in between because you have the index 4 and index 6, so you print the file at index 5 e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh nice. I just noticed that @davidby-influx proposed exactly the same algorithm 6 hours ago in his comment. |
||
// Parse all files and sort them by generation, then by sequence | ||
var fileInfos []fileInfo | ||
for _, file := range allFiles { | ||
gen, seq, err := parseFileName(file) | ||
if err != nil { | ||
return fmt.Errorf("failed to parse file %s: %v", file, err) | ||
} | ||
fileInfos = append(fileInfos, fileInfo{ | ||
filename: file, | ||
generation: gen, | ||
sequence: seq, | ||
}) | ||
} | ||
|
||
// Sort by generation first, then by sequence | ||
sort.Slice(fileInfos, func(i, j int) bool { | ||
if fileInfos[i].generation != fileInfos[j].generation { | ||
return fileInfos[i].generation < fileInfos[j].generation | ||
} | ||
return fileInfos[i].sequence < fileInfos[j].sequence | ||
}) | ||
|
||
// Create a map of filename to group index | ||
fileToGroup := make(map[string]int) | ||
for groupIdx, group := range groups { | ||
for _, file := range group { | ||
fileToGroup[file] = groupIdx | ||
} | ||
} | ||
|
||
// Group files by their compaction group | ||
groupToFiles := make(map[int][]fileInfo) | ||
for _, info := range fileInfos { | ||
if groupIdx, inGroup := fileToGroup[info.filename]; inGroup { | ||
groupToFiles[groupIdx] = append(groupToFiles[groupIdx], info) | ||
} | ||
} | ||
|
||
// For each group, check if there are gaps in the file sequence | ||
for groupIdx, filesInGroup := range groupToFiles { | ||
if len(filesInGroup) < 2 { | ||
continue // No gaps possible with less than 2 files | ||
} | ||
|
||
// Check for gaps between files in this group | ||
for i := 0; i < len(filesInGroup)-1; i++ { | ||
current := filesInGroup[i] | ||
next := filesInGroup[i+1] | ||
|
||
// Find all files between current and next in the sorted sequence | ||
currentPos := findFilePosition(current, fileInfos) | ||
nextPos := findFilePosition(next, fileInfos) | ||
|
||
// Check if there are files between current and next that are in different groups | ||
for pos := currentPos + 1; pos < nextPos; pos++ { | ||
betweenFile := fileInfos[pos] | ||
betweenGroup, inGroup := fileToGroup[betweenFile.filename] | ||
|
||
if inGroup && betweenGroup != groupIdx { | ||
return fmt.Errorf("gap detected: files %s and %s are in group %d, but file %s (between them) is in group %d", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This error message is tough as the group indices don't tell me anything about which level group it came from. I think it should know which level group its talking about. also: "no gaps rule" is odd description of the property. Maybe its me: the rule is that tsm files within a group must be adjacent in the master list of files. "Adjacency rule" seems more accurate. a "gap" sounds like a missing tsm file instead of one in the wrong place. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
nvm -- i see that it does this! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also, it'd be nice if all instances of the issues were reported instead of stopping at the first adjacency rule violation. |
||
current.filename, next.filename, groupIdx, betweenFile.filename, betweenGroup) | ||
} | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// findFilePosition returns the position of a file in the sorted fileInfos slice | ||
func findFilePosition(target fileInfo, fileInfos []fileInfo) int { | ||
for i, info := range fileInfos { | ||
if info.filename == target.filename { | ||
return i | ||
} | ||
} | ||
return -1 | ||
} | ||
|
||
// ValidateCompactionProperties validates that compaction results satisfy all properties | ||
func ValidateCompactionProperties(allFiles []string, results TestLevelResults, properties ...CompactionProperty) error { | ||
var errs []error | ||
|
||
// Collect all compaction groups from results | ||
var allGroups []tsm1.CompactionGroup | ||
allGroups = append(allGroups, extractGroups(results.level1Groups)...) | ||
allGroups = append(allGroups, extractGroups(results.level2Groups)...) | ||
allGroups = append(allGroups, extractGroups(results.level3Groups)...) | ||
allGroups = append(allGroups, extractGroups(results.level4Groups)...) | ||
allGroups = append(allGroups, extractGroups(results.level5Groups)...) | ||
|
||
// Validate each property | ||
for _, property := range properties { | ||
if err := property.Validator(allFiles, allGroups); err != nil { | ||
errs = append(errs, fmt.Errorf("%s violation: %v", property.Name, err)) | ||
} | ||
} | ||
|
||
return errors.Join(errs...) | ||
} | ||
|
||
// extractGroups extracts CompactionGroup from PlannedCompactionGroup | ||
func extractGroups(plannedGroups []tsm1.PlannedCompactionGroup) []tsm1.CompactionGroup { | ||
var groups []tsm1.CompactionGroup | ||
for _, planned := range plannedGroups { | ||
groups = append(groups, planned.Group) | ||
} | ||
return groups | ||
} | ||
|
||
// ValidateTestCase validates both expected results and actual planner output | ||
func ValidateTestCase(testCase TestEnginePlanCompactionsRunner, actualResults TestLevelResults) error { | ||
var errs []error | ||
|
||
// Extract all filenames from test case | ||
var allFiles []string | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since you're going to touch this for the comment change, please preallocate this since you know how big it will be. ci pays for allocations too. |
||
for _, file := range testCase.files { | ||
allFiles = append(allFiles, file.Path) | ||
} | ||
|
||
// Validate expected results | ||
expectedResults := testCase.expectedResult() | ||
if expectedErr := ValidateCompactionProperties(allFiles, expectedResults, AdjacentFileProperty); expectedErr != nil { | ||
errs = append(errs, fmt.Errorf("expected results: %v", expectedErr)) | ||
} | ||
|
||
// Validate actual results | ||
if actualErr := ValidateCompactionProperties(allFiles, actualResults, AdjacentFileProperty); actualErr != nil { | ||
errs = append(errs, fmt.Errorf("actual results: %v", actualErr)) | ||
} | ||
|
||
return errors.Join(errs...) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason for the wrapping
struct
?