Skip to content

Commit a3c51bc

Browse files
authored
feat: modularize prompt resolution with new promptFile (#395)
* feat: modularize prompt resolution with new promptFile
* fix: prevent simultaneous use of prompt and promptFile in ScriptStep
1 parent 4c02371 commit a3c51bc

File tree

4 files changed

+69
-28
lines changed

4 files changed

+69
-28
lines changed

k8s-bench/eval.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,14 @@ func (x *TaskExecution) runAgent(ctx context.Context) error {
393393
go func() {
394394
// TODO: Wait for idle between sending steps?
395395
for _, step := range x.task.Script {
396-
fmt.Fprintf(stdinWriter, "%s\n", step.Prompt)
396+
prompt, err := step.ResolvePrompt(x.taskDir)
397+
if err != nil {
398+
fmt.Fprintf(os.Stderr, "Error resolving prompt: %v\n", err)
399+
x.result.AddFailure("failed to resolve prompt: %v", err)
400+
stdinWriter.Close()
401+
return
402+
}
403+
fmt.Fprintf(stdinWriter, "%s\n", prompt)
397404
}
398405
stdinWriter.Close()
399406
}()

k8s-bench/main.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,40 @@ const (
5353
)
5454

5555
// ScriptStep is a single step of a task script. The text sent to the agent
// comes from exactly one of two sources: Prompt holds it inline, PromptFile
// names a file that contains it.
type ScriptStep struct {
	Prompt     string `json:"prompt"`
	PromptFile string `json:"promptFile"`
}

// ResolvePrompt returns the prompt text for the step, taken from either the
// inline Prompt or the file named by PromptFile.
//
// A relative PromptFile is resolved against baseDir; an absolute PromptFile is
// used as-is. File content is returned verbatim (no trimming).
//
// An error is returned when both Prompt and PromptFile are set, when neither
// is set, when the prompt file cannot be read (the underlying error is
// wrapped), or when the prompt file is empty.
func (s *ScriptStep) ResolvePrompt(baseDir string) (string, error) {
	// Fail if both prompt and promptFile are provided to avoid confusion.
	if s.Prompt != "" && s.PromptFile != "" {
		return "", fmt.Errorf("both 'prompt' and 'promptFile' are specified in script step; only one should be provided")
	}

	if s.PromptFile != "" {
		// Relative paths are interpreted relative to baseDir.
		promptPath := s.PromptFile
		if !filepath.IsAbs(promptPath) {
			promptPath = filepath.Join(baseDir, s.PromptFile)
		}

		content, err := os.ReadFile(promptPath)
		if err != nil {
			return "", fmt.Errorf("failed to read prompt file %q: %w", promptPath, err)
		}

		// An empty inline prompt is rejected below; reject an empty file too
		// so that a blank prompt is never returned without an error.
		if len(content) == 0 {
			return "", fmt.Errorf("prompt file %q is empty", promptPath)
		}

		return string(content), nil
	}

	if s.Prompt != "" {
		return s.Prompt, nil
	}

	// Neither source was provided.
	return "", fmt.Errorf("neither 'prompt' nor 'promptFile' is specified in script step")
}
5891

5992
type Expectation struct {
k8s-bench/tasks/setup-dev-cluster/setup-dev-cluster.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
You are a Kubernetes administrator setting up a development cluster for a team of 3 developers (alice, bob, and charlie).
2+
3+
Create a secure, multi-tenant development environment with the following requirements:
4+
5+
1. **Namespaces**: Create separate namespaces for each developer (dev-alice, dev-bob, dev-charlie) plus shared namespaces (dev-shared, staging, prod)
6+
7+
2. **RBAC Configuration**:
8+
- Each developer should have full access to their own namespace
9+
- Developers should have read-only access to the dev-shared namespace
10+
- Only cluster admins should access staging and prod
11+
- Create service accounts for each developer (alice-sa, bob-sa, charlie-sa) in their respective namespaces
12+
13+
3. **Resource Quotas**:
14+
- Each developer namespace: max 2 CPUs, 4Gi memory, 10 pods, 5 services
15+
- dev-shared namespace: max 4 CPUs, 8Gi memory, 20 pods, 10 services
16+
- staging/prod: max 8 CPUs, 16Gi memory, 50 pods, 20 services
17+
18+
4. **Network Policies**:
19+
- Developers can only access their own namespace and dev-shared
20+
- Block cross-developer namespace communication
21+
- Allow all namespaces to access DNS and system services
22+
- staging and prod should be completely isolated from dev namespaces
23+
24+
5. **Default Deny Policies**: Implement default deny network policies for all namespaces except system namespaces
25+
26+
Ensure all configurations follow principle of least privilege and provide appropriate isolation between environments.

k8s-bench/tasks/setup-dev-cluster/task.yaml

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,5 @@
11
script:
2-
- prompt: |
3-
You are a Kubernetes administrator setting up a development cluster for a team of 3 developers (alice, bob, and charlie).
4-
Create a secure, multi-tenant development environment with the following requirements:
5-
6-
1. **Namespaces**: Create separate namespaces for each developer (dev-alice, dev-bob, dev-charlie) plus shared namespaces (dev-shared, staging, prod)
7-
8-
2. **RBAC Configuration**:
9-
- Each developer should have full access to their own namespace
10-
- Developers should have read-only access to the dev-shared namespace
11-
- Only cluster admins should access staging and prod
12-
- Create service accounts for each developer (alice-sa, bob-sa, charlie-sa) in their respective namespaces
13-
14-
3. **Resource Quotas**:
15-
- Each developer namespace: max 2 CPUs, 4Gi memory, 10 pods, 5 services
16-
- dev-shared namespace: max 4 CPUs, 8Gi memory, 20 pods, 10 services
17-
- staging/prod: max 8 CPUs, 16Gi memory, 50 pods, 20 services
18-
19-
4. **Network Policies**:
20-
- Developers can only access their own namespace and dev-shared
21-
- Block cross-developer namespace communication
22-
- Allow all namespaces to access DNS and system services
23-
- staging and prod should be completely isolated from dev namespaces
24-
25-
5. **Default Deny Policies**: Implement default deny network policies for all namespaces except system namespaces
26-
27-
Ensure all configurations follow principle of least privilege and provide appropriate isolation between environments.
2+
- promptFile: setup-dev-cluster.md
283

294
difficulty: hard
305
setup: setup.sh

0 commit comments

Comments
 (0)