Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ require (
k8s.io/client-go v0.32.3
)

require github.com/grafana/grafana-asserts-public-clients/go/gcom v0.0.0-20250805165836-14e16b51b910
require github.com/grafana/grafana-asserts-public-clients/go/gcom v0.0.0-20250811125322-247815da58ca

require (
cuelang.org/go v0.11.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ github.com/grafana/grafana-app-sdk/logging v0.35.1 h1:taVpl+RoixTYl0JBJGhH+fPVmw
github.com/grafana/grafana-app-sdk/logging v0.35.1/go.mod h1:Y/bvbDhBiV/tkIle9RW49pgfSPIPSON8Q4qjx3pyqDk=
github.com/grafana/grafana-asserts-public-clients/go/gcom v0.0.0-20250805165836-14e16b51b910 h1:2OfDIhMtXWWVQcDp9cq/VMSBOJJfDek9450rcsV+qLg=
github.com/grafana/grafana-asserts-public-clients/go/gcom v0.0.0-20250805165836-14e16b51b910/go.mod h1:EL/5hluCvj6EDjkUfoClLKSKDoCoDowZUety28jhxQI=
github.com/grafana/grafana-asserts-public-clients/go/gcom v0.0.0-20250811125322-247815da58ca h1:GVzyCTi3rqvjK42b++lFjabG2zsrLvyAbbR43dWP6s0=
github.com/grafana/grafana-asserts-public-clients/go/gcom v0.0.0-20250811125322-247815da58ca/go.mod h1:EL/5hluCvj6EDjkUfoClLKSKDoCoDowZUety28jhxQI=
github.com/grafana/grafana-com-public-clients/go/gcom v0.0.0-20250526074454-7ec66e02e4bb h1:rmYEnCXHNQbRsuzc5jCX5qkBqFF37c5RCHlyqAAPJZo=
github.com/grafana/grafana-com-public-clients/go/gcom v0.0.0-20250526074454-7ec66e02e4bb/go.mod h1:sYWkB3NhyirQJfy3wtNQ29UYjoHbRlJlYhqN1jNsC5g=
github.com/grafana/grafana-openapi-client-go v0.0.0-20250617151817-c0f8cbb88d5c h1:jox7J0BnJmcZJp8lp631u4gjDEoIfpi6O3yrpiXNTtg=
Expand Down
86 changes: 86 additions & 0 deletions internal/resources/asserts/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package asserts

import (
"context"
"fmt"
"math"
"math/rand"
"time"

"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"

assertsapi "github.com/grafana/grafana-asserts-public-clients/go/gcom"
"github.com/grafana/terraform-provider-grafana/v4/internal/common"
)

// validateAssertsClient pulls the Asserts API client and the Grafana stack ID
// out of the provider meta value and verifies that both are usable.
//
// It returns an error diagnostic when the Asserts API client is nil or when
// the stack ID is zero; otherwise it returns the client, the stack ID and nil
// diagnostics. meta must hold a *common.Client: the type assertion is
// unchecked and panics for any other dynamic type.
func validateAssertsClient(meta interface{}) (*assertsapi.APIClient, int64, diag.Diagnostics) {
	providerClient := meta.(*common.Client)

	assertsClient := providerClient.AssertsAPIClient
	if assertsClient == nil {
		return nil, 0, diag.Errorf("Asserts API client is not configured")
	}

	if providerClient.GrafanaStackID == 0 {
		return nil, 0, diag.Errorf("stack_id must be set in provider configuration for Asserts resources")
	}

	return assertsClient, providerClient.GrafanaStackID, nil
}

// retryReadFunc is the callback that withRetryRead executes on every attempt.
// retryCount is the 1-based number of the current attempt and maxRetries is
// the attempt budget chosen by withRetryRead; the callback decides for itself
// whether the budget is exhausted. The returned *retry.RetryError is handed
// back to retry.RetryContext unchanged.
type retryReadFunc func(retryCount, maxRetries int) *retry.RetryError

// withRetryRead runs operation through retry.RetryContext, waiting before
// every attempt (including the first) with a capped, jittered exponential
// backoff.
//
// operation is called with the 1-based attempt number and the attempt budget
// (40). withRetryRead does not enforce the budget itself; operation is
// expected to return a non-retryable error once it considers the budget
// exhausted. The overall retry window passed to retry.RetryContext is 600
// seconds, chosen to tolerate eventual consistency when many resources are
// created concurrently (e.g., stress tests).
//
// The base delay per attempt is 1s, 1s, 2s, 4s, 8s, then 16s for every later
// attempt; the actual wait is drawn uniformly from [base/2, base).
//
// The wait honours ctx: when ctx is cancelled or its deadline passes while
// waiting, operation is not called and a non-retryable error carrying
// ctx.Err() is returned, instead of blocking for the rest of the delay.
func withRetryRead(ctx context.Context, operation retryReadFunc) error {
	const (
		maxRetries     = 40
		overallTimeout = 600 * time.Second
		// maxBackoffShift caps the base delay at 1<<4 = 16 seconds.
		maxBackoffShift = 4
	)

	retryCount := 0

	return retry.RetryContext(ctx, overallTimeout, func() *retry.RetryError {
		retryCount++

		// Base delay: 1s for the first attempt, then 1s, 2s, 4s, 8s, 16s (capped).
		baseSleep := 1 * time.Second
		if retryCount > 1 {
			shift := int(math.Min(float64(retryCount-2), maxBackoffShift))
			baseSleep = time.Duration(1<<shift) * time.Second
		}

		// Jitter to reduce request stampeding: wait in [base/2, base).
		// baseSleep is always at least one second, so maxJitter is strictly
		// positive and rand.Int63n cannot panic.
		minSleep := baseSleep / 2
		maxJitter := baseSleep - minSleep
		//nolint:gosec // Using math/rand for jitter in retry logic, not cryptographic purposes
		delay := minSleep + time.Duration(rand.Int63n(int64(maxJitter)))

		// Wait for the delay, but give up immediately if ctx is done so that
		// cancellation is not held up by a pending backoff.
		timer := time.NewTimer(delay)
		select {
		case <-ctx.Done():
			timer.Stop()
			return retry.NonRetryableError(ctx.Err())
		case <-timer.C:
		}

		// Execute the operation with the attempt number and the budget.
		return operation(retryCount, maxRetries)
	})
}

// createRetryableError builds a retryable "not found" error for the given
// resource, embedding the current attempt number and the attempt budget in the
// message so every resource reports missing objects the same way.
func createRetryableError(resourceType, resourceName string, retryCount, maxRetries int) *retry.RetryError {
	notFound := fmt.Errorf("%s %s not found (attempt %d/%d)", resourceType, resourceName, retryCount, maxRetries)
	return retry.RetryableError(notFound)
}

// createNonRetryableError builds the terminal "not found" error for the given
// resource, reporting how many attempts were made before giving up.
func createNonRetryableError(resourceType, resourceName string, retryCount int) *retry.RetryError {
	gaveUp := fmt.Errorf("%s %s not found after %d retries - may indicate a permanent issue", resourceType, resourceName, retryCount)
	return retry.NonRetryableError(gaveUp)
}

// createAPIError wraps err, the failure of the API call described by
// operation, in a retry error. While retryCount is below maxRetries the result
// is retryable; once retryCount reaches maxRetries the result is non-retryable
// and its message includes the number of attempts made.
func createAPIError(operation string, retryCount, maxRetries int, err error) *retry.RetryError {
	// Budget not yet used up: let the caller try again.
	if retryCount < maxRetries {
		return retry.RetryableError(fmt.Errorf("failed to %s: %w", operation, err))
	}

	return retry.NonRetryableError(fmt.Errorf("failed to %s after %d retries: %w", operation, retryCount, err))
}
88 changes: 25 additions & 63 deletions internal/resources/asserts/resource_alert_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ package asserts
import (
"context"
"fmt"
"math"

"time"

"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
Expand Down Expand Up @@ -70,28 +67,23 @@ func makeResourceAlertConfig() *common.Resource {
).WithLister(assertsListerFunction(listAlertConfigs))
}

func resourceAlertConfigCreate(ctx context.Context, d *schema.ResourceData, meta any) diag.Diagnostics {
client := meta.(*common.Client).AssertsAPIClient
if client == nil {
return diag.Errorf("Asserts API client is not configured")
}

stackID := meta.(*common.Client).GrafanaStackID
if stackID == 0 {
return diag.Errorf("stack_id must be set in provider configuration for Asserts resources")
func resourceAlertConfigCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
client, stackID, diags := validateAssertsClient(meta)
if diags.HasError() {
return diags
}
name := d.Get("name").(string)
matchLabels := make(map[string]string)
alertLabels := make(map[string]string)

if v, ok := d.GetOk("match_labels"); ok {
for k, val := range v.(map[string]any) {
for k, val := range v.(map[string]interface{}) {
matchLabels[k] = val.(string)
}
}

if v, ok := d.GetOk("alert_labels"); ok {
for k, val := range v.(map[string]any) {
for k, val := range v.(map[string]interface{}) {
alertLabels[k] = val.(string)
}
}
Expand Down Expand Up @@ -136,42 +128,23 @@ func resourceAlertConfigCreate(ctx context.Context, d *schema.ResourceData, meta
return resourceAlertConfigRead(ctx, d, meta)
}

func resourceAlertConfigRead(ctx context.Context, d *schema.ResourceData, meta any) diag.Diagnostics {
client := meta.(*common.Client).AssertsAPIClient
if client == nil {
return diag.Errorf("Asserts API client is not configured")
}

stackID := meta.(*common.Client).GrafanaStackID
if stackID == 0 {
return diag.Errorf("stack_id must be set in provider configuration for Asserts resources")
func resourceAlertConfigRead(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
client, stackID, diags := validateAssertsClient(meta)
if diags.HasError() {
return diags
}
name := d.Id()

// Retry logic for read operation to handle eventual consistency
var foundConfig *assertsapi.AlertConfigDto
retryCount := 0
maxRetries := 10
err := retry.RetryContext(ctx, 60*time.Second, func() *retry.RetryError {
retryCount++

// Exponential backoff: 1s, 2s, 4s, 8s, etc. (capped at 8s)
if retryCount > 1 {
backoffDuration := time.Duration(1<<int(math.Min(float64(retryCount-2), 3))) * time.Second
time.Sleep(backoffDuration)
}

err := withRetryRead(ctx, func(retryCount, maxRetries int) *retry.RetryError {
// Get all alert configs using the generated client API
request := client.AlertConfigurationAPI.GetAllAlertConfigs(ctx).
XScopeOrgID(fmt.Sprintf("%d", stackID))

alertConfigs, _, err := request.Execute()
if err != nil {
// If we've retried many times and still getting API errors, give up
if retryCount >= maxRetries {
return retry.NonRetryableError(fmt.Errorf("failed to get alert configurations after %d retries: %w", retryCount, err))
}
return retry.RetryableError(fmt.Errorf("failed to get alert configurations: %w", err))
return createAPIError("get alert configurations", retryCount, maxRetries, err)
}

// Find our specific config
Expand All @@ -182,12 +155,11 @@ func resourceAlertConfigRead(ctx context.Context, d *schema.ResourceData, meta a
}
}

// If we've retried many times and still not found, give up
// Check if we should give up or retry
if retryCount >= maxRetries {
return retry.NonRetryableError(fmt.Errorf("alert configuration %s not found after %d retries - may indicate a permanent issue", name, retryCount))
return createNonRetryableError("alert configuration", name, retryCount)
}

return retry.RetryableError(fmt.Errorf("alert configuration %s not found (attempt %d/%d)", name, retryCount, maxRetries))
return createRetryableError("alert configuration", name, retryCount, maxRetries)
})

if err != nil {
Expand Down Expand Up @@ -229,29 +201,24 @@ func resourceAlertConfigRead(ctx context.Context, d *schema.ResourceData, meta a
return nil
}

func resourceAlertConfigUpdate(ctx context.Context, d *schema.ResourceData, meta any) diag.Diagnostics {
client := meta.(*common.Client).AssertsAPIClient
if client == nil {
return diag.Errorf("Asserts API client is not configured")
}

stackID := meta.(*common.Client).GrafanaStackID
if stackID == 0 {
return diag.Errorf("stack_id must be set in provider configuration for Asserts resources")
func resourceAlertConfigUpdate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
client, stackID, diags := validateAssertsClient(meta)
if diags.HasError() {
return diags
}

name := d.Get("name").(string)
matchLabels := make(map[string]string)
alertLabels := make(map[string]string)

if v, ok := d.GetOk("match_labels"); ok {
for k, val := range v.(map[string]any) {
for k, val := range v.(map[string]interface{}) {
matchLabels[k] = val.(string)
}
}

if v, ok := d.GetOk("alert_labels"); ok {
for k, val := range v.(map[string]any) {
for k, val := range v.(map[string]interface{}) {
alertLabels[k] = val.(string)
}
}
Expand Down Expand Up @@ -294,15 +261,10 @@ func resourceAlertConfigUpdate(ctx context.Context, d *schema.ResourceData, meta
return resourceAlertConfigRead(ctx, d, meta)
}

func resourceAlertConfigDelete(ctx context.Context, d *schema.ResourceData, meta any) diag.Diagnostics {
client := meta.(*common.Client).AssertsAPIClient
if client == nil {
return diag.Errorf("Asserts API client is not configured")
}

stackID := meta.(*common.Client).GrafanaStackID
if stackID == 0 {
return diag.Errorf("stack_id must be set in provider configuration for Asserts resources")
func resourceAlertConfigDelete(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
client, stackID, diags := validateAssertsClient(meta)
if diags.HasError() {
return diags
}
name := d.Id()

Expand Down
47 changes: 29 additions & 18 deletions internal/resources/asserts/resource_alert_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"strconv"
"testing"
"time"

"github.com/grafana/terraform-provider-grafana/v4/internal/common"
"github.com/grafana/terraform-provider-grafana/v4/internal/testutils"
Expand Down Expand Up @@ -117,6 +118,7 @@ func testAccAssertsAlertConfigCheckDestroy(s *terraform.State) error {
client := testutils.Provider.Meta().(*common.Client).AssertsAPIClient
ctx := context.Background()

deadline := time.Now().Add(60 * time.Second)
for _, rs := range s.RootModule().Resources {
if rs.Type != "grafana_asserts_notification_alerts_config" {
continue
Expand All @@ -126,24 +128,37 @@ func testAccAssertsAlertConfigCheckDestroy(s *terraform.State) error {
name := rs.Primary.ID
stackID := fmt.Sprintf("%d", testutils.Provider.Meta().(*common.Client).GrafanaStackID)

// Get all alert configs
request := client.AlertConfigurationAPI.GetAllAlertConfigs(ctx).
XScopeOrgID(stackID)
for {
// Get all alert configs
request := client.AlertConfigurationAPI.GetAllAlertConfigs(ctx).
XScopeOrgID(stackID)

alertConfigs, _, err := request.Execute()
if err != nil {
// If we can't get configs, assume it's because they don't exist
if common.IsNotFoundError(err) {
break
}
return fmt.Errorf("error checking alert config destruction: %s", err)
}

alertConfigs, _, err := request.Execute()
if err != nil {
// If we can't get configs, assume it's because they don't exist
if common.IsNotFoundError(err) {
continue
// Check if our config still exists
stillExists := false
for _, config := range alertConfigs.AlertConfigs {
if config.Name != nil && *config.Name == name {
stillExists = true
break
}
}
return fmt.Errorf("error checking alert config destruction: %s", err)
}

// Check if our config still exists
for _, config := range alertConfigs.AlertConfigs {
if config.Name != nil && *config.Name == name {
if !stillExists {
break
}

if time.Now().After(deadline) {
return fmt.Errorf("alert config %s still exists", name)
}
time.Sleep(2 * time.Second)
}
}

Expand Down Expand Up @@ -208,11 +223,7 @@ resource "grafana_asserts_notification_alerts_config" "test" {
// to verify the retry logic handles eventual consistency properly
func TestAccAssertsAlertConfig_eventualConsistencyStress(t *testing.T) {
testutils.CheckCloudInstanceTestsEnabled(t)

// Skip this flaky test unless explicitly enabled
if !testutils.AccTestsEnabled("TF_ACC_STRESS_TESTS") {
t.Skip("TF_ACC_STRESS_TESTS must be set to a truthy value for stress tests")
}
testutils.CheckStressTestsEnabled(t)

stackID := getTestStackID(t)
baseName := fmt.Sprintf("stress-test-%s", acctest.RandString(8))
Expand Down
Loading
Loading