@@ -114,22 +114,24 @@ def mock_evaluate_instances_side_effect(*args, **kwargs):
         mock_upload_to_gcs.return_value = (
             "gs://mock-bucket/mock_path/evaluation_result_timestamp.json"
         )
-        mock_prebuilt_safety_metric = vertexai_genai_types.LLMMetric(
-            name="safety", prompt_template="Is this safe? {response}"
+        mock_prebuilt_fluency_metric = vertexai_genai_types.LLMMetric(
+            name="fluency", prompt_template="Is this fluent? {response}"
         )
-        mock_prebuilt_safety_metric._is_predefined = True
-        mock_prebuilt_safety_metric._config_source = "gs://mock-metrics/safety/v1.yaml"
-        mock_prebuilt_safety_metric._version = "v1"
+        mock_prebuilt_fluency_metric._is_predefined = True
+        mock_prebuilt_fluency_metric._config_source = (
+            "gs://mock-metrics/fluency/v1.yaml"
+        )
+        mock_prebuilt_fluency_metric._version = "v1"

-        mock_fetch_prebuilt_metric.return_value = mock_prebuilt_safety_metric
+        mock_fetch_prebuilt_metric.return_value = mock_prebuilt_fluency_metric

         yield {
             "mock_storage_client": mock_storage_client,
             "mock_bq_client": mock_bq_client,
             "mock_evaluate_instances": mock_evaluate_instances,
             "mock_upload_to_gcs": mock_upload_to_gcs,
             "mock_fetch_prebuilt_metric": mock_fetch_prebuilt_metric,
-            "mock_prebuilt_safety_metric": mock_prebuilt_safety_metric,
+            "mock_prebuilt_fluency_metric": mock_prebuilt_fluency_metric,
         }

@@ -3156,7 +3158,7 @@ def test_execute_evaluation_with_openai_schema(
         mock_loader_instance.load.return_value = mock_openai_raw_data

         with mock.patch.object(
-            _evals_metric_handlers.LLMMetricHandler, "process"
+            _evals_metric_handlers.LLMMetricHandler, "get_metric_result"
         ) as mock_llm_process:
             mock_llm_process.return_value = (
                 vertexai_genai_types.EvalCaseMetricResult(
@@ -3233,7 +3235,7 @@ def test_llm_metric_default_aggregation_mixed_results(
         )

         with mock.patch(
-            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.process"
+            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.get_metric_result"
         ) as mock_llm_process:
             mock_llm_process.side_effect = [
                 vertexai_genai_types.EvalCaseMetricResult(
@@ -3288,7 +3290,7 @@ def custom_agg_fn(results: list[vertexai_genai_types.EvalCaseMetricResult]):
         )

         with mock.patch(
-            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.process"
+            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.get_metric_result"
         ) as mock_llm_process:
             mock_llm_process.side_effect = [
                 vertexai_genai_types.EvalCaseMetricResult(
@@ -3335,7 +3337,7 @@ def custom_agg_fn_error(
             aggregate_summary_fn=custom_agg_fn_error,
         )
         with mock.patch(
-            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.process"
+            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.get_metric_result"
         ) as mock_llm_process:
             mock_llm_process.side_effect = [
                 vertexai_genai_types.EvalCaseMetricResult(
@@ -3379,7 +3381,7 @@ def custom_agg_fn_invalid_type(
             aggregate_summary_fn=custom_agg_fn_invalid_type,
         )
         with mock.patch(
-            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.process"
+            "vertexai._genai._evals_metric_handlers.LLMMetricHandler.get_metric_result"
         ) as mock_llm_process:
             mock_llm_process.return_value = vertexai_genai_types.EvalCaseMetricResult(
                 metric_name="invalid_type_fallback", score=0.8
@@ -3405,7 +3407,7 @@ def test_execute_evaluation_lazy_loaded_prebuilt_metric_instance(
         )

         lazy_metric_instance = _evals_utils.LazyLoadedPrebuiltMetric(
-            name="safety", version="v1"
+            name="fluency", version="v1"
         )

         result = _evals_common._execute_evaluation(
@@ -3421,7 +3423,7 @@ def test_execute_evaluation_lazy_loaded_prebuilt_metric_instance(
         assert result.evaluation_dataset == [input_dataset]
         assert len(result.summary_metrics) == 1
         summary_metric = result.summary_metrics[0]
-        assert summary_metric.metric_name == "safety"
+        assert summary_metric.metric_name == "fluency"
         assert summary_metric.mean_score == 0.9

     def test_execute_evaluation_prebuilt_metric_via_loader(
@@ -3434,7 +3436,7 @@ def test_execute_evaluation_prebuilt_metric_via_loader(
             eval_dataset_df=dataset_df
         )

-        prebuilt_metric = vertexai_genai_types.PrebuiltMetric.SAFETY
+        prebuilt_metric = vertexai_genai_types.PrebuiltMetric.FLUENCY

         result = _evals_common._execute_evaluation(
             api_client=mock_api_client_fixture,
@@ -3449,7 +3451,7 @@ def test_execute_evaluation_prebuilt_metric_via_loader(
         assert result.evaluation_dataset == [input_dataset]
         assert len(result.summary_metrics) == 1
         summary_metric = result.summary_metrics[0]
-        assert summary_metric.metric_name == "safety"
+        assert summary_metric.metric_name == "fluency"
         assert summary_metric.mean_score == 0.9

     def test_execute_evaluation_with_gcs_destination(