
Commit c55f906

jsondai authored and copybara-github committed
chore: GenAI SDK client(evals) - add RubricMetric as an alias to PrebuiltMetric in evals module, merge customizability of general quality and remove separate metric names
PiperOrigin-RevId: 799670236
1 parent edaedb1 · commit c55f906

File tree

9 files changed (+19 / -27 lines)

9 files changed

+19
-27
lines changed

README.rst

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ Then run evaluation by providing the inference results and specifying the metric
     metrics=[
         types.Metric(name='exact_match'),
         types.Metric(name='rouge_l_sum'),
-        types.PrebuiltMetric.TEXT_QUALITY,
+        types.RubricMetric.TEXT_QUALITY,
     ]
 )
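For reference, a hedged sketch of the updated README flow using the renamed metric; the client construction and `eval_dataset` are assumptions standing in for the README's earlier inference steps, and the project/location values are placeholders:

# Sketch only: assumes an authenticated Vertex AI client and inference
# results produced earlier in the README's workflow.
import vertexai
from vertexai import types

client = vertexai.Client(project="my-project", location="us-central1")  # assumed setup

evaluation_result = client.evals.evaluate(
    dataset=eval_dataset,  # assumed: produced by a prior inference step
    metrics=[
        types.Metric(name='exact_match'),
        types.Metric(name='rouge_l_sum'),
        types.RubricMetric.TEXT_QUALITY,  # renamed from types.PrebuiltMetric.TEXT_QUALITY
    ],
)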

tests/unit/vertexai/genai/replays/test_batch_evaluate.py

Lines changed: 2 additions & 2 deletions
@@ -30,7 +30,7 @@ def test_batch_eval(client):
     batch_eval_operation = client.evals.batch_evaluate(
         dataset=eval_dataset,
         metrics=[
-            types.PrebuiltMetric.TEXT_QUALITY,
+            types.RubricMetric.TEXT_QUALITY,
         ],
         dest="gs://genai-eval-sdk-replay-test/test_data/batch_eval_output",
     )
@@ -60,7 +60,7 @@ async def test_batch_eval_async(client):
     response = await client.aio.evals.batch_evaluate(
         dataset=eval_dataset,
         metrics=[
-            types.PrebuiltMetric.TEXT_QUALITY,
+            types.RubricMetric.TEXT_QUALITY,
         ],
         dest="gs://genai-eval-sdk-replay-test/test_data/batch_eval_output",
     )

tests/unit/vertexai/genai/replays/test_evaluate.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ def test_evaluation_result(client):
     )
 
     metrics_to_run = [
-        types.PrebuiltMetric.TEXT_QUALITY,
+        types.RubricMetric.TEXT_QUALITY,
     ]
 
     evaluation_result = client.evals.evaluate(

tests/unit/vertexai/genai/test_evals.py

Lines changed: 1 addition & 1 deletion
@@ -3436,7 +3436,7 @@ def test_execute_evaluation_prebuilt_metric_via_loader(
         eval_dataset_df=dataset_df
     )
 
-    prebuilt_metric = vertexai_genai_types.PrebuiltMetric.FLUENCY
+    prebuilt_metric = vertexai_genai_types.RubricMetric.FLUENCY
 
     result = _evals_common._execute_evaluation(
         api_client=mock_api_client_fixture,

vertexai/_genai/_evals_common.py

Lines changed: 5 additions & 6 deletions
@@ -746,7 +746,7 @@ def _resolve_dataset_inputs(
 def _resolve_metrics(
     metrics: list[types.Metric], api_client: Any
 ) -> list[types.Metric]:
-    """Resolves a list of metric instances, loading prebuilt metrics if necessary."""
+    """Resolves a list of metric instances, loading RubricMetric if necessary."""
     resolved_metrics_list = []
     for metric_instance in metrics:
         if isinstance(metric_instance, _evals_utils.LazyLoadedPrebuiltMetric):
@@ -756,7 +756,7 @@ def _resolve_metrics(
             )
         except Exception as e:
             logger.error(
-                "Failed to resolve prebuilt metric %s@%s: %s",
+                "Failed to resolve RubricMetric %s@%s: %s",
                 metric_instance.name,
                 metric_instance.version,
                 e,
@@ -768,7 +768,7 @@ def _resolve_metrics(
             try:
                 metric_name_str = str(metric_instance)
                 lazy_metric_instance = getattr(
-                    _evals_utils.PrebuiltMetric, metric_name_str.upper()
+                    _evals_utils.RubricMetric, metric_name_str.upper()
                 )
                 if isinstance(
                     lazy_metric_instance, _evals_utils.LazyLoadedPrebuiltMetric
@@ -778,12 +778,11 @@ def _resolve_metrics(
                     )
                 else:
                     raise TypeError(
-                        f"PrebuiltMetric.{metric_name_str.upper()} did not return a"
-                        " LazyLoadedPrebuiltMetric proxy."
+                        f"RubricMetric.{metric_name_str.upper()} cannot be resolved."
                     )
             except AttributeError as exc:
                 raise TypeError(
-                    "Unsupported metric type or invalid prebuilt metric name:"
+                    "Unsupported metric type or invalid RubricMetric name:"
                     f" {metric_instance}"
                 ) from exc
     return resolved_metrics_list

vertexai/_genai/_evals_constant.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,5 @@
2525
"multi_turn_text_quality_v1",
2626
"final_response_match_v2",
2727
"final_response_reference_free_v1",
28-
"partially_customizable_general_quality_v1",
29-
"fully_customizable_general_quality_v1",
3028
}
3129
)

vertexai/_genai/_evals_utils.py

Lines changed: 3 additions & 10 deletions
@@ -521,8 +521,8 @@ class PrebuiltMetricLoader:
     when they are first accessed.
 
     Example:
-        metric = PrebuiltMetric.TEXT_QUALITY
-        metric = PrebuiltMetric.TEXT_QUALITY(version="v1")
+        from vertexai import types
+        text_quality_metric = types.RubricMetric.TEXT_QUALITY
     """
 
     def __getattr__(
@@ -554,14 +554,6 @@ def MULTI_TURN_GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
     def MULTI_TURN_TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
         return self.__getattr__("MULTI_TURN_TEXT_QUALITY")
 
-    @property
-    def PARTIALLY_CUSTOMIZABLE_GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
-        return self.__getattr__("PARTIALLY_CUSTOMIZABLE_GENERAL_QUALITY")
-
-    @property
-    def FULLY_CUSTOMIZABLE_GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
-        return self.__getattr__("FULLY_CUSTOMIZABLE_GENERAL_QUALITY")
-
     @property
     def FINAL_RESPONSE_MATCH(self) -> LazyLoadedPrebuiltMetric:
         return self.__getattr__("FINAL_RESPONSE_MATCH", version="v2")
@@ -600,6 +592,7 @@ def MULTI_TURN_SAFETY(self) -> LazyLoadedPrebuiltMetric:
 
 
 PrebuiltMetric = PrebuiltMetricLoader()
+RubricMetric = PrebuiltMetric
 
 
 class BatchEvaluateRequestPreparer:
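The one-line alias at the bottom of this diff is the whole compatibility story: both public names are bound to a single loader instance, so existing `PrebuiltMetric` call sites keep working unchanged. A minimal sketch of the aliasing pattern (simplified loader, not the SDK's class):

class MetricLoader:
    def __getattr__(self, name: str) -> str:
        # The real loader returns a LazyLoadedPrebuiltMetric proxy; a plain
        # string stands in for it in this sketch.
        return f"lazy:{name.lower()}"

PrebuiltMetric = MetricLoader()
RubricMetric = PrebuiltMetric  # alias: two public names, one shared object

assert RubricMetric is PrebuiltMetric
assert PrebuiltMetric.TEXT_QUALITY == RubricMetric.TEXT_QUALITY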

vertexai/_genai/evals.py

Lines changed: 4 additions & 2 deletions
@@ -1115,7 +1115,7 @@ def evaluate(
         dataset: Union[
             types.EvaluationDatasetOrDict, list[types.EvaluationDatasetOrDict]
         ],
-        metrics: list[types.MetricOrDict],
+        metrics: list[types.MetricOrDict] = None,
         config: Optional[types.EvaluateMethodConfigOrDict] = None,
     ) -> types.EvaluationResult:
         """Evaluates candidate responses in the provided dataset(s) using the specified metrics.
@@ -1147,6 +1147,8 @@ def evaluate(
         else:
             if isinstance(dataset, dict):
                 dataset = types.EvaluationDataset.model_validate(dataset)
+        if metrics is None:
+            metrics = [types.Metric(name="general_quality_v1")]
 
         return _evals_common._execute_evaluation(
             api_client=self._api_client,
@@ -1279,7 +1281,7 @@ def generate_rubrics(
             metric_spec_parameters: Optional. Parameters for the Predefined
                 Metric, used to customize rubric generation. Only used if
                 `predefined_spec_name` is set.
-                Example: {"requirements": ["The response must be in Japanese."]}
+                Example: {"guidelines": ["The response must be in Japanese."]}
             config: Optional. Configuration for the rubric generation process.
 
         Returns:
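With `metrics` now defaulting to `None`, omitting it should be equivalent to passing the merged general-quality metric explicitly. A hedged usage sketch, reusing the assumed `client` and `eval_dataset` from the README sketch above:

from vertexai import types

# Expected to be equivalent after this change: omitting `metrics` falls
# back to [types.Metric(name="general_quality_v1")].
result_default = client.evals.evaluate(dataset=eval_dataset)
result_explicit = client.evals.evaluate(
    dataset=eval_dataset,
    metrics=[types.Metric(name="general_quality_v1")],
)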

vertexai/_genai/types.py

Lines changed: 2 additions & 2 deletions
@@ -50,11 +50,11 @@
 
 logger = logging.getLogger("vertexai_genai.types")
 
-__all__ = ["PrebuiltMetric"]  # noqa: F822
+__all__ = ["PrebuiltMetric", "RubricMetric"]  # noqa: F822
 
 
 def __getattr__(name: str) -> typing.Any:
-    if name == "PrebuiltMetric":
+    if name == "PrebuiltMetric" or name == "RubricMetric":
         module = importlib.import_module("._evals_utils", __package__)
         prebuilt_metric_obj = getattr(module, name)
         globals()[name] = prebuilt_metric_obj
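The module-level `__getattr__` extended here is the PEP 562 lazy-import hook: `_evals_utils` is imported only the first time either name is accessed, and caching the result in `globals()` means the hook never fires for that name again. A self-contained sketch of the same pattern (the package and `Heavy` class are illustrative; a sibling `_heavy.py` defining `Heavy` is assumed):

# lazy_pkg/__init__.py -- illustrative package, not the SDK's code.
import importlib
import typing

__all__ = ["Heavy"]  # noqa: F822

def __getattr__(name: str) -> typing.Any:
    if name == "Heavy":
        module = importlib.import_module("._heavy", __package__)  # deferred import
        obj = getattr(module, name)
        globals()[name] = obj  # cached: this hook is skipped next time
        return obj
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")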
