Skip to content

Commit e9644f1

Browse files
authored
replace RelevanceTruthAndCompletenessEvaluator (#46075)
1 parent f7d7c9a commit e9644f1

File tree

3 files changed

+37
-41
lines changed

3 files changed

+37
-41
lines changed

docs/ai/tutorials/evaluate-with-reporting.md

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
---
22
title: Tutorial - Evaluate a model's response
33
description: Create an MSTest app and add a custom evaluator to evaluate the AI chat response of a language model, and learn how to use the caching and reporting features of Microsoft.Extensions.AI.Evaluation.
4-
ms.date: 03/14/2025
4+
ms.date: 05/09/2025
55
ms.topic: tutorial
66
ms.custom: devx-track-dotnet-ai
77
---
88

99
# Tutorial: Evaluate a model's response with response caching and reporting
1010

11-
In this tutorial, you create an MSTest app to evaluate the chat response of an OpenAI model. The test app uses the [Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) libraries to perform the evaluations, cache the model responses, and create reports. The tutorial uses both a [built-in evaluator](xref:Microsoft.Extensions.AI.Evaluation.Quality.RelevanceTruthAndCompletenessEvaluator) and a custom evaluator.
11+
In this tutorial, you create an MSTest app to evaluate the chat response of an OpenAI model. The test app uses the [Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) libraries to perform the evaluations, cache the model responses, and create reports. The tutorial uses both built-in and custom evaluators.
1212

1313
## Prerequisites
1414

@@ -25,32 +25,32 @@ Complete the following steps to create an MSTest project that connects to the `g
2525

2626
1. In a terminal window, navigate to the directory where you want to create your app, and create a new MSTest app with the `dotnet new` command:
2727

28-
```dotnetcli
29-
dotnet new mstest -o TestAIWithReporting
30-
```
28+
```dotnetcli
29+
dotnet new mstest -o TestAIWithReporting
30+
```
3131

3232
1. Navigate to the `TestAIWithReporting` directory, and add the necessary packages to your app:
3333

34-
```dotnetcli
35-
dotnet add package Azure.AI.OpenAI
36-
dotnet add package Azure.Identity
37-
dotnet add package Microsoft.Extensions.AI.Abstractions --prerelease
38-
dotnet add package Microsoft.Extensions.AI.Evaluation --prerelease
39-
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality --prerelease
40-
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting --prerelease
41-
dotnet add package Microsoft.Extensions.AI.OpenAI --prerelease
42-
dotnet add package Microsoft.Extensions.Configuration
43-
dotnet add package Microsoft.Extensions.Configuration.UserSecrets
44-
```
34+
```dotnetcli
35+
dotnet add package Azure.AI.OpenAI
36+
dotnet add package Azure.Identity
37+
dotnet add package Microsoft.Extensions.AI.Abstractions --prerelease
38+
dotnet add package Microsoft.Extensions.AI.Evaluation --prerelease
39+
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality --prerelease
40+
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting --prerelease
41+
dotnet add package Microsoft.Extensions.AI.OpenAI --prerelease
42+
dotnet add package Microsoft.Extensions.Configuration
43+
dotnet add package Microsoft.Extensions.Configuration.UserSecrets
44+
```
4545

4646
1. Run the following commands to add [app secrets](/aspnet/core/security/app-secrets) for your Azure OpenAI endpoint, model name, and tenant ID:
4747

48-
```bash
49-
dotnet user-secrets init
50-
dotnet user-secrets set AZURE_OPENAI_ENDPOINT <your-azure-openai-endpoint>
51-
dotnet user-secrets set AZURE_OPENAI_GPT_NAME gpt-4o
52-
dotnet user-secrets set AZURE_TENANT_ID <your-tenant-id>
53-
```
48+
```bash
49+
dotnet user-secrets init
50+
dotnet user-secrets set AZURE_OPENAI_ENDPOINT <your-azure-openai-endpoint>
51+
dotnet user-secrets set AZURE_OPENAI_GPT_NAME gpt-4o
52+
dotnet user-secrets set AZURE_TENANT_ID <your-tenant-id>
53+
```
5454

5555
(Depending on your environment, the tenant ID might not be needed. In that case, remove it from the code that instantiates the <xref:Azure.Identity.DefaultAzureCredential>.)
5656

docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,11 @@ private static ChatConfiguration GetAzureOpenAIChatConfiguration()
5959
// <SnippetGetEvaluators>
6060
private static IEnumerable<IEvaluator> GetEvaluators()
6161
{
62-
IEvaluator rtcEvaluator = new RelevanceTruthAndCompletenessEvaluator();
62+
IEvaluator relevanceEvaluator = new RelevanceEvaluator();
63+
IEvaluator coherenceEvaluator = new CoherenceEvaluator();
6364
IEvaluator wordCountEvaluator = new WordCountEvaluator();
6465

65-
return [rtcEvaluator, wordCountEvaluator];
66+
return [relevanceEvaluator, coherenceEvaluator, wordCountEvaluator];
6667
}
6768
// </SnippetGetEvaluators>
6869

@@ -104,20 +105,15 @@ private static void Validate(EvaluationResult result)
104105
{
105106
// Retrieve the score for relevance from the <see cref="EvaluationResult"/>.
106107
NumericMetric relevance =
107-
result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.RelevanceMetricName);
108+
result.Get<NumericMetric>(RelevanceEvaluator.RelevanceMetricName);
108109
Assert.IsFalse(relevance.Interpretation!.Failed, relevance.Reason);
109110
Assert.IsTrue(relevance.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);
110111

111-
// Retrieve the score for truth from the <see cref="EvaluationResult"/>.
112-
NumericMetric truth = result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.TruthMetricName);
113-
Assert.IsFalse(truth.Interpretation!.Failed, truth.Reason);
114-
Assert.IsTrue(truth.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);
115-
116-
// Retrieve the score for completeness from the <see cref="EvaluationResult"/>.
117-
NumericMetric completeness =
118-
result.Get<NumericMetric>(RelevanceTruthAndCompletenessEvaluator.CompletenessMetricName);
119-
Assert.IsFalse(completeness.Interpretation!.Failed, completeness.Reason);
120-
Assert.IsTrue(completeness.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);
112+
// Retrieve the score for coherence from the <see cref="EvaluationResult"/>.
113+
NumericMetric coherence =
114+
result.Get<NumericMetric>(CoherenceEvaluator.CoherenceMetricName);
115+
Assert.IsFalse(coherence.Interpretation!.Failed, coherence.Reason);
116+
Assert.IsTrue(coherence.Interpretation.Rating is EvaluationRating.Good or EvaluationRating.Exceptional);
121117

122118
// Retrieve the word count from the <see cref="EvaluationResult"/>.
123119
NumericMetric wordCount = result.Get<NumericMetric>(WordCountEvaluator.WordCountMetricName);
@@ -135,7 +131,7 @@ public async Task SampleAndEvaluateResponse()
135131
// Create a <see cref="ScenarioRun"/> with the scenario name
136132
// set to the fully qualified name of the current test method.
137133
await using ScenarioRun scenarioRun =
138-
await s_defaultReportingConfiguration.CreateScenarioRunAsync(this.ScenarioName);
134+
await s_defaultReportingConfiguration.CreateScenarioRunAsync(ScenarioName);
139135

140136
// Use the <see cref="IChatClient"/> that's included in the
141137
// <see cref="ScenarioRun.ChatConfiguration"/> to get the LLM response.

docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
<ItemGroup>
1212
<PackageReference Include="Azure.AI.OpenAI" Version="2.1.0" />
1313
<PackageReference Include="Azure.Identity" Version="1.13.2" />
14-
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.4.0-preview.1.25207.5" />
15-
<PackageReference Include="Microsoft.Extensions.AI.Evaluation" Version="9.4.0-preview.1.25207.5" />
16-
<PackageReference Include="Microsoft.Extensions.AI.Evaluation.Quality" Version="9.4.0-preview.1.25207.5" />
17-
<PackageReference Include="Microsoft.Extensions.AI.Evaluation.Reporting" Version="9.4.0-preview.1.25207.5" />
18-
<PackageReference Include="Microsoft.Extensions.AI.OpenAI" Version="9.4.0-preview.1.25207.5" />
14+
<PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.4.3-preview.1.25230.7" />
15+
<PackageReference Include="Microsoft.Extensions.AI.Evaluation" Version="9.4.3-preview.1.25230.7" />
16+
<PackageReference Include="Microsoft.Extensions.AI.Evaluation.Quality" Version="9.4.3-preview.1.25230.7" />
17+
<PackageReference Include="Microsoft.Extensions.AI.Evaluation.Reporting" Version="9.4.3-preview.1.25230.7" />
18+
<PackageReference Include="Microsoft.Extensions.AI.OpenAI" Version="9.4.3-preview.1.25230.7" />
1919
<PackageReference Include="microsoft.extensions.configuration" Version="9.0.4" />
2020
<PackageReference Include="Microsoft.Extensions.Configuration.UserSecrets" Version="9.0.4" />
2121
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.13.0" />

0 commit comments

Comments
 (0)