Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7c04845
Update package dependencies
peterwald Mar 14, 2025
5829dc0
Minor tweaks; trigger a build.
peterwald Mar 14, 2025
b53e6e1
Merge branch 'main' into trend
peterwald Mar 20, 2025
2075608
Update outdated packages
peterwald Mar 20, 2025
1465c13
Handle multiple executions in the report summary.
peterwald Mar 20, 2025
51379e5
Add ability to view multiple versions of the trend data
peterwald Mar 21, 2025
3aead83
Support adding tags to scenarios
shyamnamboodiripad Mar 25, 2025
0a1f3ec
WIP
peterwald Mar 25, 2025
314e9fe
Merge branch 'tags' into trend
peterwald Mar 25, 2025
7f0d783
Updates to fix merge conflicts
peterwald Mar 25, 2025
04999b1
Add score item-level trend data.
peterwald Mar 25, 2025
b1060e0
Merge remote-tracking branch 'origin/main' into trend
peterwald Mar 26, 2025
7acf7ca
Rework tree to use node keys and add reporting context.
peterwald Mar 26, 2025
dbaae57
Move rendermarkdown to ReportContext
peterwald Mar 26, 2025
3fd7c88
Move more central data into the ReportContext
peterwald Mar 26, 2025
999ed40
Build history report; Refactor major components into files.
peterwald Mar 27, 2025
16bb826
Report styling updates.
peterwald Mar 27, 2025
2562c2a
Fix npm linter errors
peterwald Mar 27, 2025
bcc0ae3
Update package-lock.json
peterwald Mar 27, 2025
4466eb7
Merge branch 'main' into trend
peterwald Mar 27, 2025
3401549
Fixup package-lock.json
peterwald Mar 27, 2025
7eea9c5
Rollback change to make chat response and messages optional.
peterwald Mar 27, 2025
321fcdf
Messages cannot be null
peterwald Mar 27, 2025
d894717
Don't show the selectors if there is no history.
peterwald Mar 27, 2025
b8b46d1
Fix for duplicate data embedding.
peterwald Mar 28, 2025
70a3ef0
Move tag selection into the ReportContext
peterwald Mar 28, 2025
de573aa
Rename firstExecutionName
peterwald Mar 28, 2025
0641912
Change default trend capture from 1 to 10
peterwald Mar 28, 2025
9149046
Make conversations 100% wide up to 72rem.
peterwald Mar 28, 2025
7efd42f
Update maxWidth on container
peterwald Mar 28, 2025
3cfd3f3
Set a maxwidth on section.
peterwald Mar 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,25 @@ internal async Task<int> InvokeAsync(

List<ScenarioRunResult> results = [];

string? latestExecutionName = null;

await foreach (string executionName in
resultStore.GetLatestExecutionNamesAsync(lastN, cancellationToken).ConfigureAwait(false))
{
latestExecutionName ??= executionName;

await foreach (ScenarioRunResult result in
resultStore.ReadResultsAsync(
executionName,
cancellationToken: cancellationToken).ConfigureAwait(false))
{
if (result.ExecutionName != latestExecutionName)
{
// Clear the chat data for following executions
result.Messages = [];
result.ModelResponse = new ChatResponse();
}

results.Add(result);

logger.LogInformation("Execution: {executionName} Scenario: {scenarioName} Iteration: {iterationName}", result.ExecutionName, result.ScenarioName, result.IterationName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ private static async Task<int> Main(string[] args)
};
reportCmd.AddOption(outputOpt);

var lastNOpt = new Option<int>(["-n"], () => 1, "Number of most recent executions to include in the report.");
var lastNOpt = new Option<int>(["-n"], () => 10, "Number of most recent executions to include in the report.");
reportCmd.AddOption(lastNOpt);

var formatOpt =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import * as SDK from "azure-devops-extension-sdk";
import { getClient } from "azure-devops-extension-api";
import { Build, Attachment, BuildRestClient } from "azure-devops-extension-api/Build";
import { FluentProvider, webLightTheme } from '@fluentui/react-components';
import { createScoreTree } from '../../components/Summary.ts';
import { createScoreSummary as createScoreSummary } from '../../components/Summary.ts';
import { ReportContextProvider } from '../../components/ReportContext.tsx';

const ErrorHtml = ({ message }: { message: string }) =>
<html>
Expand Down Expand Up @@ -67,12 +68,14 @@ const run = async () => {
throw new Error('No data was available to load.');
}

const scoreTree = createScoreTree(dataset);
const scoreSummary = createScoreSummary(dataset);

createRoot(document.getElementById('root')!).render(
<FluentProvider theme={webLightTheme}>
<StrictMode>
<App tree={scoreTree} dataset={dataset} />
<ReportContextProvider dataset={dataset} scoreSummary={scoreSummary}>
<App />
</ReportContextProvider>
</StrictMode>
</FluentProvider>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,11 @@ import { Settings28Regular, FilterDismissRegular, Dismiss20Regular } from '@flue
import { Drawer, DrawerBody, DrawerHeader, DrawerHeaderTitle, Switch, Tooltip } from '@fluentui/react-components';
import { makeStyles } from '@fluentui/react-components';
import './App.css';
import { ScoreNode } from './Summary';
import { ScenarioGroup } from './ScenarioTree';
import { GlobalTagsDisplay, FilterableTagsDisplay, categorizeAndSortTags } from './TagsDisplay';
import { tokens } from '@fluentui/react-components';

type AppProperties = {
dataset: Dataset,
tree: ScoreNode,
};
import { ScoreNodeHistory } from './ScoreNodeHistory';
import { useReportContext } from './ReportContext';

const useStyles = makeStyles({
header: {
Expand All @@ -26,6 +22,8 @@ const useStyles = makeStyles({
zIndex: 1,
paddingBottom: '12px',
backgroundColor: tokens.colorNeutralBackground1,
borderBottom: `1px solid ${tokens.colorNeutralStroke2}`,
marginBottom: '1rem',
},
headerTop: {
display: 'flex',
Expand Down Expand Up @@ -82,28 +80,16 @@ const useStyles = makeStyles({
drawerBody: { paddingTop: '1rem' },
});

function App({ dataset, tree }: AppProperties) {
function App() {
const classes = useStyles();
const { dataset, scoreSummary, selectedTags, clearFilters } = useReportContext();
const [isSettingsOpen, setIsSettingsOpen] = useState(false);
const [renderMarkdown, setRenderMarkdown] = useState(true);
const [selectedTags, setSelectedTags] = useState<string[]>([]);

const { renderMarkdown, setRenderMarkdown } = useReportContext();
const { globalTags, filterableTags } = categorizeAndSortTags(dataset);

const toggleSettings = () => setIsSettingsOpen(!isSettingsOpen);
const toggleRenderMarkdown = () => setRenderMarkdown(!renderMarkdown);
const closeSettings = () => setIsSettingsOpen(false);

const handleTagClick = (tag: string) => {
setSelectedTags((prevTags) =>
prevTags.includes(tag) ? prevTags.filter((t) => t !== tag) : [...prevTags, tag]
);
};

const clearFilters = () => {
setSelectedTags([]);
};

return (
<>
<div className={classes.header}>
Expand All @@ -125,17 +111,17 @@ function App({ dataset, tree }: AppProperties) {
</div>
</div>
<GlobalTagsDisplay globalTags={globalTags} />

<FilterableTagsDisplay
filterableTags={filterableTags}
onTagClick={handleTagClick}
selectedTags={selectedTags}
/>

<ScoreNodeHistory />
</div>

<ScenarioGroup
node={tree}
renderMarkdown={renderMarkdown}
selectedTags={selectedTags}
node={scoreSummary.primaryResult}
scoreSummary={scoreSummary}
/>

<p className={classes.footerText}>
Expand All @@ -150,7 +136,7 @@ function App({ dataset, tree }: AppProperties) {
<DrawerBody className={classes.drawerBody}>
<Switch
checked={renderMarkdown}
onChange={toggleRenderMarkdown}
onChange={(_ev, data) => setRenderMarkdown(data.checked)}
label={<span className={classes.switchLabel}>Render markdown for conversations</span>}
/>
</DrawerBody>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { Table, TableHeader, TableRow, TableHeaderCell, TableBody, TableCell } from "@fluentui/react-components";
import { ChevronDown12Regular, ChevronRight12Regular, Warning16Regular, CheckmarkCircle16Regular, Copy16Regular } from "@fluentui/react-icons";
import { useState } from "react";
import { useStyles } from "./Styles";


/**
 * Collapsible "LLM Chat Diagnostic Details" section.
 *
 * The header shows how many chat turns were served from the response cache
 * (only when at least one turn reports a cache status). When expanded, a table
 * lists one row per turn. Optional columns (cache key, cache status, model and
 * token counts) are rendered only when at least one turn carries that data;
 * latency is always shown.
 *
 * @param chatDetails - Diagnostic details whose `turnDetails` array supplies the rows.
 */
export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails; }) => {
    const classes = useStyles();
    const [isExpanded, setIsExpanded] = useState(false);

    const totalTurns = chatDetails.turnDetails.length;
    const cachedTurns = chatDetails.turnDetails.filter(turn => turn.cacheHit === true).length;

    // A column is shown only if at least one turn has a value for it.
    const hasCacheKey = chatDetails.turnDetails.some(turn => turn.cacheKey !== undefined);
    const hasCacheStatus = chatDetails.turnDetails.some(turn => turn.cacheHit !== undefined);
    const hasModelInfo = chatDetails.turnDetails.some(turn => turn.model !== undefined);
    const hasInputTokens = chatDetails.turnDetails.some(turn => turn.usage?.inputTokenCount !== undefined);
    const hasOutputTokens = chatDetails.turnDetails.some(turn => turn.usage?.outputTokenCount !== undefined);
    const hasTotalTokens = chatDetails.turnDetails.some(turn => turn.usage?.totalTokenCount !== undefined);

    const copyToClipboard = (text: string) => {
        // writeText returns a promise that rejects when clipboard access is denied;
        // handle the rejection so it does not surface as an unhandled promise rejection.
        void navigator.clipboard.writeText(text).catch((error: unknown) => {
            console.error("Failed to copy cache key to clipboard.", error);
        });
    };

    return (
        <div className={classes.section}>
            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
                <h3 className={classes.sectionHeaderText}>LLM Chat Diagnostic Details</h3>
                {hasCacheStatus && (
                    <div className={classes.hint}>
                        {cachedTurns !== totalTurns ?
                            <Warning16Regular className={classes.cacheMissIcon} /> :
                            <CheckmarkCircle16Regular className={classes.cacheHitIcon} />}
                        {cachedTurns}/{totalTurns} chat responses for this evaluation were fulfilled from cache
                    </div>
                )}
            </div>

            {isExpanded && (
                <div className={classes.sectionContainer}>
                    <div className={classes.tableContainer}>
                        <Table>
                            <TableHeader>
                                <TableRow>
                                    {hasCacheKey && <TableHeaderCell>Cache Key</TableHeaderCell>}
                                    {hasCacheStatus && <TableHeaderCell>Cache Status</TableHeaderCell>}
                                    <TableHeaderCell>Latency (s)</TableHeaderCell>
                                    {hasModelInfo && <TableHeaderCell>Model Used</TableHeaderCell>}
                                    {hasInputTokens && <TableHeaderCell>Input Tokens</TableHeaderCell>}
                                    {hasOutputTokens && <TableHeaderCell>Output Tokens</TableHeaderCell>}
                                    {hasTotalTokens && <TableHeaderCell>Total Tokens</TableHeaderCell>}
                                </TableRow>
                            </TableHeader>
                            <TableBody>
                                {chatDetails.turnDetails.map((turn, index) => (
                                    <TableRow key={index}>
                                        {hasCacheKey && (
                                            <TableCell className={classes.cacheKeyCell}>
                                                {turn.cacheKey ? (
                                                    <div className={classes.cacheKeyContainer} title={turn.cacheKey}>
                                                        <span className={classes.cacheKey}>
                                                            {turn.cacheKey.substring(0, 8)}...
                                                        </span>
                                                        <button
                                                            className={classes.copyButton}
                                                            onClick={(e) => {
                                                                // Keep the click from bubbling to ancestor click handlers.
                                                                e.stopPropagation();
                                                                copyToClipboard(turn.cacheKey || "");
                                                            }}
                                                            title="Copy Cache Key"
                                                        >
                                                            <Copy16Regular />
                                                        </button>
                                                    </div>
                                                ) : (
                                                    <span className={classes.noCacheKey}>N/A</span>
                                                )}
                                            </TableCell>
                                        )}
                                        {hasCacheStatus && (
                                            <TableCell>
                                                {turn.cacheHit === true ?
                                                    <span className={classes.cacheHit}>
                                                        <CheckmarkCircle16Regular className={classes.cacheHitIcon} /> Hit
                                                    </span> :
                                                    <span className={classes.cacheMiss}>
                                                        <Warning16Regular className={classes.cacheMissIcon} /> Miss
                                                    </span>}
                                            </TableCell>
                                        )}
                                        <TableCell>{turn.latency.toFixed(2)}</TableCell>
                                        {hasModelInfo && <TableCell>{turn.model || '-'}</TableCell>}
                                        {/* `??` rather than `||` so a legitimate count of 0 is shown instead of '-'. */}
                                        {hasInputTokens && <TableCell>{turn.usage?.inputTokenCount ?? '-'}</TableCell>}
                                        {hasOutputTokens && <TableCell>{turn.usage?.outputTokenCount ?? '-'}</TableCell>}
                                        {hasTotalTokens && <TableCell>{turn.usage?.totalTokenCount ?? '-'}</TableCell>}
                                    </TableRow>
                                ))}
                            </TableBody>
                        </Table>
                    </div>
                </div>
            )}
        </div>
    );
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { mergeClasses } from "@fluentui/react-components";
import { ChevronDown12Regular, ChevronRight12Regular } from "@fluentui/react-icons";
import { useState } from "react";
import ReactMarkdown from "react-markdown";
import { useReportContext } from "./ReportContext";
import { useStyles } from "./Styles";
import { ChatMessageDisplay } from "./Summary";


/**
 * Collapsible "Conversation" section that renders the chat messages of an
 * evaluation, initially expanded.
 *
 * The header shows a one-line summary built from the model name and token
 * counts that are present. Each message body is rendered as markdown or as
 * preformatted text depending on the `renderMarkdown` setting read from the
 * report context.
 *
 * @param messages - Messages to display, in order.
 * @param model - Optional model name shown in the header summary.
 * @param usage - Optional token usage shown in the header summary.
 */
export const ConversationDetails = ({ messages, model, usage }: {
    messages: ChatMessageDisplay[];
    model?: string;
    usage?: UsageDetails;
}) => {
    const classes = useStyles();
    const [isExpanded, setIsExpanded] = useState(true);
    const { renderMarkdown } = useReportContext();

    // "user" and "system" messages share the user-side row styling; everything else is assistant-side.
    const isUserSide = (role: string) => {
        const normalizedRole = role.toLowerCase();
        return normalizedRole === 'user' || normalizedRole === 'system';
    };

    // Token counts are tested for presence (`!= null`) rather than truthiness so
    // that a reported count of 0 is still displayed instead of being dropped.
    const infoText = [
        model && `Model: ${model}`,
        usage?.inputTokenCount != null && `Input Tokens: ${usage.inputTokenCount}`,
        usage?.outputTokenCount != null && `Output Tokens: ${usage.outputTokenCount}`,
        usage?.totalTokenCount != null && `Total Tokens: ${usage.totalTokenCount}`,
    ].filter(Boolean).join(' • ');

    return (
        <div className={classes.section}>
            <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
                {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
                <h3 className={classes.sectionHeaderText}>Conversation</h3>
                {infoText && <div className={classes.hint}>{infoText}</div>}
            </div>

            {isExpanded && (
                <div className={classes.sectionContainer}>
                    {messages.map((message, index) => {
                        const isFromUserSide = isUserSide(message.role);
                        const messageRowClass = mergeClasses(
                            classes.messageRow,
                            isFromUserSide ? classes.userMessageRow : classes.assistantMessageRow
                        );

                        return (
                            <div key={index} className={messageRowClass}>
                                <div className={classes.messageParticipantName}>{message.participantName}</div>
                                <div className={classes.messageBubble}>
                                    {renderMarkdown ?
                                        <ReactMarkdown>{message.content}</ReactMarkdown> :
                                        <pre className={classes.preWrap}>{message.content}</pre>}
                                </div>
                            </div>
                        );
                    })}
                </div>
            )}
        </div>
    );
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { DismissCircle16Regular, Warning16Regular, Info16Regular } from "@fluentui/react-icons";
import { useStyles } from "./Styles";


/**
 * Renders evaluation diagnostics grouped by severity: all "error" entries
 * first, then "warning" entries, then "informational" entries. Each entry is
 * shown as an icon followed by its message, styled per severity.
 *
 * @param diagnostics - Diagnostics to display; entries with any other severity are not rendered.
 */
export const DiagnosticsContent = ({ diagnostics }: { diagnostics: EvaluationDiagnostic[]; }) => {
    const classes = useStyles();

    // Produces the rows for a single severity, keyed by position within that severity's group.
    const renderSeverity = (
        severity: EvaluationDiagnostic["severity"],
        keyPrefix: string,
        className: string,
        Icon: typeof DismissCircle16Regular,
    ) =>
        diagnostics
            .filter(entry => entry.severity === severity)
            .map((entry, position) => (
                <div key={`${keyPrefix}-${position}`} className={className}>
                    <Icon /> {entry.message}
                </div>
            ));

    return (
        <>
            {renderSeverity("error", "error", classes.failMessage, DismissCircle16Regular)}
            {renderSeverity("warning", "warning", classes.warningMessage, Warning16Regular)}
            {renderSeverity("informational", "info", classes.infoMessage, Info16Regular)}
        </>
    );
};
Loading
Loading