Skip to content
agentEvaluation.badge

agentEvaluation.title

agentEvaluation.subtitle

5 agentEvaluation.stats.displayTypes
13 agentEvaluation.stats.traceFormats
14 agentEvaluation.stats.exampleProjects
3 agentEvaluation.stats.liveBackends

agentEvaluation.displayTypes.title

agentEvaluation.displayTypes.subtitle

🔄

agentEvaluation.displayTypes.items.agentTrace.title

agentEvaluation.displayTypes.items.agentTrace.description

🌐

agentEvaluation.displayTypes.items.webAgent.title

agentEvaluation.displayTypes.items.webAgent.description

💬

agentEvaluation.displayTypes.items.interactiveChat.title

agentEvaluation.displayTypes.items.interactiveChat.description

📡

agentEvaluation.displayTypes.items.liveAgent.title

agentEvaluation.displayTypes.items.liveAgent.description

💻

agentEvaluation.displayTypes.items.codingTrace.title

agentEvaluation.displayTypes.items.codingTrace.description

agentEvaluation.schemas.title

agentEvaluation.schemas.subtitle

agentEvaluation.schemas.items.trajectoryEval.badge

agentEvaluation.schemas.items.trajectoryEval.title

agentEvaluation.schemas.items.trajectoryEval.description

agentEvaluation.schemas.items.rubricEval.badge

agentEvaluation.schemas.items.rubricEval.title

agentEvaluation.schemas.items.rubricEval.description

agentEvaluation.schemas.items.pairwise.badge

agentEvaluation.schemas.items.pairwise.title

agentEvaluation.schemas.items.pairwise.description

agentEvaluation.schemas.items.perTurn.badge

agentEvaluation.schemas.items.perTurn.title

agentEvaluation.schemas.items.perTurn.description

agentEvaluation.schemas.items.processReward.badge

agentEvaluation.schemas.items.processReward.title

agentEvaluation.schemas.items.processReward.description

agentEvaluation.schemas.items.codeReview.badge

agentEvaluation.schemas.items.codeReview.title

agentEvaluation.schemas.items.codeReview.description

agentEvaluation.traceFormats.title

agentEvaluation.traceFormats.subtitle

| agentEvaluation.traceFormats.headers.converter | agentEvaluation.traceFormats.headers.source | agentEvaluation.traceFormats.headers.features |
| --- | --- | --- |
| agentEvaluation.traceFormats.items.langchain.name | agentEvaluation.traceFormats.items.langchain.source | agentEvaluation.traceFormats.items.langchain.features |
| agentEvaluation.traceFormats.items.langfuse.name | agentEvaluation.traceFormats.items.langfuse.source | agentEvaluation.traceFormats.items.langfuse.features |
| agentEvaluation.traceFormats.items.openai.name | agentEvaluation.traceFormats.items.openai.source | agentEvaluation.traceFormats.items.openai.features |
| agentEvaluation.traceFormats.items.anthropic.name | agentEvaluation.traceFormats.items.anthropic.source | agentEvaluation.traceFormats.items.anthropic.features |
| agentEvaluation.traceFormats.items.mcp.name | agentEvaluation.traceFormats.items.mcp.source | agentEvaluation.traceFormats.items.mcp.features |
| agentEvaluation.traceFormats.items.opentelemetry.name | agentEvaluation.traceFormats.items.opentelemetry.source | agentEvaluation.traceFormats.items.opentelemetry.features |
| agentEvaluation.traceFormats.items.atif.name | agentEvaluation.traceFormats.items.atif.source | agentEvaluation.traceFormats.items.atif.features |
| agentEvaluation.traceFormats.items.webarena.name | agentEvaluation.traceFormats.items.webarena.source | agentEvaluation.traceFormats.items.webarena.features |
| agentEvaluation.traceFormats.items.rawWeb.name | agentEvaluation.traceFormats.items.rawWeb.source | agentEvaluation.traceFormats.items.rawWeb.features |
| agentEvaluation.traceFormats.items.claudeCode.name | agentEvaluation.traceFormats.items.claudeCode.source | agentEvaluation.traceFormats.items.claudeCode.features |
| agentEvaluation.traceFormats.items.aider.name | agentEvaluation.traceFormats.items.aider.source | agentEvaluation.traceFormats.items.aider.features |
| agentEvaluation.traceFormats.items.sweAgent.name | agentEvaluation.traceFormats.items.sweAgent.source | agentEvaluation.traceFormats.items.sweAgent.features |
| agentEvaluation.traceFormats.items.react.name | agentEvaluation.traceFormats.items.react.source | agentEvaluation.traceFormats.items.react.features |

agentEvaluation.codingAgents.title

agentEvaluation.codingAgents.subtitle

agentEvaluation.codingAgents.features.diffRendering
agentEvaluation.codingAgents.features.terminalBlocks
agentEvaluation.codingAgents.features.fileTree
agentEvaluation.codingAgents.features.processReward
agentEvaluation.codingAgents.features.codeReview
agentEvaluation.codingAgents.features.traceConverters
```bash
# Quick start
pip install potato-annotation
potato start examples/agent-traces/coding-agent-eval/config.yaml -p 8000
```

agentEvaluation.liveAgent.title

agentEvaluation.liveAgent.subtitle

agentEvaluation.liveAgent.backends.ollama.title

agentEvaluation.liveAgent.backends.ollama.description

agentEvaluation.liveAgent.backends.anthropic.title

agentEvaluation.liveAgent.backends.anthropic.description

agentEvaluation.liveAgent.backends.claudeSdk.title

agentEvaluation.liveAgent.backends.claudeSdk.description

agentEvaluation.liveAgent.controls.pause
agentEvaluation.liveAgent.controls.instruct
agentEvaluation.liveAgent.controls.rollback
agentEvaluation.liveAgent.controls.branch

agentEvaluation.comparison.title

agentEvaluation.comparison.subtitle

| agentEvaluation.comparison.headers.feature | agentEvaluation.comparison.headers.potato | agentEvaluation.comparison.headers.langsmith | agentEvaluation.comparison.headers.langfuse | agentEvaluation.comparison.headers.labelStudio | agentEvaluation.comparison.headers.argilla | agentEvaluation.comparison.headers.scaleAI |
| --- | --- | --- | --- | --- | --- | --- |
| agentEvaluation.comparison.rows.traceFormats.feature | agentEvaluation.comparison.rows.traceFormats.potato | agentEvaluation.comparison.rows.traceFormats.langsmith | agentEvaluation.comparison.rows.traceFormats.langfuse | agentEvaluation.comparison.rows.traceFormats.labelStudio | agentEvaluation.comparison.rows.traceFormats.argilla | agentEvaluation.comparison.rows.traceFormats.scaleAI |
| agentEvaluation.comparison.rows.perStepAnnotation.feature | agentEvaluation.comparison.rows.perStepAnnotation.potato | agentEvaluation.comparison.rows.perStepAnnotation.langsmith | agentEvaluation.comparison.rows.perStepAnnotation.langfuse | agentEvaluation.comparison.rows.perStepAnnotation.labelStudio | agentEvaluation.comparison.rows.perStepAnnotation.argilla | agentEvaluation.comparison.rows.perStepAnnotation.scaleAI |
| agentEvaluation.comparison.rows.liveObservation.feature | agentEvaluation.comparison.rows.liveObservation.potato | agentEvaluation.comparison.rows.liveObservation.langsmith | agentEvaluation.comparison.rows.liveObservation.langfuse | agentEvaluation.comparison.rows.liveObservation.labelStudio | agentEvaluation.comparison.rows.liveObservation.argilla | agentEvaluation.comparison.rows.liveObservation.scaleAI |
| agentEvaluation.comparison.rows.pauseResume.feature | agentEvaluation.comparison.rows.pauseResume.potato | agentEvaluation.comparison.rows.pauseResume.langsmith | agentEvaluation.comparison.rows.pauseResume.langfuse | agentEvaluation.comparison.rows.pauseResume.labelStudio | agentEvaluation.comparison.rows.pauseResume.argilla | agentEvaluation.comparison.rows.pauseResume.scaleAI |
| agentEvaluation.comparison.rows.codeDiffRendering.feature | agentEvaluation.comparison.rows.codeDiffRendering.potato | agentEvaluation.comparison.rows.codeDiffRendering.langsmith | agentEvaluation.comparison.rows.codeDiffRendering.langfuse | agentEvaluation.comparison.rows.codeDiffRendering.labelStudio | agentEvaluation.comparison.rows.codeDiffRendering.argilla | agentEvaluation.comparison.rows.codeDiffRendering.scaleAI |
| agentEvaluation.comparison.rows.terminalRendering.feature | agentEvaluation.comparison.rows.terminalRendering.potato | agentEvaluation.comparison.rows.terminalRendering.langsmith | agentEvaluation.comparison.rows.terminalRendering.langfuse | agentEvaluation.comparison.rows.terminalRendering.labelStudio | agentEvaluation.comparison.rows.terminalRendering.argilla | agentEvaluation.comparison.rows.terminalRendering.scaleAI |
| agentEvaluation.comparison.rows.prmCollection.feature | agentEvaluation.comparison.rows.prmCollection.potato | agentEvaluation.comparison.rows.prmCollection.langsmith | agentEvaluation.comparison.rows.prmCollection.langfuse | agentEvaluation.comparison.rows.prmCollection.labelStudio | agentEvaluation.comparison.rows.prmCollection.argilla | agentEvaluation.comparison.rows.prmCollection.scaleAI |
| agentEvaluation.comparison.rows.codeReview.feature | agentEvaluation.comparison.rows.codeReview.potato | agentEvaluation.comparison.rows.codeReview.langsmith | agentEvaluation.comparison.rows.codeReview.langfuse | agentEvaluation.comparison.rows.codeReview.labelStudio | agentEvaluation.comparison.rows.codeReview.argilla | agentEvaluation.comparison.rows.codeReview.scaleAI |
| agentEvaluation.comparison.rows.pairwiseComparison.feature | agentEvaluation.comparison.rows.pairwiseComparison.potato | agentEvaluation.comparison.rows.pairwiseComparison.langsmith | agentEvaluation.comparison.rows.pairwiseComparison.langfuse | agentEvaluation.comparison.rows.pairwiseComparison.labelStudio | agentEvaluation.comparison.rows.pairwiseComparison.argilla | agentEvaluation.comparison.rows.pairwiseComparison.scaleAI |
| agentEvaluation.comparison.rows.rubricEval.feature | agentEvaluation.comparison.rows.rubricEval.potato | agentEvaluation.comparison.rows.rubricEval.langsmith | agentEvaluation.comparison.rows.rubricEval.langfuse | agentEvaluation.comparison.rows.rubricEval.labelStudio | agentEvaluation.comparison.rows.rubricEval.argilla | agentEvaluation.comparison.rows.rubricEval.scaleAI |
| agentEvaluation.comparison.rows.selfHosted.feature | agentEvaluation.comparison.rows.selfHosted.potato | agentEvaluation.comparison.rows.selfHosted.langsmith | agentEvaluation.comparison.rows.selfHosted.langfuse | agentEvaluation.comparison.rows.selfHosted.labelStudio | agentEvaluation.comparison.rows.selfHosted.argilla | agentEvaluation.comparison.rows.selfHosted.scaleAI |
| agentEvaluation.comparison.rows.free.feature | agentEvaluation.comparison.rows.free.potato | agentEvaluation.comparison.rows.free.langsmith | agentEvaluation.comparison.rows.free.langfuse | agentEvaluation.comparison.rows.free.labelStudio | agentEvaluation.comparison.rows.free.argilla | agentEvaluation.comparison.rows.free.scaleAI |

agentEvaluation.examples.title

agentEvaluation.examples.subtitle

agentEvaluation.examples.items.agentTraceEval.title

agentEvaluation.examples.items.agentTraceEval.description

agentEvaluation.examples.items.webAgentReview.title

agentEvaluation.examples.items.webAgentReview.description

agentEvaluation.examples.items.webAgentCreation.title

agentEvaluation.examples.items.webAgentCreation.description

agentEvaluation.examples.items.liveAgentEval.title

agentEvaluation.examples.items.liveAgentEval.description

agentEvaluation.examples.items.interactiveVlm.title

agentEvaluation.examples.items.interactiveVlm.description

agentEvaluation.examples.items.swebenchEval.title

agentEvaluation.examples.items.swebenchEval.description

agentEvaluation.examples.items.anthropicEval.title

agentEvaluation.examples.items.anthropicEval.description

agentEvaluation.examples.items.openaiEval.title

agentEvaluation.examples.items.openaiEval.description

agentEvaluation.examples.items.langchainIntegration.title

agentEvaluation.examples.items.langchainIntegration.description

agentEvaluation.examples.items.multiAgentEval.title

agentEvaluation.examples.items.multiAgentEval.description

agentEvaluation.examples.items.agentComparison.title

agentEvaluation.examples.items.agentComparison.description

agentEvaluation.examples.items.multiDimComparison.title

agentEvaluation.examples.items.multiDimComparison.description

agentEvaluation.examples.items.ragEvaluation.title

agentEvaluation.examples.items.ragEvaluation.description

agentEvaluation.examples.items.visualAgentEval.title

agentEvaluation.examples.items.visualAgentEval.description

agentEvaluation.cta.title

agentEvaluation.cta.subtitle