Retrieve Results of an Evaluation Run

GET /v2/gen-ai/evaluation_runs/{evaluation_run_uuid}/results

To retrieve the results of an evaluation run, send a GET request to /v2/gen-ai/evaluation_runs/{evaluation_run_uuid}/results.

Path Parameters

evaluation_run_uuid: string

Query Parameters

page: optional number

Page number.

per_page: optional number

Items per page.

Returns

evaluation_run: optional APIEvaluationRun { agent_deleted, agent_deployment_name, agent_name, 20 more }

agent_deleted: optional boolean

Whether agent is deleted

agent_deployment_name: optional string

The agent deployment name

agent_name: optional string

Agent name

agent_uuid: optional string

Agent UUID.

agent_version_hash: optional string

Version hash

agent_workspace_uuid: optional string

Agent workspace uuid

created_by_user_email: optional string

created_by_user_id: optional string

format: uint64

error_description: optional string

The error description

evaluation_run_uuid: optional string

Evaluation run UUID.

evaluation_test_case_workspace_uuid: optional string

Evaluation test case workspace uuid

finished_at: optional string

Run end time.

format: date-time

pass_status: optional boolean

The pass status of the evaluation run based on the star metric.

queued_at: optional string

Run queued time.

format: date-time

run_level_metric_results: optional array of APIEvaluationMetricResult { error_description, metric_name, metric_value_type, 3 more }

error_description: optional string

Error description if the metric could not be calculated.

metric_name: optional string

Metric name

metric_value_type: optional "METRIC_VALUE_TYPE_UNSPECIFIED" or "METRIC_VALUE_TYPE_NUMBER" or "METRIC_VALUE_TYPE_STRING" or "METRIC_VALUE_TYPE_PERCENTAGE"

Accepts one of the following:

"METRIC_VALUE_TYPE_UNSPECIFIED"

"METRIC_VALUE_TYPE_NUMBER"

"METRIC_VALUE_TYPE_STRING"

"METRIC_VALUE_TYPE_PERCENTAGE"

number_value: optional number

The value of the metric as a number.

format: double

reasoning: optional string

Reasoning of the metric result.

string_value: optional string

The value of the metric as a string.

run_name: optional string

Run name.

star_metric_result: optional APIEvaluationMetricResult { error_description, metric_name, metric_value_type, 3 more }

error_description: optional string

Error description if the metric could not be calculated.

metric_name: optional string

Metric name

metric_value_type: optional "METRIC_VALUE_TYPE_UNSPECIFIED" or "METRIC_VALUE_TYPE_NUMBER" or "METRIC_VALUE_TYPE_STRING" or "METRIC_VALUE_TYPE_PERCENTAGE"

Accepts one of the following:

"METRIC_VALUE_TYPE_UNSPECIFIED"

"METRIC_VALUE_TYPE_NUMBER"

"METRIC_VALUE_TYPE_STRING"

"METRIC_VALUE_TYPE_PERCENTAGE"

number_value: optional number

The value of the metric as a number.

format: double

reasoning: optional string

Reasoning of the metric result.

string_value: optional string

The value of the metric as a string.

started_at: optional string

Run start time.

format: date-time

status: optional "EVALUATION_RUN_STATUS_UNSPECIFIED" or "EVALUATION_RUN_QUEUED" or "EVALUATION_RUN_RUNNING_DATASET" or 6 more

Evaluation Run Statuses

Accepts one of the following:

"EVALUATION_RUN_STATUS_UNSPECIFIED"

"EVALUATION_RUN_QUEUED"

"EVALUATION_RUN_RUNNING_DATASET"

"EVALUATION_RUN_EVALUATING_RESULTS"

"EVALUATION_RUN_CANCELLING"

"EVALUATION_RUN_CANCELLED"

"EVALUATION_RUN_SUCCESSFUL"

"EVALUATION_RUN_PARTIALLY_SUCCESSFUL"

"EVALUATION_RUN_FAILED"

test_case_description: optional string

Test case description.

test_case_name: optional string

Test case name.

test_case_uuid: optional string

Test case UUID.

test_case_version: optional number

Test case version.

format: int64

links: optional APILinks { pages }

Links to other pages

pages: optional object { first, last, next, previous }

Information about how to reach other pages

first: optional string

First page

last: optional string

Last page

next: optional string

Next page

previous: optional string

Previous page

meta: optional APIMeta { page, pages, total }

Meta information about the data set

page: optional number

The current page

format: int64

pages: optional number

Total number of pages

format: int64

total: optional number

Total number of items across all pages

format: int64

prompts: optional array of APIEvaluationPrompt { evaluation_trace_spans, ground_truth, input, 7 more }

The prompt level results.

evaluation_trace_spans: optional array of object { created_at, input, name, 4 more }

The evaluated trace spans.

created_at: optional string

When the span was created

format: date-time

input: optional unknown

Input data for the span (flexible structure - can be messages array, string, etc.)

name: optional string

Name/identifier for the span

output: optional unknown

Output data from the span (flexible structure - can be message, string, etc.)

retriever_chunks: optional array of object { chunk_usage_pct, chunk_used, index_uuid, 2 more }

Any retriever span chunks that were included as part of the span.

chunk_usage_pct: optional number

The usage percentage of the chunk.

format: double

chunk_used: optional boolean

Indicates if the chunk was used in the prompt.

index_uuid: optional string

The index uuid (Knowledge Base) of the chunk.

source_name: optional string

The source name for the chunk, e.g., the file name or document title.

text: optional string

Text content of the chunk.

span_level_metric_results: optional array of APIEvaluationMetricResult { error_description, metric_name, metric_value_type, 3 more }

The span-level metric results.

error_description: optional string

Error description if the metric could not be calculated.

metric_name: optional string

Metric name

metric_value_type: optional "METRIC_VALUE_TYPE_UNSPECIFIED" or "METRIC_VALUE_TYPE_NUMBER" or "METRIC_VALUE_TYPE_STRING" or "METRIC_VALUE_TYPE_PERCENTAGE"

Accepts one of the following:

"METRIC_VALUE_TYPE_UNSPECIFIED"

"METRIC_VALUE_TYPE_NUMBER"

"METRIC_VALUE_TYPE_STRING"

"METRIC_VALUE_TYPE_PERCENTAGE"

number_value: optional number

The value of the metric as a number.

format: double

reasoning: optional string

Reasoning of the metric result.

string_value: optional string

The value of the metric as a string.

type: optional "TRACE_SPAN_TYPE_UNKNOWN" or "TRACE_SPAN_TYPE_LLM" or "TRACE_SPAN_TYPE_RETRIEVER" or "TRACE_SPAN_TYPE_TOOL"

Types of spans in a trace

Accepts one of the following:

"TRACE_SPAN_TYPE_UNKNOWN"

"TRACE_SPAN_TYPE_LLM"

"TRACE_SPAN_TYPE_RETRIEVER"

"TRACE_SPAN_TYPE_TOOL"

ground_truth: optional string

The ground truth for the prompt.

input: optional string

input_tokens: optional string

The number of input tokens used in the prompt.

format: uint64

output: optional string

output_tokens: optional string

The number of output tokens used in the prompt.

format: uint64

prompt_chunks: optional array of object { chunk_usage_pct, chunk_used, index_uuid, 2 more }

The list of prompt chunks.

chunk_usage_pct: optional number

The usage percentage of the chunk.

format: double

chunk_used: optional boolean

Indicates if the chunk was used in the prompt.

index_uuid: optional string

The index uuid (Knowledge Base) of the chunk.

source_name: optional string

The source name for the chunk, e.g., the file name or document title.

text: optional string

Text content of the chunk.

prompt_id: optional number

Prompt ID

format: int64

prompt_level_metric_results: optional array of APIEvaluationMetricResult { error_description, metric_name, metric_value_type, 3 more }

The metric results for the prompt.

error_description: optional string

Error description if the metric could not be calculated.

metric_name: optional string

Metric name

metric_value_type: optional "METRIC_VALUE_TYPE_UNSPECIFIED" or "METRIC_VALUE_TYPE_NUMBER" or "METRIC_VALUE_TYPE_STRING" or "METRIC_VALUE_TYPE_PERCENTAGE"

Accepts one of the following:

"METRIC_VALUE_TYPE_UNSPECIFIED"

"METRIC_VALUE_TYPE_NUMBER"

"METRIC_VALUE_TYPE_STRING"

"METRIC_VALUE_TYPE_PERCENTAGE"

number_value: optional number

The value of the metric as a number.

format: double

reasoning: optional string

Reasoning of the metric result.

string_value: optional string

The value of the metric as a string.

trace_id: optional string

The trace id for the prompt.

Retrieve Results of an Evaluation Run

curl https://api.digitalocean.com/v2/gen-ai/evaluation_runs/$EVALUATION_RUN_UUID/results \
    -H "Authorization: Bearer $DIGITALOCEAN_ACCESS_TOKEN"

{
  "evaluation_run": {
    "agent_deleted": true,
    "agent_deployment_name": "example name",
    "agent_name": "example name",
    "agent_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "agent_version_hash": "example string",
    "agent_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "created_by_user_email": "example@example.com",
    "created_by_user_id": "12345",
    "error_description": "example string",
    "evaluation_run_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "evaluation_test_case_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "finished_at": "2023-01-01T00:00:00Z",
    "pass_status": true,
    "queued_at": "2023-01-01T00:00:00Z",
    "run_level_metric_results": [
      {
        "error_description": "example string",
        "metric_name": "example name",
        "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
        "number_value": 123,
        "reasoning": "example string",
        "string_value": "example string"
      }
    ],
    "run_name": "example name",
    "star_metric_result": {
      "error_description": "example string",
      "metric_name": "example name",
      "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
      "number_value": 123,
      "reasoning": "example string",
      "string_value": "example string"
    },
    "started_at": "2023-01-01T00:00:00Z",
    "status": "EVALUATION_RUN_STATUS_UNSPECIFIED",
    "test_case_description": "example string",
    "test_case_name": "example name",
    "test_case_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "test_case_version": 123
  },
  "links": {
    "pages": {
      "first": "example string",
      "last": "example string",
      "next": "example string",
      "previous": "example string"
    }
  },
  "meta": {
    "page": 123,
    "pages": 123,
    "total": 123
  },
  "prompts": [
    {
      "evaluation_trace_spans": [
        {
          "created_at": "2023-01-01T00:00:00Z",
          "input": {},
          "name": "example name",
          "output": {},
          "retriever_chunks": [
            {
              "chunk_usage_pct": 123,
              "chunk_used": true,
              "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
              "source_name": "example name",
              "text": "example string"
            }
          ],
          "span_level_metric_results": [
            {
              "error_description": "example string",
              "metric_name": "example name",
              "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
              "number_value": 123,
              "reasoning": "example string",
              "string_value": "example string"
            }
          ],
          "type": "TRACE_SPAN_TYPE_UNKNOWN"
        }
      ],
      "ground_truth": "example string",
      "input": "example string",
      "input_tokens": "12345",
      "output": "example string",
      "output_tokens": "12345",
      "prompt_chunks": [
        {
          "chunk_usage_pct": 123,
          "chunk_used": true,
          "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
          "source_name": "example name",
          "text": "example string"
        }
      ],
      "prompt_id": 123,
      "prompt_level_metric_results": [
        {
          "error_description": "example string",
          "metric_name": "example name",
          "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
          "number_value": 123,
          "reasoning": "example string",
          "string_value": "example string"
        }
      ],
      "trace_id": "123e4567-e89b-12d3-a456-426614174000"
    }
  ]
}

Returns Examples

{
  "evaluation_run": {
    "agent_deleted": true,
    "agent_deployment_name": "example name",
    "agent_name": "example name",
    "agent_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "agent_version_hash": "example string",
    "agent_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "created_by_user_email": "example@example.com",
    "created_by_user_id": "12345",
    "error_description": "example string",
    "evaluation_run_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "evaluation_test_case_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "finished_at": "2023-01-01T00:00:00Z",
    "pass_status": true,
    "queued_at": "2023-01-01T00:00:00Z",
    "run_level_metric_results": [
      {
        "error_description": "example string",
        "metric_name": "example name",
        "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
        "number_value": 123,
        "reasoning": "example string",
        "string_value": "example string"
      }
    ],
    "run_name": "example name",
    "star_metric_result": {
      "error_description": "example string",
      "metric_name": "example name",
      "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
      "number_value": 123,
      "reasoning": "example string",
      "string_value": "example string"
    },
    "started_at": "2023-01-01T00:00:00Z",
    "status": "EVALUATION_RUN_STATUS_UNSPECIFIED",
    "test_case_description": "example string",
    "test_case_name": "example name",
    "test_case_uuid": "123e4567-e89b-12d3-a456-426614174000",
    "test_case_version": 123
  },
  "links": {
    "pages": {
      "first": "example string",
      "last": "example string",
      "next": "example string",
      "previous": "example string"
    }
  },
  "meta": {
    "page": 123,
    "pages": 123,
    "total": 123
  },
  "prompts": [
    {
      "evaluation_trace_spans": [
        {
          "created_at": "2023-01-01T00:00:00Z",
          "input": {},
          "name": "example name",
          "output": {},
          "retriever_chunks": [
            {
              "chunk_usage_pct": 123,
              "chunk_used": true,
              "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
              "source_name": "example name",
              "text": "example string"
            }
          ],
          "span_level_metric_results": [
            {
              "error_description": "example string",
              "metric_name": "example name",
              "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
              "number_value": 123,
              "reasoning": "example string",
              "string_value": "example string"
            }
          ],
          "type": "TRACE_SPAN_TYPE_UNKNOWN"
        }
      ],
      "ground_truth": "example string",
      "input": "example string",
      "input_tokens": "12345",
      "output": "example string",
      "output_tokens": "12345",
      "prompt_chunks": [
        {
          "chunk_usage_pct": 123,
          "chunk_used": true,
          "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
          "source_name": "example name",
          "text": "example string"
        }
      ],
      "prompt_id": 123,
      "prompt_level_metric_results": [
        {
          "error_description": "example string",
          "metric_name": "example name",
          "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
          "number_value": 123,
          "reasoning": "example string",
          "string_value": "example string"
        }
      ],
      "trace_id": "123e4567-e89b-12d3-a456-426614174000"
    }
  ]
}