For AI agents: a documentation index is available at the root level at /llms.txt and /llms-full.txt. Append /llms.txt to any URL for a page-level index, or .md for the markdown version of any page.
Sign in | Book a demo
Docs | Reference | Changelog
Docs | Reference | Changelog
  • Introduction
    • SDKs
    • Errors
  • Humanloop API
      • GET Get
      • GET List Datapoints
      • POST Create
      • POST Log
      • POST Result
      • PATCH Update Status
      • PATCH Add Evaluators
      • GET Get Evaluations
LogoLogo
Sign in | Book a demo
Humanloop API > Evaluations

Result

POST
https://api.humanloop.com/v4/evaluations/:evaluation_id/result
POST
/v4/evaluations/:evaluation_id/result
$ curl -X POST https://api.humanloop.com/v4/evaluations/evaluation_id/result \
> -H "X-API-KEY: <apiKey>" \
> -H "Content-Type: application/json" \
> -d '{
> "log_id": "log_id",
> "evaluator_id": "evaluator_id",
> "result": true
>}'
{
  "id": "id",
  "evaluator_id": "evaluator_id",
  "evaluator_version_id": "evaluator_version_id",
  "log_id": "log_id",
  "updated_at": "2024-01-15T09:30:00Z",
  "created_at": "2024-01-15T09:30:00Z",
  "evaluation_id": "evaluation_id",
  "log": {
    "id": "id",
    "config": {
      "type": "model",
      "id": "id",
      "model": "model",
      "chat_template": [
        {
          "role": "user"
        }
      ],
      "description": "description",
      "endpoint": "complete",
      "frequency_penalty": 1.1,
      "max_tokens": 1,
      "name": "name",
      "other": {
        "key": "value"
      },
      "presence_penalty": 1.1,
      "prompt_template": "prompt_template",
      "provider": "anthropic",
      "reasoning_effort": "high",
      "response_format": {
        "type": "json_object"
      },
      "seed": 1,
      "stop": "stop",
      "temperature": 1.1,
      "template_language": "default",
      "tools": [
        {
          "id": "id",
          "name": "name"
        }
      ],
      "top_p": 1.1,
      "tool_configs": [
        {
          "id": "id",
          "status": "status",
          "name": "name"
        }
      ]
    },
    "evaluation_results": [],
    "observability_status": "pending",
    "updated_at": "2024-01-15T09:30:00Z",
    "project": "project",
    "project_id": "project_id",
    "session_id": "session_id",
    "session_reference_id": "session_reference_id",
    "parent_id": "parent_id",
    "parent_reference_id": "parent_reference_id",
    "inputs": {
      "key": "value"
    },
    "source": "source",
    "metadata": {
      "key": "value"
    },
    "save": true,
    "source_datapoint_id": "source_datapoint_id",
    "reference_id": "reference_id",
    "messages": [
      {
        "role": "user"
      }
    ],
    "output": "output",
    "judgment": true,
    "config_id": "config_id",
    "environment": "environment",
    "feedback": [
      {
        "type": "rating",
        "id": "id"
      }
    ],
    "created_at": "2024-01-15T09:30:00Z",
    "error": "error",
    "stdout": "stdout",
    "duration": 1.1,
    "output_message": {
      "role": "user",
      "content": "content",
      "name": "name",
      "tool_call_id": "tool_call_id",
      "tool_calls": [
        {
          "id": "id",
          "type": "function",
          "function": {
            "name": "name"
          }
        }
      ],
      "thinking": [
        {
          "type": "thinking",
          "signature": "signature",
          "thinking": "thinking"
        }
      ],
      "tool_call": {
        "name": "name"
      }
    },
    "prompt_tokens": 1,
    "output_tokens": 1,
    "prompt_cost": 1.1,
    "output_cost": 1.1,
    "provider_request": {
      "key": "value"
    },
    "provider_response": {
      "key": "value"
    },
    "user": "user",
    "provider_latency": 1.1,
    "tokens": 1,
    "raw_output": "raw_output",
    "finish_reason": "finish_reason",
    "tools": [
      {
        "id": "id",
        "name": "name",
        "signature": "signature",
        "result": "result"
      }
    ],
    "tool_choice": "none",
    "batch_ids": [
      "batch_ids"
    ]
  },
  "version_id": "version_id",
  "version": {
    "key": "value"
  },
  "value": true,
  "error": "error",
  "evaluator_log": {
    "id": "id",
    "config": {
      "type": "model",
      "id": "id",
      "model": "model",
      "chat_template": [
        {
          "role": "user"
        }
      ],
      "description": "description",
      "endpoint": "complete",
      "frequency_penalty": 1.1,
      "max_tokens": 1,
      "name": "name",
      "other": {
        "key": "value"
      },
      "presence_penalty": 1.1,
      "prompt_template": "prompt_template",
      "provider": "anthropic",
      "reasoning_effort": "high",
      "response_format": {
        "type": "json_object"
      },
      "seed": 1,
      "stop": "stop",
      "temperature": 1.1,
      "template_language": "default",
      "tools": [
        {
          "id": "id",
          "name": "name"
        }
      ],
      "top_p": 1.1,
      "tool_configs": [
        {
          "id": "id",
          "status": "status",
          "name": "name"
        }
      ]
    },
    "evaluation_results": [],
    "observability_status": "pending",
    "updated_at": "2024-01-15T09:30:00Z",
    "project": "project",
    "project_id": "project_id",
    "session_id": "session_id",
    "session_reference_id": "session_reference_id",
    "parent_id": "parent_id",
    "parent_reference_id": "parent_reference_id",
    "inputs": {
      "key": "value"
    },
    "source": "source",
    "metadata": {
      "key": "value"
    },
    "save": true,
    "source_datapoint_id": "source_datapoint_id",
    "reference_id": "reference_id",
    "messages": [
      {
        "role": "user"
      }
    ],
    "output": "output",
    "judgment": true,
    "config_id": "config_id",
    "environment": "environment",
    "feedback": [
      {
        "type": "rating",
        "id": "id"
      }
    ],
    "created_at": "2024-01-15T09:30:00Z",
    "error": "error",
    "stdout": "stdout",
    "duration": 1.1,
    "output_message": {
      "role": "user",
      "content": "content",
      "name": "name",
      "tool_call_id": "tool_call_id",
      "tool_calls": [
        {
          "id": "id",
          "type": "function",
          "function": {
            "name": "name"
          }
        }
      ],
      "thinking": [
        {
          "type": "thinking",
          "signature": "signature",
          "thinking": "thinking"
        }
      ],
      "tool_call": {
        "name": "name"
      }
    },
    "prompt_tokens": 1,
    "output_tokens": 1,
    "prompt_cost": 1.1,
    "output_cost": 1.1,
    "provider_request": {
      "key": "value"
    },
    "provider_response": {
      "key": "value"
    },
    "user": "user",
    "provider_latency": 1.1,
    "tokens": 1,
    "raw_output": "raw_output",
    "finish_reason": "finish_reason",
    "tools": [
      {
        "id": "id",
        "name": "name",
        "signature": "signature",
        "result": "result"
      }
    ],
    "tool_choice": "none",
    "batch_ids": [
      "batch_ids"
    ]
  }
}

Log an evaluation result to an evaluation run.

The run must have status ‘running’. One of result or error must be provided.

Was this page helpful?
Previous

Update Status

Next
Built with

Authentication

X-API-KEY (string)
API Key authentication via header

Path parameters

evaluation_id (string, required)

ID of the evaluation run. Starts with evrun_.

Query parameters

evaluatee_id (string, optional)

String ID of evaluatee version to return. If not defined, the first evaluatee will be returned. Starts with evv_.

Request

This endpoint expects an object.
log_id (string, required)

The log that was evaluated. Must have as its source_datapoint_id one of the datapoints in the dataset being evaluated.

evaluator_id (string, required)

ID of the evaluator that evaluated the log. Starts with evfn_. Must be one of the evaluator IDs associated with the evaluation run being logged to.

result (boolean or integer or double, optional)
The result value of the evaluation.
error (string, optional)
An error that occurred during evaluation.

Response

Successful Response
id (string)
evaluator_id (string)
evaluator_version_id (string)
log_id (string)
updated_at (datetime)
created_at (datetime)
evaluation_id (string)
log (object)
Request model for logging a datapoint.
version_id (string)
version (any)
value (boolean or double or string or list of strings)
error (string)
evaluator_log (object)
Request model for logging a datapoint.

Errors

422
Evaluations Result Request Unprocessable Entity Error