Result

POST

https://api.humanloop.com/v4/evaluations/:evaluation_id/result

POST

/v4/evaluations/:evaluation_id/result

curl -X POST https://api.humanloop.com/v4/evaluations/evaluation_id/result \
     -H "X-API-KEY: <apiKey>" \
     -H "Content-Type: application/json" \
     -d '{
  "log_id": "log_id",
  "evaluator_id": "evaluator_id"
}'

Try it

1 {
2   "id": "id",
3   "evaluator_id": "evaluator_id",
4   "evaluator_version_id": "evaluator_version_id",
5   "log_id": "log_id",
6   "updated_at": "2024-01-15T09:30:00Z",
7   "created_at": "2024-01-15T09:30:00Z",
8   "evaluation_id": "evaluation_id",
9   "log": {
10     "id": "id",
11     "config": {
12       "type": "model",
13       "id": "id",
14       "model": "model",
15       "chat_template": [
16         {
17           "role": "user"
18         }
19       ],
20       "description": "description",
21       "endpoint": "complete",
22       "frequency_penalty": 1.1,
23       "max_tokens": 1,
24       "name": "name",
25       "other": {
26         "key": "value"
27       },
28       "presence_penalty": 1.1,
29       "prompt_template": "prompt_template",
30       "provider": "anthropic",
31       "reasoning_effort": "high",
32       "response_format": {
33         "type": "json_object"
34       },
35       "seed": 1,
36       "stop": "stop",
37       "temperature": 1.1,
38       "template_language": "default",
39       "tools": [
40         {
41           "id": "id",
42           "name": "name"
43         }
44       ],
45       "top_p": 1.1,
46       "tool_configs": [
47         {
48           "id": "id",
49           "status": "status",
50           "name": "name"
51         }
52       ]
53     },
54     "evaluation_results": [],
55     "observability_status": "pending",
56     "updated_at": "2024-01-15T09:30:00Z",
57     "project": "project",
58     "project_id": "project_id",
59     "session_id": "session_id",
60     "session_reference_id": "session_reference_id",
61     "parent_id": "parent_id",
62     "parent_reference_id": "parent_reference_id",
63     "inputs": {
64       "key": "value"
65     },
66     "source": "source",
67     "metadata": {
68       "key": "value"
69     },
70     "save": true,
71     "source_datapoint_id": "source_datapoint_id",
72     "reference_id": "reference_id",
73     "messages": [
74       {
75         "role": "user"
76       }
77     ],
78     "output": "output",
79     "judgment": true,
80     "config_id": "config_id",
81     "environment": "environment",
82     "feedback": [
83       {
84         "type": "rating",
85         "id": "id"
86       }
87     ],
88     "created_at": "2024-01-15T09:30:00Z",
89     "error": "error",
90     "stdout": "stdout",
91     "duration": 1.1,
92     "output_message": {
93       "role": "user",
94       "content": "content",
95       "name": "name",
96       "tool_call_id": "tool_call_id",
97       "tool_calls": [
98         {
99           "id": "id",
100           "type": "function",
101           "function": {
102             "name": "name"
103           }
104         }
105       ],
106       "thinking": [
107         {
108           "type": "thinking",
109           "signature": "signature",
110           "thinking": "thinking"
111         }
112       ],
113       "tool_call": {
114         "name": "name"
115       }
116     },
117     "prompt_tokens": 1,
118     "output_tokens": 1,
119     "prompt_cost": 1.1,
120     "output_cost": 1.1,
121     "provider_request": {
122       "key": "value"
123     },
124     "provider_response": {
125       "key": "value"
126     },
127     "user": "user",
128     "provider_latency": 1.1,
129     "tokens": 1,
130     "raw_output": "raw_output",
131     "finish_reason": "finish_reason",
132     "tools": [
133       {
134         "id": "id",
135         "name": "name",
136         "signature": "signature",
137         "result": "result"
138       }
139     ],
140     "tool_choice": "none",
141     "batch_ids": [
142       "batch_ids"
143     ]
144   },
145   "version_id": "version_id",
146   "version": {
147     "key": "value"
148   },
149   "value": true,
150   "error": "error",
151   "evaluator_log": {
152     "id": "id",
153     "config": {
154       "type": "model",
155       "id": "id",
156       "model": "model",
157       "chat_template": [
158         {
159           "role": "user"
160         }
161       ],
162       "description": "description",
163       "endpoint": "complete",
164       "frequency_penalty": 1.1,
165       "max_tokens": 1,
166       "name": "name",
167       "other": {
168         "key": "value"
169       },
170       "presence_penalty": 1.1,
171       "prompt_template": "prompt_template",
172       "provider": "anthropic",
173       "reasoning_effort": "high",
174       "response_format": {
175         "type": "json_object"
176       },
177       "seed": 1,
178       "stop": "stop",
179       "temperature": 1.1,
180       "template_language": "default",
181       "tools": [
182         {
183           "id": "id",
184           "name": "name"
185         }
186       ],
187       "top_p": 1.1,
188       "tool_configs": [
189         {
190           "id": "id",
191           "status": "status",
192           "name": "name"
193         }
194       ]
195     },
196     "evaluation_results": [],
197     "observability_status": "pending",
198     "updated_at": "2024-01-15T09:30:00Z",
199     "project": "project",
200     "project_id": "project_id",
201     "session_id": "session_id",
202     "session_reference_id": "session_reference_id",
203     "parent_id": "parent_id",
204     "parent_reference_id": "parent_reference_id",
205     "inputs": {
206       "key": "value"
207     },
208     "source": "source",
209     "metadata": {
210       "key": "value"
211     },
212     "save": true,
213     "source_datapoint_id": "source_datapoint_id",
214     "reference_id": "reference_id",
215     "messages": [
216       {
217         "role": "user"
218       }
219     ],
220     "output": "output",
221     "judgment": true,
222     "config_id": "config_id",
223     "environment": "environment",
224     "feedback": [
225       {
226         "type": "rating",
227         "id": "id"
228       }
229     ],
230     "created_at": "2024-01-15T09:30:00Z",
231     "error": "error",
232     "stdout": "stdout",
233     "duration": 1.1,
234     "output_message": {
235       "role": "user",
236       "content": "content",
237       "name": "name",
238       "tool_call_id": "tool_call_id",
239       "tool_calls": [
240         {
241           "id": "id",
242           "type": "function",
243           "function": {
244             "name": "name"
245           }
246         }
247       ],
248       "thinking": [
249         {
250           "type": "thinking",
251           "signature": "signature",
252           "thinking": "thinking"
253         }
254       ],
255       "tool_call": {
256         "name": "name"
257       }
258     },
259     "prompt_tokens": 1,
260     "output_tokens": 1,
261     "prompt_cost": 1.1,
262     "output_cost": 1.1,
263     "provider_request": {
264       "key": "value"
265     },
266     "provider_response": {
267       "key": "value"
268     },
269     "user": "user",
270     "provider_latency": 1.1,
271     "tokens": 1,
272     "raw_output": "raw_output",
273     "finish_reason": "finish_reason",
274     "tools": [
275       {
276         "id": "id",
277         "name": "name",
278         "signature": "signature",
279         "result": "result"
280       }
281     ],
282     "tool_choice": "none",
283     "batch_ids": [
284       "batch_ids"
285     ]
286   }
287 }

Log an evaluation result to an evaluation run.

The evaluation run must have status "running". One of the request fields `result` or `error` must be provided.

Path parameters

evaluation_id (string, Required)

ID of the evaluation run. Starts with evrun_.

Query parameters

evaluatee_id (string, Optional)

String ID of evaluatee version to return. If not defined, the first evaluatee will be returned. Starts with evv_.

Request

This endpoint expects an object.

log_id (string, Required)

The log that was evaluated. Must have as its source_datapoint_id one of the datapoints in the dataset being evaluated.

evaluator_id (string, Required)

ID of the evaluator that evaluated the log. Starts with evfn_. Must be one of the evaluator IDs associated with the evaluation run being logged to.

result (boolean or integer or double, Optional)

The result value of the evaluation.

error (string, Optional)

An error that occurred during evaluation.

Response

Successful Response

id (string)

evaluator_id (string)

evaluator_version_id (string)

log_id (string)

updated_at (datetime)

created_at (datetime)

evaluation_id (string or null)

log (object or null)

Request model for logging a datapoint.

version_id (string or null)

version (any or null)

value (boolean or double or string or list of strings or null)

error (string or null)

evaluator_log (object or null)