Skip to content

Commit 4ee7330

Browse files
dinmukhamedmamitalokbera
authored and committed
fix(openai): record exception as span events as well (traceloop#3067)
1 parent 86532c3 commit 4ee7330

File tree

11 files changed

+333
-27
lines changed

11 files changed

+333
-27
lines changed

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
)
4242
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
4343
from opentelemetry.metrics import Counter, Histogram
44+
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
4445
from opentelemetry.semconv_ai import (
4546
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
4647
LLMRequestTypeValues,
@@ -89,7 +90,6 @@ def chat_wrapper(
8990
)
9091

9192
run_async(_handle_request(span, kwargs, instance))
92-
9393
try:
9494
start_time = time.time()
9595
response = wrapped(*args, **kwargs)
@@ -107,10 +107,12 @@ def chat_wrapper(
107107
if exception_counter:
108108
exception_counter.add(1, attributes=attributes)
109109

110+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
111+
span.record_exception(e)
110112
span.set_status(Status(StatusCode.ERROR, str(e)))
111113
span.end()
112114

113-
raise e
115+
raise
114116

115117
if is_streaming_response(response):
116118
# span will be closed after the generator is done
@@ -204,10 +206,12 @@ async def achat_wrapper(
204206
if exception_counter:
205207
exception_counter.add(1, attributes=attributes)
206208

209+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
210+
span.record_exception(e)
207211
span.set_status(Status(StatusCode.ERROR, str(e)))
208212
span.end()
209213

210-
raise e
214+
raise
211215

212216
if is_streaming_response(response):
213217
# span will be closed after the generator is done
@@ -637,7 +641,7 @@ def __next__(self):
637641
except Exception as e:
638642
if isinstance(e, StopIteration):
639643
self._process_complete_response()
640-
raise e
644+
raise
641645
else:
642646
self._process_item(chunk)
643647
return chunk
@@ -648,7 +652,7 @@ async def __anext__(self):
648652
except Exception as e:
649653
if isinstance(e, StopAsyncIteration):
650654
self._process_complete_response()
651-
raise e
655+
raise
652656
else:
653657
self._process_item(chunk)
654658
return chunk

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
should_record_stream_token_usage,
1616
)
1717
from opentelemetry.instrumentation.openai.shared.config import Config
18+
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
1819
from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
1920
from opentelemetry.instrumentation.openai.shared.event_models import (
2021
ChoiceEvent,
@@ -61,9 +62,11 @@ def completion_wrapper(tracer, wrapped, instance, args, kwargs):
6162
try:
6263
response = wrapped(*args, **kwargs)
6364
except Exception as e:
65+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
66+
span.record_exception(e)
6467
span.set_status(Status(StatusCode.ERROR, str(e)))
6568
span.end()
66-
raise e
69+
raise
6770

6871
if is_streaming_response(response):
6972
# span will be closed after the generator is done
@@ -93,9 +96,11 @@ async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
9396
try:
9497
response = await wrapped(*args, **kwargs)
9598
except Exception as e:
99+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
100+
span.record_exception(e)
96101
span.set_status(Status(StatusCode.ERROR, str(e)))
97102
span.end()
98-
raise e
103+
raise
99104

100105
if is_streaming_response(response):
101106
# span will be closed after the generator is done

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
)
3232
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
3333
from opentelemetry.metrics import Counter, Histogram
34+
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
3435
from opentelemetry.semconv_ai import (
3536
SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
3637
LLMRequestTypeValues,
@@ -89,10 +90,12 @@ def embeddings_wrapper(
8990
if exception_counter:
9091
exception_counter.add(1, attributes=attributes)
9192

93+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
94+
span.record_exception(e)
9295
span.set_status(Status(StatusCode.ERROR, str(e)))
9396
span.end()
9497

95-
raise e
98+
raise
9699

97100
duration = end_time - start_time
98101

@@ -152,10 +155,12 @@ async def aembeddings_wrapper(
152155
if exception_counter:
153156
exception_counter.add(1, attributes=attributes)
154157

158+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
159+
span.record_exception(e)
155160
span.set_status(Status(StatusCode.ERROR, str(e)))
156161
span.end()
157162

158-
raise e
163+
raise
159164

160165
duration = end_time - start_time
161166

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def image_gen_metrics_wrapper(
4747
if exception_counter:
4848
exception_counter.add(1, attributes=attributes)
4949

50-
raise e
50+
raise
5151

5252
if is_openai_v1():
5353
response_dict = model_as_dict(response)

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
should_emit_events,
1919
)
2020
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
21+
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
2122
from opentelemetry.semconv_ai import LLMRequestTypeValues, SpanAttributes
22-
from opentelemetry.trace import SpanKind
23+
from opentelemetry.trace import SpanKind, Status, StatusCode
2324

2425
from openai._legacy_response import LegacyAPIResponse
2526
from openai.types.beta.threads.run import Run
@@ -53,17 +54,24 @@ def runs_create_wrapper(tracer, wrapped, instance, args, kwargs):
5354
thread_id = kwargs.get("thread_id")
5455
instructions = kwargs.get("instructions")
5556

56-
response = wrapped(*args, **kwargs)
57-
response_dict = model_as_dict(response)
57+
try:
58+
response = wrapped(*args, **kwargs)
59+
response_dict = model_as_dict(response)
5860

59-
runs[thread_id] = {
60-
"start_time": time.time_ns(),
61-
"assistant_id": kwargs.get("assistant_id"),
62-
"instructions": instructions,
63-
"run_id": response_dict.get("id"),
64-
}
61+
runs[thread_id] = {
62+
"start_time": time.time_ns(),
63+
"assistant_id": kwargs.get("assistant_id"),
64+
"instructions": instructions,
65+
"run_id": response_dict.get("id"),
66+
}
6567

66-
return response
68+
return response
69+
except Exception as e:
70+
runs[thread_id] = {
71+
"exception": e,
72+
"end_time": time.time_ns(),
73+
}
74+
raise
6775

6876

6977
@_with_tracer_wrapper
@@ -85,10 +93,16 @@ def process_response(response):
8593
if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
8694
return wrapped(*args, **kwargs)
8795

88-
response = wrapped(*args, **kwargs)
89-
process_response(response)
90-
91-
return response
96+
try:
97+
response = wrapped(*args, **kwargs)
98+
process_response(response)
99+
return response
100+
except Exception as e:
101+
thread_id = kwargs.get("thread_id")
102+
if thread_id in runs:
103+
runs[thread_id]["exception"] = e
104+
runs[thread_id]["end_time"] = time.time_ns()
105+
raise
92106

93107

94108
@_with_tracer_wrapper
@@ -113,6 +127,11 @@ def messages_list_wrapper(tracer, wrapped, instance, args, kwargs):
113127
attributes={SpanAttributes.LLM_REQUEST_TYPE: LLMRequestTypeValues.CHAT.value},
114128
start_time=run.get("start_time"),
115129
)
130+
if exception := run.get("exception"):
131+
span.set_attribute(ERROR_TYPE, exception.__class__.__name__)
132+
span.record_exception(exception)
133+
span.set_status(Status(StatusCode.ERROR, str(exception)))
134+
span.end(run.get("end_time"))
116135

117136
prompt_index = 0
118137
if assistants.get(run["assistant_id"]) is not None or Config.enrich_assistant:
@@ -288,6 +307,12 @@ def runs_create_and_stream_wrapper(tracer, wrapped, instance, args, kwargs):
288307
span=span,
289308
)
290309

291-
response = wrapped(*args, **kwargs)
292-
293-
return response
310+
try:
311+
response = wrapped(*args, **kwargs)
312+
return response
313+
except Exception as e:
314+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
315+
span.record_exception(e)
316+
span.set_status(Status(StatusCode.ERROR, str(e)))
317+
span.end()
318+
raise

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
from opentelemetry.instrumentation.openai.shared.event_emitter import emit_event
33
from opentelemetry.instrumentation.openai.shared.event_models import ChoiceEvent
44
from opentelemetry.instrumentation.openai.utils import should_emit_events
5+
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
56
from opentelemetry.semconv_ai import SpanAttributes
7+
from opentelemetry.trace import Status, StatusCode
68
from typing_extensions import override
79

810
from openai import AssistantEventHandler
@@ -66,6 +68,9 @@ def on_tool_call_done(self, tool_call):
6668

6769
@override
6870
def on_exception(self, exception: Exception):
71+
self._span.set_attribute(ERROR_TYPE, exception.__class__.__name__)
72+
self._span.record_exception(exception)
73+
self._span.set_status(Status(StatusCode.ERROR, str(exception)))
6974
self._original_handler.on_exception(exception)
7075

7176
@override

packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from opentelemetry import context as context_api
4040
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
4141
from opentelemetry.semconv_ai import SpanAttributes
42+
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
4243
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
4344
GEN_AI_COMPLETION,
4445
GEN_AI_PROMPT,
@@ -426,6 +427,7 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa
426427
start_time if traced_data is None else int(traced_data.start_time)
427428
),
428429
)
430+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
429431
span.record_exception(e)
430432
span.set_status(StatusCode.ERROR, str(e))
431433
if traced_data:
@@ -519,6 +521,7 @@ async def async_responses_get_or_create_wrapper(
519521
start_time if traced_data is None else int(traced_data.start_time)
520522
),
521523
)
524+
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
522525
span.record_exception(e)
523526
span.set_status(StatusCode.ERROR, str(e))
524527
if traced_data:

packages/opentelemetry-instrumentation-openai/tests/traces/test_chat.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
gen_ai_attributes as GenAIAttributes,
1616
)
1717
from opentelemetry.semconv_ai import SpanAttributes
18+
from opentelemetry.trace import StatusCode
1819

1920
from .utils import assert_request_contains_tracecontext, spy_decorator
2021

@@ -1436,3 +1437,78 @@ def test_chat_history_message_pydantic(span_exporter, openai_client):
14361437
== second_user_message["content"]
14371438
)
14381439
assert second_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.2.role"] == "user"
1440+
1441+
1442+
def test_chat_exception(instrument_legacy, span_exporter, openai_client):
1443+
openai_client.api_key = "invalid"
1444+
with pytest.raises(Exception):
1445+
openai_client.chat.completions.create(
1446+
model="gpt-3.5-turbo",
1447+
messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}],
1448+
)
1449+
1450+
spans = span_exporter.get_finished_spans()
1451+
1452+
assert [span.name for span in spans] == [
1453+
"openai.chat",
1454+
]
1455+
open_ai_span = spans[0]
1456+
assert (
1457+
open_ai_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"]
1458+
== "Tell me a joke about opentelemetry"
1459+
)
1460+
assert (
1461+
open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE)
1462+
== "https://api.openai.com/v1/"
1463+
)
1464+
assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False
1465+
assert open_ai_span.status.status_code == StatusCode.ERROR
1466+
assert open_ai_span.status.description.startswith("Error code: 401")
1467+
events = open_ai_span.events
1468+
assert len(events) == 1
1469+
event = events[0]
1470+
assert event.name == "exception"
1471+
assert event.attributes["exception.type"] == "openai.AuthenticationError"
1472+
assert event.attributes["exception.message"].startswith("Error code: 401")
1473+
assert open_ai_span.attributes.get("error.type") == "AuthenticationError"
1474+
assert "Traceback (most recent call last):" in event.attributes["exception.stacktrace"]
1475+
assert "openai.AuthenticationError" in event.attributes["exception.stacktrace"]
1476+
assert "invalid_api_key" in event.attributes["exception.stacktrace"]
1477+
1478+
1479+
@pytest.mark.asyncio
1480+
async def test_chat_async_exception(instrument_legacy, span_exporter, async_openai_client):
1481+
async_openai_client.api_key = "invalid"
1482+
with pytest.raises(Exception):
1483+
await async_openai_client.chat.completions.create(
1484+
model="gpt-3.5-turbo",
1485+
messages=[{"role": "user", "content": "Tell me a joke about opentelemetry"}],
1486+
)
1487+
1488+
spans = span_exporter.get_finished_spans()
1489+
1490+
assert [span.name for span in spans] == [
1491+
"openai.chat",
1492+
]
1493+
open_ai_span = spans[0]
1494+
assert (
1495+
open_ai_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"]
1496+
== "Tell me a joke about opentelemetry"
1497+
)
1498+
assert (
1499+
open_ai_span.attributes.get(SpanAttributes.LLM_OPENAI_API_BASE)
1500+
== "https://api.openai.com/v1/"
1501+
)
1502+
assert open_ai_span.attributes.get(SpanAttributes.LLM_IS_STREAMING) is False
1503+
assert open_ai_span.status.status_code == StatusCode.ERROR
1504+
assert open_ai_span.status.description.startswith("Error code: 401")
1505+
events = open_ai_span.events
1506+
assert len(events) == 1
1507+
event = events[0]
1508+
assert event.name == "exception"
1509+
assert event.attributes["exception.type"] == "openai.AuthenticationError"
1510+
assert event.attributes["exception.message"].startswith("Error code: 401")
1511+
assert "Traceback (most recent call last):" in event.attributes["exception.stacktrace"]
1512+
assert "openai.AuthenticationError" in event.attributes["exception.stacktrace"]
1513+
assert "invalid_api_key" in event.attributes["exception.stacktrace"]
1514+
assert open_ai_span.attributes.get("error.type") == "AuthenticationError"

0 commit comments

Comments
 (0)