From 9eb36ab4594e61e7da4d75adb9226fe4ab826a85 Mon Sep 17 00:00:00 2001
From: cotovanu-cristian <cristian.cotovanu@uipath.com>
Date: Mon, 29 Jun 2026 12:37:56 +0300
Subject: [PATCH] fix(tracing): reconstruct flattened LLM tool_calls as a list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_get_llm_messages rebuilds nested structures from flattened OpenInference
attributes, but its heuristic looked two segments ahead (parts[i+2]) to decide
whether a digit segment is a list index, and it always created child
containers as dicts. For the key shape
llm.output_messages.0.message.tool_calls.0.tool_call.function.name the segment
two positions after the tool_calls index is a non-digit ("function"), so the
index stayed the string key "0" and tool_calls was built as a {"0": ...} dict.
The toolCalls mapping iterates the value as a list, so every tool call was
silently dropped from the exported span.

Decide each child container's type from the next segment instead: a following
digit means the child is a list (indexed by int), otherwise a dict. This
rebuilds tool_calls — and any other list-valued message field — as a list.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../src/uipath/tracing/_otel_exporters.py     | 40 ++++++++------
 .../tests/tracing/test_otel_exporters.py      | 54 +++++++++++++++++++
 2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/packages/uipath/src/uipath/tracing/_otel_exporters.py b/packages/uipath/src/uipath/tracing/_otel_exporters.py
index d2bf3a7c1..f55b0bda1 100644
--- a/packages/uipath/src/uipath/tracing/_otel_exporters.py
+++ b/packages/uipath/src/uipath/tracing/_otel_exporters.py
@@ -61,27 +61,35 @@ def _get_llm_messages(attributes: Dict[str, Any], prefix: str) -> List[Dict[str,
                     messages[index] = {}
                 current: Any = messages[index]
 
-                # Traverse parts except the last one
+                # Reconstruct nested containers. A digit segment indexes a list;
+                # any other segment keys a dict. Each child's container type is
+                # decided by the NEXT segment (a following digit => the child is a
+                # list), so e.g. `tool_calls.0.tool_call...` rebuilds tool_calls
+                # as a list rather than a {"0": ...} dict the consumer skips.
                 parts_len = len(parts)
-                for i in range(1, parts_len - 1):
+                for i in range(1, parts_len):
                     part = parts[i]
-                    key_part: str | int = part
-                    if part.isdigit() and (
-                        i + 2 < parts_len and parts[i + 2].isdigit()
-                    ):
-                        key_part = int(part)
-
-                    if isinstance(current, dict):
-                        if key_part not in current:
-                            current[key_part] = {}
+                    key_part: str | int = int(part) if part.isdigit() else part
+                    is_last = i == parts_len - 1
+                    child: Any = (
+                        value if is_last else ([] if parts[i + 1].isdigit() else {})
+                    )
+
+                    if isinstance(current, list) and isinstance(key_part, int):
+                        while len(current) <= key_part:
+                            current.append(None)
+                        if is_last or current[key_part] is None:
+                            current[key_part] = child
                         current = current[key_part]
-                    elif isinstance(current, list) and isinstance(key_part, int):
-                        if key_part >= len(current):
-                            current.append({})
+                    elif isinstance(current, dict):
+                        if (
+                            is_last
+                            or key_part not in current
+                            or current[key_part] is None
+                        ):
+                            current[key_part] = child
                         current = current[key_part]
 
-                current[parts[-1]] = value
-
     # Convert dict to list, ordered by index, avoid sorted() if we can use range
     if not messages:
         return []
diff --git a/packages/uipath/tests/tracing/test_otel_exporters.py b/packages/uipath/tests/tracing/test_otel_exporters.py
index fc5a370c0..21fb1f5fe 100644
--- a/packages/uipath/tests/tracing/test_otel_exporters.py
+++ b/packages/uipath/tests/tracing/test_otel_exporters.py
@@ -569,6 +569,60 @@ def test_llm_span_mapping_consistency(self):
         self.assertEqual(usage["completionTokens"], 66)
         self.assertEqual(usage["totalTokens"], 285)
 
+    def test_llm_span_tool_calls_reconstructed_as_list(self):
+        """An LLM span's flattened tool_calls must rebuild into a list.
+
+        OpenInference flattens a tool call as
+        ``llm.output_messages.0.message.tool_calls.0.tool_call.function.name``.
+        The reconstruction must turn the ``tool_calls`` index segment into a
+        list element, not a ``{"0": ...}`` dict, so the toolCalls mapping (which
+        iterates the value as a list) actually picks the call up.
+        """
+        span_data = {
+            "Id": "1f1a4d8e-2b3c-4d5e-8f90-112233445566",
+            "TraceId": "2f1a4d8e-2b3c-4d5e-8f90-112233445566",
+            "ParentId": "3f1a4d8e-2b3c-4d5e-8f90-112233445566",
+            "Name": "UiPathChat",
+            "StartTime": "2025-09-18T15:25:36.486Z",
+            "EndTime": "2025-09-18T15:25:37.720Z",
+            "Attributes": {
+                "input.value": '{"messages": []}',
+                "output.value": '{"generations": []}',
+                "llm.model_name": "gpt-4o-mini-2024-07-18",
+                "openinference.span.kind": "LLM",
+                "llm.output_messages.0.message.role": "assistant",
+                "llm.output_messages.0.message.tool_calls.0.tool_call.id": "call_abc",
+                "llm.output_messages.0.message.tool_calls.0.tool_call.function.name": "get_weather",
+                "llm.output_messages.0.message.tool_calls.0.tool_call.function.arguments": '{"city": "NYC"}',
+                "llm.output_messages.0.message.tool_calls.1.tool_call.id": "call_def",
+                "llm.output_messages.0.message.tool_calls.1.tool_call.function.name": "get_time",
+                "llm.output_messages.0.message.tool_calls.1.tool_call.function.arguments": '{"tz": "EST"}',
+            },
+            "Status": 1,
+            "SpanType": "OpenTelemetry",
+            "ReferenceId": None,
+        }
+
+        self.exporter._process_span_attributes(span_data)
+
+        attributes = span_data["Attributes"]
+        assert isinstance(attributes, dict)
+        self.assertEqual(span_data["SpanType"], "completion")
+
+        # tool_calls reconstructed as a list, not a {"0": ...}/{"1": ...} dict.
+        message = attributes["output"][0]["message"]
+        self.assertIsInstance(message["tool_calls"], list)
+        self.assertEqual(len(message["tool_calls"]), 2)
+
+        # The toolCalls mapping picks both calls up, in order.
+        self.assertEqual(
+            attributes["toolCalls"],
+            [
+                {"id": "call_abc", "name": "get_weather", "arguments": {"city": "NYC"}},
+                {"id": "call_def", "name": "get_time", "arguments": {"tz": "EST"}},
+            ],
+        )
+
     def test_unknown_span_type_preserved(self):
         """
         Test that spans with UNKNOWN or unrecognized openinference.span.kind