-
Notifications
You must be signed in to change notification settings - Fork 7
release: 0.11.0 #343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
release: 0.11.0 #343
Changes from all commits
76e0299
374dd9a
ce99568
781fa60
d807d4a
1a823ac
2f4f8ff
09ccff7
481fc64
751e6e2
7d52c8b
dc26b20
d3d2fd3
604484a
fb2b652
f9e100d
d2c3873
b4ac845
1f81349
c020912
8c93c46
d458944
50e7c83
6627005
a0ff612
3ef6804
a3d0650
6ba71ee
4d15e72
34d770c
4218e01
3b8adfd
a79813a
8d80f94
b8535df
d47d89a
6b3f278
2a0e1a4
888918e
e3f1889
130d61a
d750e09
168b21f
406192a
082a01a
ad0faaf
7c918db
01492fe
c6f56f7
0d318ad
eda66f5
91db92c
6b4dd25
c1e7675
b09749b
8921cc6
bf74fd7
5aa039f
5a8ec93
65af241
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,3 @@ | ||
| { | ||
| ".": "0.10.4" | ||
| ".": "0.11.0" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| configured_endpoints: 45 | ||
| openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sgp/agentex-sdk-c108a179582f0e0c6d479ea4b3bc6310a83693987073967c2b6203df23718eb2.yml | ||
| openapi_spec_hash: 53b8e5866709af71bef94816b8ede38b | ||
| openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/sgp/agentex-sdk-307ea66bdd28f83ddc0c526365cfe06f4c1bb2fd421d19f6ebb7f687d06f9ee6.yml | ||
| openapi_spec_hash: 5bbd18a405a11e8497d38a5a88b98018 | ||
| config_hash: fb079ef7936611b032568661b8165f19 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. | ||
|
|
||
| __title__ = "agentex" | ||
| __version__ = "0.10.4" # x-release-please-version | ||
| __version__ = "0.11.0" # x-release-please-version |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import override | ||
|
|
||
| import scale_gp_beta.lib.tracing as tracing | ||
|
|
@@ -125,48 +127,64 @@ def _add_source_to_span(self, span: Span) -> None: | |
|
|
||
| @override | ||
| async def on_span_start(self, span: Span) -> None: | ||
| self._add_source_to_span(span) | ||
| sgp_span = create_span( | ||
| name=span.name, | ||
| span_type=_get_span_type(span), | ||
| span_id=span.id, | ||
| parent_id=span.parent_id, | ||
| trace_id=span.trace_id, | ||
| input=span.input, | ||
| output=span.output, | ||
| metadata=span.data, | ||
| ) | ||
| sgp_span.start_time = span.start_time.isoformat() # type: ignore[union-attr] | ||
| await self.on_spans_start([span]) | ||
|
|
||
| @override | ||
| async def on_span_end(self, span: Span) -> None: | ||
| await self.on_spans_end([span]) | ||
|
|
||
| @override | ||
| async def on_spans_start(self, spans: list[Span]) -> None: | ||
| if not spans: | ||
| return | ||
|
|
||
| sgp_spans: list[SGPSpan] = [] | ||
| for span in spans: | ||
| self._add_source_to_span(span) | ||
| sgp_span = create_span( | ||
| name=span.name, | ||
| span_type=_get_span_type(span), | ||
| span_id=span.id, | ||
| parent_id=span.parent_id, | ||
| trace_id=span.trace_id, | ||
| input=span.input, | ||
| output=span.output, | ||
| metadata=span.data, | ||
| ) | ||
| sgp_span.start_time = span.start_time.isoformat() # type: ignore[union-attr] | ||
| self._spans[span.id] = sgp_span | ||
| sgp_spans.append(sgp_span) | ||
|
|
||
| if self.disabled: | ||
| logger.warning("SGP is disabled, skipping span upsert") | ||
| return | ||
| # TODO(AGX1-198): Batch multiple spans into a single upsert_batch call | ||
| # instead of one span per HTTP request. | ||
| # https://linear.app/scale-epd/issue/AGX1-198/actually-use-sgp-batching-for-spans | ||
| await self.sgp_async_client.spans.upsert_batch( # type: ignore[union-attr] | ||
| items=[sgp_span.to_request_params()] | ||
| items=[s.to_request_params() for s in sgp_spans] | ||
| ) | ||
|
Comment on lines
+154
to
163
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Prompt To Fix With AI
This is a comment left during a code review.
Path: src/agentex/lib/core/tracing/processors/sgp_tracing_processor.py
Line: 154-163
Comment:
**`shutdown()` crashes with `AttributeError` when `disabled=True` and spans are in-flight**
`on_spans_start` now populates `self._spans` (line 155) **before** the `if self.disabled: return` guard (line 158). If any spans are started but not yet ended when `shutdown()` is called in disabled mode, it reaches `self.sgp_async_client.spans.upsert_batch(...)` where `self.sgp_async_client` is `None`, triggering an `AttributeError`. Before this PR the disabled path returned before populating `_spans`, so `_spans` was always empty at shutdown time and this was never triggered in practice. The fix is to either move the `self._spans[span.id] = sgp_span` assignment after the `if self.disabled` guard, or add an early `if self.disabled: return` check at the top of `shutdown()` (mirroring how `on_spans_end` handles it at line 184).
How can I resolve this? If you propose a fix, please make it concise. |
||
|
|
||
| self._spans[span.id] = sgp_span | ||
|
|
||
| @override | ||
| async def on_span_end(self, span: Span) -> None: | ||
| sgp_span = self._spans.pop(span.id, None) | ||
| if sgp_span is None: | ||
| logger.warning(f"Span {span.id} not found in stored spans, skipping span end") | ||
| async def on_spans_end(self, spans: list[Span]) -> None: | ||
| if not spans: | ||
| return | ||
|
|
||
| self._add_source_to_span(span) | ||
| sgp_span.input = span.input # type: ignore[assignment] | ||
| sgp_span.output = span.output # type: ignore[assignment] | ||
| sgp_span.metadata = span.data # type: ignore[assignment] | ||
| sgp_span.end_time = span.end_time.isoformat() # type: ignore[union-attr] | ||
|
|
||
| if self.disabled: | ||
| to_upsert: list[SGPSpan] = [] | ||
| for span in spans: | ||
| sgp_span = self._spans.pop(span.id, None) | ||
| if sgp_span is None: | ||
| logger.warning(f"Span {span.id} not found in stored spans, skipping span end") | ||
| continue | ||
|
|
||
| self._add_source_to_span(span) | ||
| sgp_span.input = span.input # type: ignore[assignment] | ||
| sgp_span.output = span.output # type: ignore[assignment] | ||
| sgp_span.metadata = span.data # type: ignore[assignment] | ||
| sgp_span.end_time = span.end_time.isoformat() # type: ignore[union-attr] | ||
| to_upsert.append(sgp_span) | ||
|
|
||
| if self.disabled or not to_upsert: | ||
| return | ||
| await self.sgp_async_client.spans.upsert_batch( # type: ignore[union-attr] | ||
| items=[sgp_span.to_request_params()] | ||
| items=[s.to_request_params() for s in to_upsert] | ||
| ) | ||
|
|
||
| @override | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,29 +95,40 @@ async def _drain_loop(self) -> None: | |
|
|
||
| @staticmethod | ||
| async def _process_items(items: list[_SpanQueueItem]) -> None: | ||
| """Process a list of span events concurrently.""" | ||
| """Dispatch a batch of same-event-type items to each processor in one call. | ||
|
|
||
| async def _handle(item: _SpanQueueItem) -> None: | ||
| Groups spans by processor so each processor sees its full slice of the | ||
| drain batch at once. Processors that override the batched methods can | ||
| then send a single HTTP request per drain cycle instead of N. | ||
| """ | ||
| if not items: | ||
| return | ||
|
|
||
| event_type = items[0].event_type | ||
| assert all(i.event_type == event_type for i in items), ( | ||
| "_process_items requires all items to share the same event_type; " | ||
| "callers must split START and END batches before dispatching." | ||
| ) | ||
|
Comment on lines
+107
to
+111
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The code comment correctly identifies this as a potential "silent data-corruption bug," but using `assert` means the guard is silently stripped when Python runs with the `-O` flag.

Prompt To Fix With AI
This is a comment left during a code review.
Path: src/agentex/lib/core/tracing/span_queue.py
Line: 107-111
Comment:
**`assert` in production guard defeats data-corruption protection**
The code comment correctly identifies this as a potential "silent data-corruption bug," but using `assert` for the guard means it is silently stripped when Python runs with the `-O` (optimize) flag. If a caller ever passes a mixed-event-type list, START and END spans would be fed to the wrong batched method with no warning. Use an explicit `if/raise` instead.
How can I resolve this? If you propose a fix, please make it concise. |
||
| by_processor: dict[AsyncTracingProcessor, list[Span]] = {} | ||
| for item in items: | ||
| for p in item.processors: | ||
| by_processor.setdefault(p, []).append(item.span) | ||
|
|
||
| async def _handle(p: AsyncTracingProcessor, spans: list[Span]) -> None: | ||
| try: | ||
| if item.event_type == SpanEventType.START: | ||
| coros = [p.on_span_start(item.span) for p in item.processors] | ||
| if event_type == SpanEventType.START: | ||
| await p.on_spans_start(spans) | ||
| else: | ||
| coros = [p.on_span_end(item.span) for p in item.processors] | ||
| results = await asyncio.gather(*coros, return_exceptions=True) | ||
| for result in results: | ||
| if isinstance(result, Exception): | ||
| logger.error( | ||
| "Tracing processor error during %s for span %s", | ||
| item.event_type.value, | ||
| item.span.id, | ||
| exc_info=result, | ||
| ) | ||
| await p.on_spans_end(spans) | ||
| except Exception: | ||
| logger.exception( | ||
| "Unexpected error in span queue for span %s", item.span.id | ||
| "Tracing processor %s failed handling %d spans during %s", | ||
| type(p).__name__, | ||
| len(spans), | ||
| event_type.value, | ||
| ) | ||
|
|
||
| await asyncio.gather(*[_handle(item) for item in items]) | ||
| await asyncio.gather(*[_handle(p, spans) for p, spans in by_processor.items()]) | ||
|
|
||
| # ------------------------------------------------------------------ | ||
| # Shutdown | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
**`_spans` populated before upsert — stale entries on HTTP failure**

Spans are added to `self._spans` before the `upsert_batch` HTTP call (lines 155–156). If the batch upsert throws (network error, server 5xx), the exception is caught upstream by the queue's `_handle`, but `_spans` already holds entries for spans whose start event was never delivered to SGP. A subsequent `on_spans_end` will find those spans, update them, and send end-only upserts — orphaned end events with no matching start on the server.

The old single-span code registered the span in `_spans` only after a successful upsert, so failures were cleanly skipped on the end path. Consider populating `_spans` only after confirming the batch call succeeded, or rolling back entries on exception.

Prompt To Fix With AI