SAP · davidkna-sap · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/.changeset/four-tigers-accept.md b/.changeset/four-tigers-accept.md
@@ -0,0 +1,7 @@
+---
+"@sap-ai-sdk/langchain": minor
+---
+
+[feat] Add `cache_control` call option to the LangChain orchestration client.
+When the `cache_control` option is set (directly or via the `orchestrationPromptCachingMiddleware()` middleware), a cache breakpoint is automatically applied to the request.
+
diff --git a/.changeset/great-dots-cheat.md b/.changeset/great-dots-cheat.md
@@ -0,0 +1,6 @@
+---
+"@sap-ai-sdk/langchain": patch
+---
+
+[feat] Expose `cached_tokens` and `cache_creation_tokens` in `usage_metadata.input_token_details` for LangChain orchestration responses.
+
diff --git a/.changeset/short-waves-worry.md b/.changeset/short-waves-worry.md
@@ -0,0 +1,8 @@
+---
+"@sap-ai-sdk/langchain": minor
+---
+
+[feat] Export `orchestrationPromptCachingMiddleware()` middleware for the LangChain Orchestration client from `@sap-ai-sdk/langchain/orchestration/prompt-caching-middleware`.
+It enables automatic cache control for orchestration requests.
+The middleware requires the optional `langchain` peer dependency to be installed.
+
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -64,10 +64,10 @@ jobs:
       - name: Set NODE_PATH
         run: echo "NODE_PATH=$(pwd)/node_modules" >> "$GITHUB_ENV"
       - name: Check public API
-        uses: sap/cloud-sdk-js/.github/actions/check-public-api@main
+        uses: sap/cloud-sdk-js/.github/actions/check-public-api@extend-generator-ts-options
         with:
           force_internal_exports: 'false'
-          ignored_path_pattern: '.*?/client/.*?/schema|.*?/zod/.*?'
+          ignored_path_pattern: '.*?/client/.*?/schema|.*?/zod/.*?|.*?/prompt-caching-middleware.*?'
       - name: Check dependencies
         run: pnpm check:deps
       - name: License Check

diff --git a/packages/core/src/stream/line-decoder.ts b/packages/core/src/stream/line-decoder.ts
@@ -36,9 +36,7 @@ export class LineDecoder {
       return [];
     }
 
-    const trailingNewline = LineDecoder.NEWLINE_CHARS.has(
-      text[text.length - 1] || ''
-    );
+    const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text.at(-1) || '');
     let lines = text.split(LineDecoder.NEWLINE_REGEXP);
 
     // if there is a trailing new line then the last entry will be an empty

diff --git a/packages/langchain/package.json b/packages/langchain/package.json
@@ -20,6 +20,10 @@
       "types": "./dist/index.d.ts",
       "default": "./dist/index.js"
     },
+    "./orchestration/prompt-caching-middleware": {
+      "types": "./dist/orchestration/prompt-caching-middleware.d.ts",
+      "default": "./dist/orchestration/prompt-caching-middleware.js"
+    },
     "./internal.js": {
       "types": "./dist/internal.d.ts",
       "default": "./dist/internal.js"
@@ -49,9 +53,16 @@
   "devDependencies": {
     "@langchain/core": "^1.1.16",
     "@langchain/langgraph": "^1.4.1",
+    "langchain": "^1.4.4",
     "zod": "^4.4.3"
   },
   "peerDependencies": {
-    "@langchain/core": "^1.1.16"
+    "@langchain/core": "^1.1.16",
+    "langchain": "^1.4.4"
+  },
+  "peerDependenciesMeta": {
+    "langchain": {
+      "optional": true
+    }
   }
 }
diff --git a/packages/langchain/src/orchestration/client.ts b/packages/langchain/src/orchestration/client.ts
@@ -23,7 +23,8 @@ import {
   mapLangChainMessagesToOrchestrationMessages,
   mapOutputToChatResult,
   mapToolToChatCompletionTool,
-  mapOrchestrationChunkToLangChainMessageChunk
+  mapOrchestrationChunkToLangChainMessageChunk,
+  applyCacheControlToLastMessage
 } from './util.js';
 import type { NewTokenIndices } from '@langchain/core/callbacks/base';
 import type {
@@ -140,6 +141,9 @@ export class OrchestrationClient extends BaseChatModel<
 
     const { placeholderValues, customRequestConfig } = options;
     const allMessages = mapLangChainMessagesToOrchestrationMessages(messages);
+    if (options.cache_control) {
+      applyCacheControlToLastMessage(allMessages, options.cache_control);
+    }
     const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);
 
     const res = await this.caller.callWithOptions(
@@ -352,6 +356,12 @@ export class OrchestrationClient extends BaseChatModel<
     options.signal?.throwIfAborted();
     const orchestrationMessages =
       mapLangChainMessagesToOrchestrationMessages(messages);
+    if (options.cache_control) {
+      applyCacheControlToLastMessage(
+        orchestrationMessages,
+        options.cache_control
+      );
+    }
 
     const { placeholderValues, customRequestConfig } = options;
     const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);

diff --git a/packages/langchain/src/orchestration/prompt-caching-middleware.test.ts b/packages/langchain/src/orchestration/prompt-caching-middleware.test.ts
@@ -0,0 +1,196 @@
+import { jest } from '@jest/globals';
+import { AIMessage, HumanMessage } from '@langchain/core/messages';
+import { createAgent } from 'langchain';
+import { AzureOpenAiChatClient } from '../openai/chat.js';
+import { OrchestrationClient } from './client.js';
+import { orchestrationPromptCachingMiddleware } from './prompt-caching-middleware.js';
+import type { LanguageModelLike } from '@langchain/core/language_models/base';
+
+function getBindToolsOptions(
+  model: LanguageModelLike
+): Record<string, unknown> | undefined {
+  const bindToolsMock = (model as any).bindTools as jest.Mock;
+  return bindToolsMock.mock.calls.at(-1)?.[1] as
+    | Record<string, unknown>
+    | undefined;
+}
+
+function stubModel<T extends LanguageModelLike>(model: T): T {
+  const bindToolsMock = jest.fn().mockReturnValue(model);
+  const invokeMock = jest
+    .fn()
+    .mockResolvedValue(new AIMessage('Response from model') as never);
+
+  Object.assign(model as object, {
+    bindTools: bindToolsMock,
+    invoke: invokeMock
+  });
+
+  return model;
+}
+
+function createSupportedModel(): LanguageModelLike {
+  return stubModel(
+    new OrchestrationClient({
+      promptTemplating: {
+        model: {
+          name: 'gpt-5.4-nano',
+          params: {}
+        }
+      }
+    })
+  );
+}
+
+function createUnsupportedModel(): LanguageModelLike {
+  return stubModel(new AzureOpenAiChatClient({ modelName: 'gpt-5.4-nano' }));
+}
+
+describe('orchestrationPromptCachingMiddleware', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  it('adds cache_control to modelSettings when conditions are met', async () => {
+    const model = createSupportedModel();
+    const middleware = orchestrationPromptCachingMiddleware({
+      ttl: '5m',
+      minMessagesToCache: 3
+    });
+
+    const agent = createAgent({ model, middleware: [middleware] });
+
+    await agent.invoke({
+      messages: [
+        new HumanMessage('Hello'),
+        new AIMessage('Hi there!'),
+        new HumanMessage('How are you?')
+      ]
+    });
+
+    expect(model.bindTools).toHaveBeenCalled();
+    expect(getBindToolsOptions(model)).toHaveProperty('cache_control');
+    expect(getBindToolsOptions(model)?.cache_control).toEqual({
+      type: 'ephemeral',
+      ttl: '5m'
+    });
+  });
+
+  it('does not add cache_control when message count is below threshold', async () => {
+    const model = createSupportedModel();
+    const middleware = orchestrationPromptCachingMiddleware({
+      ttl: '1h',
+      minMessagesToCache: 5
+    });
+
+    const agent = createAgent({ model, middleware: [middleware] });
+
+    await agent.invoke({
+      messages: [new HumanMessage('Hello'), new AIMessage('Hi there!')]
+    });
+
+    expect(model.bindTools).toHaveBeenCalled();
+    expect(getBindToolsOptions(model)?.cache_control).toBeUndefined();
+  });
+
+  it('skips cache_control when enableCaching is false', async () => {
+    const model = createSupportedModel();
+    const middleware = orchestrationPromptCachingMiddleware({
+      enableCaching: false,
+      minMessagesToCache: 1
+    });
+
+    const agent = createAgent({ model, middleware: [middleware] });
+
+    await agent.invoke({
+      messages: [
+        new HumanMessage('Hello'),
+        new AIMessage('Hi there!'),
+        new HumanMessage('How are you?')
+      ]
+    });
+
+    expect(getBindToolsOptions(model)?.cache_control).toBeUndefined();
+  });
+
+  it('includes the system message in the threshold count', async () => {
+    const model = createSupportedModel();
+    const middleware = orchestrationPromptCachingMiddleware({
+      ttl: '1h',
+      minMessagesToCache: 3
+    });
+
+    const agent = createAgent({
+      model,
+      systemPrompt: 'You are a helpful assistant',
+      middleware: [middleware]
+    });
+
+    // Only 2 messages, but system prompt pushes the total to 3.
+    await agent.invoke({
+      messages: [new HumanMessage('Hello'), new AIMessage('Hi there!')]
+    });
+
+    expect(getBindToolsOptions(model)).toHaveProperty('cache_control');
+    expect(getBindToolsOptions(model)?.cache_control).toEqual({
+      type: 'ephemeral',
+      ttl: '1h'
+    });
+  });
+
+  describe('non-Orchestration models', () => {
+    it('warns and skips caching for non-OrchestrationClient models by default', async () => {
+      const model = createUnsupportedModel();
+      const middleware = orchestrationPromptCachingMiddleware({
+        minMessagesToCache: 1
+      });
+
+      const agent = createAgent({ model, middleware: [middleware] });
+
+      await expect(
+        agent.invoke({ messages: [new HumanMessage('Hello')] })
+      ).resolves.toBeDefined();
+
+      expect(getBindToolsOptions(model)?.cache_control).toBeUndefined();
+    });
+
+    it('throws when unsupportedModelBehavior is raise', async () => {
+      const model = createUnsupportedModel();
+      const middleware = orchestrationPromptCachingMiddleware({
+        unsupportedModelBehavior: 'raise',
+        minMessagesToCache: 1
+      });
+
+      const agent = createAgent({ model, middleware: [middleware] });
+
+      await expect(
+        agent.invoke({ messages: [new HumanMessage('Hello')] })
+      ).rejects.toThrow(
+        "Unsupported model 'AzureOpenAiChatClient'. orchestrationPromptCachingMiddleware requires an OrchestrationClient"
+      );
+    });
+
+    it('prefers runtime context unsupportedModelBehavior over middleware options', async () => {
+      const model = createUnsupportedModel();
+      const middleware = orchestrationPromptCachingMiddleware({
+        unsupportedModelBehavior: 'warn',
+        minMessagesToCache: 1
+      });
+
+      const agent = createAgent({ model, middleware: [middleware] });
+
+      await expect(
+        agent.invoke(
+          { messages: [new HumanMessage('Hello')] },
+          {
+            context: {
+              unsupportedModelBehavior: 'raise'
+            }
+          }
+        )
+      ).rejects.toThrow(
+        "Unsupported model 'AzureOpenAiChatClient'. orchestrationPromptCachingMiddleware requires an OrchestrationClient"
+      );
+    });
+  });
+});