Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/four-tigers-accept.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@sap-ai-sdk/langchain": minor
---

[feat] Add `cache_control` call option to the LangChain orchestration client.
When the `cache_control` option is set (directly or via the `orchestrationPromptCachingMiddleware()` middleware), a cache breakpoint is automatically applied to the last message of the request.

6 changes: 6 additions & 0 deletions .changeset/great-dots-cheat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@sap-ai-sdk/langchain": patch
---

[feat] Expose `cached_tokens` and `cache_creation_tokens` in `usage_metadata.input_token_details` for LangChain orchestration responses.

8 changes: 8 additions & 0 deletions .changeset/short-waves-worry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@sap-ai-sdk/langchain": minor
---

[feat] Export `orchestrationPromptCachingMiddleware()` middleware for the LangChain Orchestration client from `@sap-ai-sdk/langchain/orchestration/prompt-caching-middleware`.
It enables automatic cache control for orchestration requests.
The middleware requires the optional `langchain` peer dependency to be installed.

4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ jobs:
- name: Set NODE_PATH
run: echo "NODE_PATH=$(pwd)/node_modules" >> "$GITHUB_ENV"
- name: Check public API
uses: sap/cloud-sdk-js/.github/actions/check-public-api@main
uses: sap/cloud-sdk-js/.github/actions/check-public-api@extend-generator-ts-options

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be reverted after merge of SAP/cloud-sdk-js#6694

with:
force_internal_exports: 'false'
ignored_path_pattern: '.*?/client/.*?/schema|.*?/zod/.*?'
ignored_path_pattern: '.*?/client/.*?/schema|.*?/zod/.*?|.*?/prompt-caching-middleware.*?'
- name: Check dependencies
run: pnpm check:deps
- name: License Check
Expand Down
4 changes: 1 addition & 3 deletions packages/core/src/stream/line-decoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@ export class LineDecoder {
return [];
}

const trailingNewline = LineDecoder.NEWLINE_CHARS.has(
text[text.length - 1] || ''
);
const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text.at(-1) || '');
let lines = text.split(LineDecoder.NEWLINE_REGEXP);

// if there is a trailing new line then the last entry will be an empty
Expand Down
13 changes: 12 additions & 1 deletion packages/langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
},
"./orchestration/prompt-caching-middleware": {

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this as an explicit export to avoid having to import `langchain` dynamically, which would also have required some manual type definitions.

"types": "./dist/orchestration/prompt-caching-middleware.d.ts",
"default": "./dist/orchestration/prompt-caching-middleware.js"
},
"./internal.js": {
"types": "./dist/internal.d.ts",
"default": "./dist/internal.js"
Expand Down Expand Up @@ -49,9 +53,16 @@
"devDependencies": {
"@langchain/core": "^1.1.16",
"@langchain/langgraph": "^1.4.1",
"langchain": "^1.4.4",
"zod": "^4.4.3"
},
"peerDependencies": {
"@langchain/core": "^1.1.16"
"@langchain/core": "^1.1.16",
"langchain": "^1.4.4"
},
"peerDependenciesMeta": {
"langchain": {
"optional": true
}
}
}
12 changes: 11 additions & 1 deletion packages/langchain/src/orchestration/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import {
mapLangChainMessagesToOrchestrationMessages,
mapOutputToChatResult,
mapToolToChatCompletionTool,
mapOrchestrationChunkToLangChainMessageChunk
mapOrchestrationChunkToLangChainMessageChunk,
applyCacheControlToLastMessage
} from './util.js';
import type { NewTokenIndices } from '@langchain/core/callbacks/base';
import type {
Expand Down Expand Up @@ -140,6 +141,9 @@ export class OrchestrationClient extends BaseChatModel<

const { placeholderValues, customRequestConfig } = options;
const allMessages = mapLangChainMessagesToOrchestrationMessages(messages);
if (options.cache_control) {
applyCacheControlToLastMessage(allMessages, options.cache_control);
}
const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);

const res = await this.caller.callWithOptions(
Expand Down Expand Up @@ -352,6 +356,12 @@ export class OrchestrationClient extends BaseChatModel<
options.signal?.throwIfAborted();
const orchestrationMessages =
mapLangChainMessagesToOrchestrationMessages(messages);
if (options.cache_control) {
applyCacheControlToLastMessage(
orchestrationMessages,
options.cache_control
);
}

const { placeholderValues, customRequestConfig } = options;
const mergedOrchestrationConfig = this.mergeOrchestrationConfigs(options);
Expand Down
196 changes: 196 additions & 0 deletions packages/langchain/src/orchestration/prompt-caching-middleware.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import { jest } from '@jest/globals';
import { AIMessage, HumanMessage } from '@langchain/core/messages';
import { createAgent } from 'langchain';
import { AzureOpenAiChatClient } from '../openai/chat.js';
import { OrchestrationClient } from './client.js';
import { orchestrationPromptCachingMiddleware } from './prompt-caching-middleware.js';
import type { LanguageModelLike } from '@langchain/core/language_models/base';

/**
 * Reads the options (second argument) of the most recent `bindTools` call
 * recorded on a stubbed model.
 * @param model - Model whose `bindTools` method has been replaced by a jest mock.
 * @returns The second argument of the last recorded call, or `undefined` when
 * no call was recorded or the call had no second argument.
 */
function getBindToolsOptions(
  model: LanguageModelLike
): Record<string, unknown> | undefined {
  // `bindTools` is a mock installed at runtime, so it is read untyped and
  // then viewed as a jest mock to reach its call log.
  const { bindTools } = model as any;
  const recordedCalls = (bindTools as jest.Mock).mock.calls;
  const latestCall = recordedCalls.at(-1);
  return latestCall?.[1] as Record<string, unknown> | undefined;
}

/**
 * Overwrites `bindTools` and `invoke` on the given model with jest mocks.
 * `bindTools` returns the same model instance and `invoke` resolves to a
 * fixed `AIMessage`.
 * @param model - Model instance to patch in place.
 * @returns The same, now stubbed, model instance.
 */
function stubModel<T extends LanguageModelLike>(model: T): T {
  const stubs = {
    // Returning the model itself keeps the stubbed `invoke` on whatever
    // `bindTools` hands back.
    bindTools: jest.fn().mockReturnValue(model),
    invoke: jest
      .fn()
      .mockResolvedValue(new AIMessage('Response from model') as never)
  };

  Object.assign(model as object, stubs);
  return model;
}

/**
 * Creates an `OrchestrationClient` and stubs it via `stubModel`.
 * @returns The stubbed orchestration client.
 */
function createSupportedModel(): LanguageModelLike {
  const orchestrationClient = new OrchestrationClient({
    promptTemplating: {
      model: { name: 'gpt-5.4-nano', params: {} }
    }
  });
  return stubModel(orchestrationClient);
}

/**
 * Creates an `AzureOpenAiChatClient` and stubs it via `stubModel`.
 * The tests use it as a model that is not an `OrchestrationClient`.
 * @returns The stubbed Azure OpenAI chat client.
 */
function createUnsupportedModel(): LanguageModelLike {
  const azureOpenAiClient = new AzureOpenAiChatClient({
    modelName: 'gpt-5.4-nano'
  });
  return stubModel(azureOpenAiClient);
}

describe('orchestrationPromptCachingMiddleware', () => {
  // Message factories: every call creates fresh message instances so that
  // no message object is shared between agent runs.
  const singleMessage = () => [new HumanMessage('Hello')];
  const twoMessages = () => [
    new HumanMessage('Hello'),
    new AIMessage('Hi there!')
  ];
  const threeMessages = () => [
    ...twoMessages(),
    new HumanMessage('How are you?')
  ];

  beforeEach(() => {
    jest.clearAllMocks();
  });

  it('adds cache_control to modelSettings when conditions are met', async () => {
    const model = createSupportedModel();
    const cachingMiddleware = orchestrationPromptCachingMiddleware({
      ttl: '5m',
      minMessagesToCache: 3
    });
    const agent = createAgent({ model, middleware: [cachingMiddleware] });

    await agent.invoke({ messages: threeMessages() });

    expect(model.bindTools).toHaveBeenCalled();
    const bindOptions = getBindToolsOptions(model);
    expect(bindOptions).toHaveProperty('cache_control');
    expect(bindOptions?.cache_control).toEqual({
      type: 'ephemeral',
      ttl: '5m'
    });
  });

  it('does not add cache_control when message count is below threshold', async () => {
    const model = createSupportedModel();
    const cachingMiddleware = orchestrationPromptCachingMiddleware({
      ttl: '1h',
      minMessagesToCache: 5
    });
    const agent = createAgent({ model, middleware: [cachingMiddleware] });

    await agent.invoke({ messages: twoMessages() });

    expect(model.bindTools).toHaveBeenCalled();
    const bindOptions = getBindToolsOptions(model);
    expect(bindOptions?.cache_control).toBeUndefined();
  });

  it('skips cache_control when enableCaching is false', async () => {
    const model = createSupportedModel();
    const cachingMiddleware = orchestrationPromptCachingMiddleware({
      enableCaching: false,
      minMessagesToCache: 1
    });
    const agent = createAgent({ model, middleware: [cachingMiddleware] });

    await agent.invoke({ messages: threeMessages() });

    const bindOptions = getBindToolsOptions(model);
    expect(bindOptions?.cache_control).toBeUndefined();
  });

  it('includes the system message in the threshold count', async () => {
    const model = createSupportedModel();
    const cachingMiddleware = orchestrationPromptCachingMiddleware({
      ttl: '1h',
      minMessagesToCache: 3
    });
    const agent = createAgent({
      model,
      systemPrompt: 'You are a helpful assistant',
      middleware: [cachingMiddleware]
    });

    // Only 2 messages, but system prompt pushes the total to 3.
    await agent.invoke({ messages: twoMessages() });

    const bindOptions = getBindToolsOptions(model);
    expect(bindOptions).toHaveProperty('cache_control');
    expect(bindOptions?.cache_control).toEqual({
      type: 'ephemeral',
      ttl: '1h'
    });
  });

  describe('non-Orchestration models', () => {
    // Message expected whenever the unsupported model is rejected.
    const unsupportedModelError =
      "Unsupported model 'AzureOpenAiChatClient'. orchestrationPromptCachingMiddleware requires an OrchestrationClient";

    // NOTE(review): despite the title, only the absence of `cache_control`
    // is asserted here; the warning itself is not checked.
    it('warns and skips caching for non-OrchestrationClient models by default', async () => {
      const model = createUnsupportedModel();
      const cachingMiddleware = orchestrationPromptCachingMiddleware({
        minMessagesToCache: 1
      });
      const agent = createAgent({ model, middleware: [cachingMiddleware] });

      const invocation = agent.invoke({ messages: singleMessage() });
      await expect(invocation).resolves.toBeDefined();

      const bindOptions = getBindToolsOptions(model);
      expect(bindOptions?.cache_control).toBeUndefined();
    });

    it('throws when unsupportedModelBehavior is raise', async () => {
      const model = createUnsupportedModel();
      const cachingMiddleware = orchestrationPromptCachingMiddleware({
        unsupportedModelBehavior: 'raise',
        minMessagesToCache: 1
      });
      const agent = createAgent({ model, middleware: [cachingMiddleware] });

      const invocation = agent.invoke({ messages: singleMessage() });
      await expect(invocation).rejects.toThrow(unsupportedModelError);
    });

    it('prefers runtime context unsupportedModelBehavior over middleware options', async () => {
      const model = createUnsupportedModel();
      const cachingMiddleware = orchestrationPromptCachingMiddleware({
        unsupportedModelBehavior: 'warn',
        minMessagesToCache: 1
      });
      const agent = createAgent({ model, middleware: [cachingMiddleware] });

      // The middleware is configured with 'warn'; the per-invocation context
      // asks for 'raise', and the run is expected to reject.
      const invocation = agent.invoke(
        { messages: singleMessage() },
        {
          context: {
            unsupportedModelBehavior: 'raise'
          }
        }
      );
      await expect(invocation).rejects.toThrow(unsupportedModelError);
    });
  });
});
Loading
Loading