Merge pull request #223 from ollama/drifkin/thinking-support

drifkin · web-flow · commit c9793cc64459 · 2025-05-29T19:39:14.000-07:00
add thinking support and examples
diff --git a/README.md b/README.md
@@ -59,6 +59,7 @@ ollama.chat(request)
     - `images` `<Uint8Array[] | string[]>`: (Optional) Images to be included in the message, either as Uint8Array or base64 encoded strings.
   - `format` `<string>`: (Optional) Set the expected format of the response (`json`).
   - `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
+  - `think` `<boolean>`: (Optional) When true, the model thinks before responding, and its thinking is returned separately from the answer in `message.thinking`. Requires a model that supports thinking.
   - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
   - `tools` `<Tool[]>`: (Optional) A list of tool calls the model may make.
   - `options` `<Options>`: (Optional) Options to configure the runtime.
@@ -81,6 +82,7 @@ ollama.generate(request)
   - `images` `<Uint8Array[] | string[]>`: (Optional) Images to be included, either as Uint8Array or base64 encoded strings.
   - `format` `<string>`: (Optional) Set the expected format of the response (`json`).
   - `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
+  - `think` `<boolean>`: (Optional) When true, the model thinks before responding, and its thinking is returned separately from the answer in the response's `thinking` field. Requires a model that supports thinking.
   - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
   - `options` `<Options>`: (Optional) Options to configure the runtime.
 - Returns: `<GenerateResponse>`
diff --git a/examples/README.md b/examples/README.md
@@ -1,7 +1,7 @@
 ## Examples
 
 > [!IMPORTANT]
-> Note: Ensure that `npm build` has been run before running the examples.
+> Note: Ensure that `npm run build` has been run before running the examples.
 
 To run the examples run:
 
diff --git a/examples/thinking/thinking-enabled.ts b/examples/thinking/thinking-enabled.ts
@@ -0,0 +1,26 @@
+import ollama from 'ollama'
+
+/**
+ * Sends one non-streaming chat request with thinking enabled, then prints the
+ * model's thinking followed by its final answer.
+ */
+async function main() {
+  const response = await ollama.chat({
+    model: 'deepseek-r1',
+    messages: [
+      {
+        role: 'user',
+        content: 'What is 10 + 23',
+      },
+    ],
+    stream: false,
+    think: true,
+  })
+
+  // `thinking` is optional on Message; fall back to an empty string so the
+  // literal text "undefined" is never printed.
+  console.log('Thinking:\n========\n\n' + (response.message.thinking ?? ''))
+  console.log('\nResponse:\n========\n\n' + response.message.content + '\n\n')
+}
+
+main()
diff --git a/examples/thinking/thinking-streaming.ts b/examples/thinking/thinking-streaming.ts
@@ -0,0 +1,48 @@
+import ollama from 'ollama'
+
+/**
+ * Streams a chat response with thinking enabled, writing the thinking text
+ * under a "Thinking" header and the answer under a "Response" header.
+ */
+async function main() {
+  const response = await ollama.chat({
+    model: 'deepseek-r1',
+    messages: [
+      {
+        role: 'user',
+        content: 'What is 10 + 23',
+      },
+    ],
+    stream: true,
+    think: true,
+  })
+
+  let startedThinking = false
+  let finishedThinking = false
+
+  for await (const chunk of response) {
+    const { thinking, content } = chunk.message
+
+    // Handle the two fields independently: if a single chunk carries both,
+    // the thinking text is written first and the content is not dropped.
+    if (thinking) {
+      if (!startedThinking) {
+        startedThinking = true
+        process.stdout.write('Thinking:\n========\n\n')
+      }
+      process.stdout.write(thinking)
+    }
+
+    if (content) {
+      // The "Response" header is only printed once, and only after some
+      // thinking output has been shown.
+      if (startedThinking && !finishedThinking) {
+        finishedThinking = true
+        process.stdout.write('\n\nResponse:\n========\n\n')
+      }
+      process.stdout.write(content)
+    }
+  }
+}
+
+main()
diff --git a/src/interfaces.ts b/src/interfaces.ts
@@ -56,13 +56,15 @@ export interface GenerateRequest {
   format?: string | object
   images?: Uint8Array[] | string[]
   keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
+  think?: boolean
 
   options?: Partial<Options>
 }
 
 export interface Message {
   role: string
   content: string
+  thinking?: string
   images?: Uint8Array[] | string[]
   tool_calls?: ToolCall[]
 }
@@ -106,6 +108,7 @@ export interface ChatRequest {
   format?: string | object
   keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
   tools?: Tool[]
+  think?: boolean
 
   options?: Partial<Options>
 }
@@ -174,6 +177,7 @@ export interface GenerateResponse {
   model: string
   created_at: Date
   response: string
+  thinking?: string
   done: boolean
   done_reason: string
   context: number[]