
Commit 60bfed1

Merge pull request #239 from ollama/drifkin/thinking-levels

add support for thinking levels

2 parents 45d2e01 + c84c15b

File tree

3 files changed: +40 −4 lines changed

README.md

Lines changed: 2 additions & 2 deletions

@@ -66,7 +66,7 @@ ollama.chat(request)
 - `tool_name` `<string>`: (Optional) Add the name of the tool that was executed to inform the model of the result
 - `format` `<string>`: (Optional) Set the expected format of the response (`json`).
 - `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
-- `think` `<boolean>`: (Optional) When true, the model will think about the response before responding. Requires thinking support from the model.
+- `think` `<boolean | "high" | "medium" | "low">`: (Optional) Enable model thinking. Use `true`/`false` or specify a level. Requires model support.
 - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
 - `tools` `<Tool[]>`: (Optional) A list of tool calls the model may make.
 - `options` `<Options>`: (Optional) Options to configure the runtime.
@@ -89,7 +89,7 @@ ollama.generate(request)
 - `images` `<Uint8Array[] | string[]>`: (Optional) Images to be included, either as Uint8Array or base64 encoded strings.
 - `format` `<string>`: (Optional) Set the expected format of the response (`json`).
 - `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
-- `think` `<boolean>`: (Optional) When true, the model will think about the response before responding. Requires thinking support from the model.
+- `think` `<boolean | "high" | "medium" | "low">`: (Optional) Enable model thinking. Use `true`/`false` or specify a level. Requires model support.
 - `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
 - `options` `<Options>`: (Optional) Options to configure the runtime.
 - Returns: `<GenerateResponse>`
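The `think` parameter now accepts either a boolean or one of three level strings. A minimal sketch of validating an untrusted value before placing it in a request (the `ThinkValue` type alias and `isThinkValue` helper are illustrative, not part of the library's API):

```typescript
// Values accepted by the `think` field after this change.
type ThinkValue = boolean | 'high' | 'medium' | 'low'

// Hypothetical helper: narrow an unknown value to a valid ThinkValue.
function isThinkValue(value: unknown): value is ThinkValue {
  return (
    typeof value === 'boolean' ||
    value === 'high' ||
    value === 'medium' ||
    value === 'low'
  )
}
```

A guard like this is only needed when the value comes from outside TypeScript (CLI flags, JSON config); in-code literals are checked by the compiler against the union.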

examples/thinking/thinking-levels.ts

Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+import ollama from 'ollama'
+
+function printHeading(text: string) {
+  console.log(text)
+  console.log('='.repeat(text.length))
+}
+
+async function main() {
+  const messages = [{ role: 'user', content: 'What is 10 + 23?' }]
+
+  // gpt-oss supports 'low', 'medium', 'high'
+  const thinkingLevels = ['low', 'medium', 'high'] as const
+
+  for (const [index, level] of thinkingLevels.entries()) {
+    const response = await ollama.chat({
+      model: 'gpt-oss:20b',
+      messages,
+      think: level,
+    })
+
+    printHeading(`Thinking (${level})`)
+    console.log(response.message.thinking ?? '')
+    console.log('\n')
+
+    printHeading('Response')
+    console.log(response.message.content)
+    console.log('\n')
+
+    if (index < thinkingLevels.length - 1) {
+      console.log('-'.repeat(20))
+      console.log('\n')
+    }
+  }
+}
+
+main()

src/interfaces.ts

Lines changed: 2 additions & 2 deletions

@@ -56,7 +56,7 @@ export interface GenerateRequest {
   format?: string | object
   images?: Uint8Array[] | string[]
   keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
-  think?: boolean
+  think?: boolean | 'high' | 'medium' | 'low'

   options?: Partial<Options>
 }
@@ -109,7 +109,7 @@ export interface ChatRequest {
   format?: string | object
   keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
   tools?: Tool[]
-  think?: boolean
+  think?: boolean | 'high' | 'medium' | 'low'

   options?: Partial<Options>
 }
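Widening `think?: boolean` to the union keeps the change backward compatible: existing boolean callers still type-check, and the three level strings become valid. A small sketch against a local mirror of the changed field (the `ChatRequestLike` interface here is illustrative, not the library's exported type):

```typescript
// Local mirror of the changed field, for illustration only.
interface ChatRequestLike {
  model: string
  think?: boolean | 'high' | 'medium' | 'low'
}

// All four forms now type-check against the widened union.
const requests: ChatRequestLike[] = [
  { model: 'gpt-oss:20b', think: true }, // pre-existing boolean form
  { model: 'gpt-oss:20b', think: 'low' },
  { model: 'gpt-oss:20b', think: 'medium' },
  { model: 'gpt-oss:20b', think: 'high' },
]
```

A value outside the union, such as `think: 'max'`, is rejected at compile time rather than at the server.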
