
Commit b3fba13

Create AsyncOpenAI model

RobinPicard authored and rlouf committed

1 parent 8cecc55 commit b3fba13

File tree

5 files changed: +423 -25 lines changed

docs/features/models/index.md

Lines changed: 2 additions & 1 deletion
@@ -93,7 +93,7 @@ In alphabetical order:
 | Regex ||||||||||||||
 | Grammar |||||||| 🟠 ||||||
 | **Generation Features** | | | | | | | | | | | | | |
-| Async ||||||| |||||||
+| Async ||||||| |||||||
 | Streaming ||||||||||||||
 | Vision ||||||||||||||
 | Batching ||||||||||||||

@@ -142,6 +142,7 @@ print(type(model)) # outlines.models.tgi.AsyncTGI
 The models that have an async version are the following:

 - Ollama
+- OpenAI
 - SgLang
 - TGI
 - VLLM
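The new docs entry mirrors the async pattern already shown for TGI in the context line above; a minimal sketch of what it implies (assuming the `openai` package is installed and `OPENAI_API_KEY` is set; the printed class path follows the `AsyncTGI` example):

```python
# Minimal sketch: passing an async client to from_openai should yield
# the async model variant introduced by this commit.
import openai
import outlines

model = outlines.from_openai(openai.AsyncOpenAI(), "gpt-4o")
print(type(model))  # outlines.models.openai.AsyncOpenAI
```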

docs/features/models/openai.md

Lines changed: 53 additions & 3 deletions
@@ -10,23 +10,32 @@

 To create an OpenAI model instance, you can use the `from_openai` function. It takes 2 arguments:

-- `client`: an `openai.OpenAI` or `openai.AzureOpenAI` instance
+- `client`: an `openai.OpenAI`, `openai.AzureOpenAI`, `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` instance
 - `model_name`: the name of the model you want to use

+Based on whether the inference client instance is synchronous or asynchronous, you will receive an `OpenAI` or an `AsyncOpenAI` model instance.
+
 For instance:

 ```python
 import outlines
 import openai

-# Create the client
+# Create the client or async client
 client = openai.OpenAI()
+async_client = openai.AsyncOpenAI()

-# Create the model
+# Create a sync model
 model = outlines.from_openai(
     client,
     "gpt-4o"
 )
+
+# Create an async model
+model = outlines.from_openai(
+    async_client,
+    "gpt-4o"
+)
 ```

 Check the [OpenAI documentation](https://platform.openai.com/docs/models) for an up-to-date list of available models. As shown above, you can use Azure OpenAI in Outlines the same way you would use OpenAI, just provide an `openai.AzureOpenAI` instance to the Outlines model class.
@@ -190,6 +199,47 @@ result = model("Create a character, use the json format.", dict, temperature=0.5
 print(result) # '{"first_name": "Henri", "last_name": "Smith", "height": "170"}'
 ```

+## Asynchronous Calls
+
+All features presented above for the sync model are also available for the async model.
+
+For instance:
+
+```python
+import asyncio
+import openai
+import outlines
+from pydantic import BaseModel
+from typing import List
+
+class Character(BaseModel):
+    name: str
+    age: int
+    skills: List[str]
+
+# Create the model
+model = outlines.from_openai(
+    openai.AsyncOpenAI(),
+    "gpt-4o"
+)
+
+async def text_generation():
+    # Regular generation
+    response = await model("What's the capital of Latvia?", max_tokens=20)
+    print(response) # 'Riga'
+
+    # Streaming
+    async for chunk in model.stream("Tell me a short story about a cat.", max_tokens=50):
+        print(chunk, end="") # 'Once...'
+
+    # Structured generation
+    result = await model("Create a character, use the json format.", Character, top_p=0.1)
+    print(result) # '{"name": "Evelyn", "age": 34, "skills": ["archery", "stealth", "alchemy"]}'
+    print(Character.model_validate_json(result)) # name=Evelyn, age=34, skills=['archery', 'stealth', 'alchemy']
+
+asyncio.run(text_generation())
+```
+
 ## Inference arguments

 When calling the model, you can provide keyword arguments that will be passed down to the `chat.completions.create` method of the OpenAI client. Some of the most common arguments include `max_tokens`, `temperature`, `stop` and `top_p`.
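The "Inference arguments" paragraph above describes a plain pass-through; a minimal sketch of how such arguments reach `chat.completions.create` (assuming a configured sync client with `OPENAI_API_KEY` set):

```python
# Minimal sketch: keyword arguments given at call time are forwarded to
# chat.completions.create by the wrapper.
import openai
import outlines

model = outlines.from_openai(openai.OpenAI(), "gpt-4o")

result = model(
    "Name one planet.",
    max_tokens=10,    # cap the length of the completion
    temperature=0.0,  # make sampling as deterministic as possible
    stop=["\n"],      # stop at the first newline
    top_p=1.0,        # keep the full sampling nucleus
)
print(result)
```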

outlines/models/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -15,7 +15,7 @@
 from .llamacpp import LlamaCpp, from_llamacpp
 from .mlxlm import MLXLM, from_mlxlm
 from .ollama import AsyncOllama, Ollama, from_ollama
-from .openai import OpenAI, from_openai
+from .openai import AsyncOpenAI, OpenAI, from_openai
 from .sglang import AsyncSGLang, SGLang, from_sglang
 from .tgi import AsyncTGI, TGI, from_tgi
 from .transformers import (

@@ -41,6 +41,7 @@
 ]
 AsyncBlackBoxModel = Union[
     AsyncOllama,
+    AsyncOpenAI,
     AsyncTGI,
     AsyncSGLang,
     AsyncVLLM,
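With the re-export and union update in place, `AsyncOpenAI` should be importable from `outlines.models` and appear among the `AsyncBlackBoxModel` members; a small sketch (the `get_args` introspection is illustrative and not part of the commit):

```python
# Small sketch: check that the new class is re-exported and is a member
# of the AsyncBlackBoxModel union defined in outlines/models/__init__.py.
from typing import get_args

from outlines.models import AsyncBlackBoxModel, AsyncOpenAI

print(AsyncOpenAI in get_args(AsyncBlackBoxModel))  # True
```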

outlines/models/openai.py

Lines changed: 197 additions & 16 deletions
@@ -4,6 +4,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    AsyncIterator,
     Iterator,
     Optional,
     Union,

@@ -13,7 +14,7 @@
 from pydantic import BaseModel, TypeAdapter

 from outlines.inputs import Chat, Image
-from outlines.models.base import Model, ModelTypeAdapter
+from outlines.models.base import AsyncModel, Model, ModelTypeAdapter
 from outlines.models.utils import set_additional_properties_false_json_schema
 from outlines.types import JsonSchema, Regex, CFG
 from outlines.types.utils import (

@@ -25,9 +26,14 @@
 )

 if TYPE_CHECKING:
-    from openai import OpenAI as OpenAIClient, AzureOpenAI as AzureOpenAIClient
+    from openai import (
+        OpenAI as OpenAIClient,
+        AsyncOpenAI as AsyncOpenAIClient,
+        AzureOpenAI as AzureOpenAIClient,
+        AsyncAzureOpenAI as AsyncAzureOpenAIClient,
+    )

-__all__ = ["OpenAI", "from_openai"]
+__all__ = ["AsyncOpenAI", "OpenAI", "from_openai"]


 class OpenAITypeAdapter(ModelTypeAdapter):

@@ -348,36 +354,211 @@ def generate_stream(
         if "model" not in inference_kwargs and self.model_name is not None:
             inference_kwargs["model"] = self.model_name

-        stream = self.client.chat.completions.create(
-            stream=True,
-            messages=messages,
-            **response_format,
-            **inference_kwargs
-        )
+        try:
+            stream = self.client.chat.completions.create(
+                stream=True,
+                messages=messages,
+                **response_format,
+                **inference_kwargs
+            )
+        except openai.BadRequestError as e:
+            if e.body["message"].startswith("Invalid schema"):
+                raise TypeError(
+                    f"OpenAI does not support your schema: {e.body['message']}. "
+                    "Try a local model or dottxt instead."
+                )
+            else:
+                raise e

         for chunk in stream:
             if chunk.choices and chunk.choices[0].delta.content is not None:
                 yield chunk.choices[0].delta.content


+class AsyncOpenAI(AsyncModel):
+    """Thin wrapper around the `openai.AsyncOpenAI` client.
+
+    This wrapper is used to convert the input and output types specified by the
+    users at a higher level to arguments to the `openai.AsyncOpenAI` client.
+
+    """
+
+    def __init__(
+        self,
+        client: Union["AsyncOpenAIClient", "AsyncAzureOpenAIClient"],
+        model_name: Optional[str] = None,
+    ):
+        """
+        Parameters
+        ----------
+        client
+            The `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` client.
+        model_name
+            The name of the model to use.
+
+        """
+        self.client = client
+        self.model_name = model_name
+        self.type_adapter = OpenAITypeAdapter()
+
+    async def generate(
+        self,
+        model_input: Union[Chat, list, str],
+        output_type: Optional[Union[type[BaseModel], str]] = None,
+        **inference_kwargs: Any,
+    ) -> Union[str, list[str]]:
+        """Generate text using OpenAI.
+
+        Parameters
+        ----------
+        model_input
+            The prompt based on which the model will generate a response.
+        output_type
+            The desired format of the response generated by the model. The
+            output type must be of a type that can be converted to a JSON
+            schema or an empty dictionary.
+        **inference_kwargs
+            Additional keyword arguments to pass to the client.
+
+        Returns
+        -------
+        Union[str, list[str]]
+            The text generated by the model.
+
+        """
+        import openai
+
+        messages = self.type_adapter.format_input(model_input)
+        response_format = self.type_adapter.format_output_type(output_type)
+
+        if "model" not in inference_kwargs and self.model_name is not None:
+            inference_kwargs["model"] = self.model_name
+
+        try:
+            result = await self.client.chat.completions.create(
+                messages=messages,
+                **response_format,
+                **inference_kwargs,
+            )
+        except openai.BadRequestError as e:
+            if e.body["message"].startswith("Invalid schema"):
+                raise TypeError(
+                    f"OpenAI does not support your schema: {e.body['message']}. "
+                    "Try a local model or dottxt instead."
+                )
+            else:
+                raise e
+
+        messages = [choice.message for choice in result.choices]
+        for message in messages:
+            if message.refusal is not None:
+                raise ValueError(
+                    f"OpenAI refused to answer the request: {message.refusal}"
+                )
+
+        if len(messages) == 1:
+            return messages[0].content
+        else:
+            return [message.content for message in messages]
+
+    async def generate_batch(
+        self,
+        model_input,
+        output_type=None,
+        **inference_kwargs,
+    ):
+        raise NotImplementedError(
+            "The `openai` library does not support batch inference."
+        )
+
+    async def generate_stream(  # type: ignore
+        self,
+        model_input: Union[Chat, list, str],
+        output_type: Optional[Union[type[BaseModel], str]] = None,
+        **inference_kwargs,
+    ) -> AsyncIterator[str]:
+        """Stream text using OpenAI.
+
+        Parameters
+        ----------
+        model_input
+            The prompt based on which the model will generate a response.
+        output_type
+            The desired format of the response generated by the model. The
+            output type must be of a type that can be converted to a JSON
+            schema or an empty dictionary.
+        **inference_kwargs
+            Additional keyword arguments to pass to the client.
+
+        Returns
+        -------
+        AsyncIterator[str]
+            An iterator that yields the text generated by the model.
+
+        """
+        import openai
+
+        messages = self.type_adapter.format_input(model_input)
+        response_format = self.type_adapter.format_output_type(output_type)
+
+        if "model" not in inference_kwargs and self.model_name is not None:
+            inference_kwargs["model"] = self.model_name
+
+        try:
+            stream = await self.client.chat.completions.create(
+                stream=True,
+                messages=messages,
+                **response_format,
+                **inference_kwargs
+            )
+        except openai.BadRequestError as e:
+            if e.body["message"].startswith("Invalid schema"):
+                raise TypeError(
+                    f"OpenAI does not support your schema: {e.body['message']}. "
+                    "Try a local model or dottxt instead."
+                )
+            else:
+                raise e
+
+        async for chunk in stream:
+            if chunk.choices and chunk.choices[0].delta.content is not None:
+                yield chunk.choices[0].delta.content
+
+
 def from_openai(
-    client: Union["OpenAIClient", "AzureOpenAIClient"],
+    client: Union[
+        "OpenAIClient",
+        "AsyncOpenAIClient",
+        "AzureOpenAIClient",
+        "AsyncAzureOpenAIClient",
+    ],
     model_name: Optional[str] = None,
-) -> OpenAI:
-    """Create an Outlines `OpenAI` model instance from an `openai.OpenAI`
-    client.
+) -> Union[OpenAI, AsyncOpenAI]:
+    """Create an Outlines `OpenAI` or `AsyncOpenAI` model instance from an
+    `openai.OpenAI` or `openai.AsyncOpenAI` client.

     Parameters
     ----------
     client
-        An `openai.OpenAI` client instance.
+        An `openai.OpenAI`, `openai.AsyncOpenAI`, `openai.AzureOpenAI` or
+        `openai.AsyncAzureOpenAI` client instance.
     model_name
         The name of the model to use.

     Returns
     -------
     OpenAI
-        An Outlines `OpenAI` model instance.
+        An Outlines `OpenAI` or `AsyncOpenAI` model instance.

     """
-    return OpenAI(client, model_name)
+    import openai
+
+    if isinstance(client, openai.OpenAI):
+        return OpenAI(client, model_name)
+    elif isinstance(client, openai.AsyncOpenAI):
+        return AsyncOpenAI(client, model_name)
+    else:
+        raise ValueError(
+            "Invalid client type. The client must be an instance of "
+            "`openai.OpenAI` or `openai.AsyncOpenAI`."
+        )
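The dispatch added to `from_openai` can be exercised directly; a minimal sketch (assuming `OPENAI_API_KEY` is set so the clients construct without error):

```python
# Minimal sketch of the new dispatch: the client type decides which
# wrapper is returned, and any other value raises ValueError.
import openai
import outlines

sync_model = outlines.from_openai(openai.OpenAI(), "gpt-4o")
print(type(sync_model))   # outlines.models.openai.OpenAI

async_model = outlines.from_openai(openai.AsyncOpenAI(), "gpt-4o")
print(type(async_model))  # outlines.models.openai.AsyncOpenAI

try:
    outlines.from_openai("not a client")
except ValueError as e:
    print(e)  # Invalid client type. ...
```

Note that the two `isinstance` checks also cover the Azure clients, since `openai.AzureOpenAI` subclasses `openai.OpenAI` and `openai.AsyncAzureOpenAI` subclasses `openai.AsyncOpenAI`.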
