Hugging Face offers serverless inference for models on its Model Hub via [Hugging Face's Inference API](https://huggingface.co/docs/inference-providers/en/index).
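Since many Hub models lack native function calling, the example on this page asks the LLM to reply with a fenced code block and extracts the code with a regex. That extraction step can be sanity-checked in isolation; the sketch below uses a hypothetical `extract_python_block` helper and a fabricated model reply to show the expected input and output:

```python
import re

# Hypothetical helper: pull the first ```python fenced block out of a model reply.
def extract_python_block(llm_response):
    # re.DOTALL lets '.' match newlines, so multi-line code blocks are captured.
    match = re.search(r"```python\n(.*?)\n```", llm_response, re.DOTALL)
    return match.group(1) if match else ""

reply = "Here is the code:\n```python\nprint('strawberry'.count('r'))\n```\nDone."
print(extract_python_block(reply))  # → print('strawberry'.count('r'))
```

If the model reply contains no fenced block, the helper returns an empty string, which is a convenient sentinel for "nothing to execute".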
<Note>
Note that not every model on Hugging Face has native support for tool use and function calling.
</Note>

<CodeGroup>
```python
from huggingface_hub import InferenceClient
from e2b_code_interpreter import Sandbox
import re

# Not all models support direct tool use, so we prompt the LLM to respond
# with a Python code block and extract the code from it manually.
def match_code_block(llm_response):
    pattern = re.compile(r'```python\n(.*?)\n```', re.DOTALL)  # Match everything between ```python and ```
    match = pattern.search(llm_response)
    if match:
        code = match.group(1)
        print(code)
        return code
    return ""


system_prompt = """You are a helpful coding assistant that can execute python code in a Jupyter notebook. You are given tasks to complete and you run Python code to solve them.
Generally, you follow these rules:
- ALWAYS FORMAT YOUR RESPONSE IN MARKDOWN
- ALWAYS RESPOND ONLY WITH CODE IN CODE BLOCK LIKE THIS:
\`\`\`python
{code}
\`\`\`
"""
prompt = "Calculate how many r's are in the word 'strawberry.'"

# Initialize the client
client = InferenceClient(
    provider="hf-inference",
    api_key="HF_INFERENCE_API_KEY"
)

completion = client.chat.completions.create(
    model="Qwen/Qwen3-235B-A22B",  # Or use any other model from Hugging Face