
Commit 4fa2cc5

Upgrade RabbitMQ and ONNX dependencies (#1038)
1 parent b18de36 commit 4fa2cc5

File tree: 10 files changed (+176, -49 lines)


Directory.Packages.props

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
     <PackageVersion Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.1" />
     <PackageVersion Include="Microsoft.Extensions.Http" Version="9.0.3" />
     <PackageVersion Include="Microsoft.Extensions.Logging" Version="8.0.0" />
-    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.5.2" />
+    <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0" />
     <PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.3" />
     <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.5.2" />
     <PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.5.2" />
@@ -34,7 +34,7 @@
     <PackageVersion Include="OllamaSharp" Version="5.1.7" />
     <PackageVersion Include="PdfPig" Version="0.1.10" />
     <PackageVersion Include="Polly.Core" Version="8.5.2" />
-    <PackageVersion Include="RabbitMQ.Client" Version="7.0.0" />
+    <PackageVersion Include="RabbitMQ.Client" Version="7.1.2" />
    <PackageVersion Include="ReadLine" Version="2.0.1" />
     <PackageVersion Include="Swashbuckle.AspNetCore" Version="8.0.0" />
     <PackageVersion Include="System.Linq.Async" Version="6.0.1" />

KernelMemory.sln

Lines changed: 6 additions & 0 deletions
@@ -224,6 +224,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "InteractiveSetup", "tools\I
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "testapps", "testapps", "{AEF463F6-F813-498C-830A-3B4CED6DC4A7}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "213-onnx", "examples\213-onnx\213-onnx.csproj", "{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -530,6 +532,9 @@ Global
 		{D6BC74A5-41C7-4A60-9C2E-F246DC40145A}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{D6BC74A5-41C7-4A60-9C2E-F246DC40145A}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{D6BC74A5-41C7-4A60-9C2E-F246DC40145A}.Release|Any CPU.Build.0 = Release|Any CPU
+		{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}.Release|Any CPU.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@@ -630,6 +635,7 @@ Global
 		{82670921-FDCD-4672-84BD-4353F5AC24A0} = {AEF463F6-F813-498C-830A-3B4CED6DC4A7}
 		{CCA96699-483E-4B2A-95DF-25F0C98E3BB6} = {AEF463F6-F813-498C-830A-3B4CED6DC4A7}
 		{AEF463F6-F813-498C-830A-3B4CED6DC4A7} = {5E7DD43D-B5E7-4827-B57D-447E5B428589}
+		{E7ECB0D7-A4AA-4529-B191-3FDFE8674784} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}

examples/212-dotnet-ollama/Program.cs

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ public static async Task Main()
         // Import some text
         await memory.ImportTextAsync("Today is October 32nd, 2476");

-        // Generate an answer - This uses OpenAI for embeddings and finding relevant data, and LM Studio to generate an answer
+        // Generate an answer
         var answer = await memory.AskAsync("What's the current date (don't check for validity)?");
         Console.WriteLine("-------------------");
         Console.WriteLine(answer.Question);

examples/213-onnx/213-onnx.csproj

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+<Project Sdk="Microsoft.NET.Sdk.Web">
+
+  <PropertyGroup>
+    <TargetFramework>net8.0</TargetFramework>
+    <RollForward>LatestMajor</RollForward>
+    <ImplicitUsings>enable</ImplicitUsings>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\extensions\KM\KernelMemory\KernelMemory.csproj" />
+  </ItemGroup>
+
+</Project>

examples/213-onnx/Program.cs

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.KernelMemory;
+
+/* This example shows how to use KM with ONNX models
+ *
+ * 1. Download the phi-4 model from https://huggingface.co/microsoft/phi-4-onnx
+ *
+ * 2. Edit appsettings.json (or appsettings.Development.json) and set the model path.
+ *
+ * 3. Run the code
+ */
+public static class Program
+{
+    public static async Task Main()
+    {
+        var onnxCfg = new OnnxConfig();
+        var azureOpenAIEmbeddingConfig = new AzureOpenAIConfig();
+
+        new ConfigurationBuilder()
+            .AddJsonFile("appsettings.json")
+            .AddJsonFile("appsettings.development.json", optional: true)
+            .AddJsonFile("appsettings.Development.json", optional: true)
+            .Build()
+            .BindSection("KernelMemory:Services:Onnx", onnxCfg)
+            .BindSection("KernelMemory:Services:AzureOpenAIEmbedding", azureOpenAIEmbeddingConfig);
+
+        var memory = new KernelMemoryBuilder()
+            .WithOnnxTextGeneration(onnxCfg)
+            .WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig)
+            .Configure(builder => builder.Services.AddLogging(l =>
+            {
+                l.SetMinimumLevel(LogLevel.Warning);
+                l.AddSimpleConsole(c => c.SingleLine = true);
+            }))
+            .Build();
+
+        // Import some text
+        await memory.ImportTextAsync("Yesterday was October 21st, 2476");
+        await memory.ImportTextAsync("Tomorrow will be October 23rd, 2476");
+
+        // Generate an answer
+        var answer = await memory.AskAsync("What's the current date?");
+        Console.WriteLine(answer.Result);
+
+        /*
+
+        -- Output using phi-4-onnx:
+
+        Based on the provided information, if yesterday was October 21st, 2476, then today is October 22nd, 2476.
+        */
+    }
+}
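BindSection above is Kernel Memory's configuration helper; the same binding can be done with the stock Microsoft.Extensions.Configuration binder. A minimal sketch, assuming the appsettings.json layout shown below:

// Equivalent config loading with Microsoft.Extensions.Configuration.Binder
// instead of KM's BindSection helper.
using Microsoft.Extensions.Configuration;
using Microsoft.KernelMemory;

var config = new ConfigurationBuilder()
    .AddJsonFile("appsettings.json")
    .AddJsonFile("appsettings.Development.json", optional: true)
    .Build();

// Bind each section onto the strongly-typed config objects by key name
var onnxCfg = new OnnxConfig();
config.GetSection("KernelMemory:Services:Onnx").Bind(onnxCfg);

var embeddingCfg = new AzureOpenAIConfig();
config.GetSection("KernelMemory:Services:AzureOpenAIEmbedding").Bind(embeddingCfg);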
examples/213-onnx/Properties/launchSettings.json

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+{
+  "profiles": {
+    "console": {
+      "commandName": "Project",
+      "launchBrowser": false,
+      "environmentVariables": {
+        "ASPNETCORE_ENVIRONMENT": "Development"
+      }
+    }
+  }
+}

examples/213-onnx/appsettings.json

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+{
+  "KernelMemory": {
+    "Services": {
+      "Onnx": {
+        // Source: https://huggingface.co/microsoft/phi-4-onnx/tree/main
+        "TextModelDir": "/tmp/onnx/phi-4-onnx",
+        "MaxTokens": 16384
+      },
+      "AzureOpenAIEmbedding": {
+        // "ApiKey" or "AzureIdentity"
+        // AzureIdentity: use automatic Entra (AAD) authentication mechanism.
+        // You can test locally using the AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET env vars.
+        "Auth": "AzureIdentity",
+        // Optional when Auth == AzureIdentity. Leave it null to use the default audience,
+        // or set a value here to change the client audience.
+        "AzureIdentityAudience": null,
+        "Endpoint": "https://<...>.openai.azure.com/",
+        "APIKey": "",
+        // Your Azure deployment name
+        "Deployment": "",
+        // The max number of tokens supported by the deployed model
+        // See https://learn.microsoft.com/azure/ai-services/openai/concepts/models
+        "MaxTokenTotal": 8191,
+        // Which tokenizer to use to correctly measure the size of chunks.
+        // Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
+        // - Use p50k for the old text-davinci-003 models
+        // - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
+        // - Use o200k for the most recent gpt-4o family
+        "Tokenizer": "cl100k",
+        // The number of dimensions output embeddings should have.
+        // Only supported in "text-embedding-3" and later models developed with
+        // MRL, see https://arxiv.org/abs/2205.13147
+        "EmbeddingDimensions": null,
+        // How many embeddings to calculate in parallel. The max value depends on
+        // the model and deployment in use.
+        // See https://learn.microsoft.com/azure/ai-services/openai/reference#embeddings
+        "MaxEmbeddingBatchSize": 1,
+        // How many times to retry in case of throttling.
+        "MaxRetries": 10,
+        // Thumbprints of certificates that should be trusted for HTTPS requests when SSL policy errors are detected.
+        // This should only be used for local development when using a proxy to call the OpenAI endpoints.
+        "TrustedCertificateThumbprints": []
+      }
+    }
+  }
+}
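The same settings can also be supplied in code rather than JSON. A minimal sketch, assuming AzureOpenAIConfig exposes properties matching the JSON keys above; the deployment name is a hypothetical placeholder:

// Hedged sketch: code equivalent of the "AzureOpenAIEmbedding" JSON block,
// using API-key auth instead of AzureIdentity.
var azureOpenAIEmbeddingConfig = new AzureOpenAIConfig
{
    Auth = AzureOpenAIConfig.AuthTypes.APIKey,
    Endpoint = "https://<...>.openai.azure.com/",
    APIKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? "",
    Deployment = "text-embedding-3-small", // hypothetical deployment name
    MaxTokenTotal = 8191,
    MaxEmbeddingBatchSize = 1,
    MaxRetries = 10,
};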

examples/README.md

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ Some examples about how to use Kernel Memory.
 8. Local models and external connectors
    * [Using custom LLMs](104-dotnet-custom-LLM)
    * [Using local LLMs with Ollama](212-dotnet-ollama)
+   * [Using local LLMs with ONNX models](213-onnx)
    * [Using local LLMs with llama.cpp via LlamaSharp](105-dotnet-serverless-llamasharp)
    * [Using local models with LM Studio](208-dotnet-lmstudio)
    * [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion)

extensions/Chunkers/Chunkers.UnitTests/doc2.md

Lines changed: 1 addition & 0 deletions
@@ -450,6 +450,7 @@ Examples and Tools
 8. Local models and external connectors
    * [Using custom LLMs](examples/104-dotnet-custom-LLM)
    * [Using local LLMs with Ollama](examples/212-dotnet-ollama)
+   * [Using local LLMs with ONNX models](examples/213-onnx)
    * [Using local LLMs with llama.cpp via LlamaSharp](examples/105-dotnet-serverless-llamasharp)
    * [Using local models with LM Studio](examples/208-dotnet-lmstudio)
    * [Using Semantic Kernel LLM connectors](examples/107-dotnet-SemanticKernel-TextCompletion)

extensions/ONNX/Onnx/OnnxTextGenerator.cs

Lines changed: 42 additions & 46 deletions
@@ -33,7 +33,7 @@ public sealed class OnnxTextGenerator : ITextGenerator, IDisposable
     /// Tokenizer used with the Onnx Generator and Model classes to produce tokens.
     /// This has the potential to contain a null value, depending on the contents of the Model Directory.
     /// </summary>
-    private readonly Tokenizer? _tokenizer = default;
+    private readonly Tokenizer _tokenizer;

     /// <summary>
     /// Tokenizer used for GetTokens() and CountTokens()
@@ -84,15 +84,55 @@ public OnnxTextGenerator(
         this._log.LogDebug("Onnx model loaded");
     }

+    /// <inheritdoc/>
+    public int CountTokens(string text)
+    {
+        // TODO: Implement with _tokenizer and remove _textTokenizer
+        return this._textTokenizer.CountTokens(text);
+    }
+
+    /// <inheritdoc/>
+    public IReadOnlyList<string> GetTokens(string text)
+    {
+        // TODO: Implement with _tokenizer and remove _textTokenizer
+        return this._textTokenizer.GetTokens(text);
+    }
+
     /// <inheritdoc/>
     public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
         string prompt,
         TextGenerationOptions? options = null,
         [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        var tokens = this._tokenizer?.Encode(prompt);
+        // TODO: the prompt format should be configurable
+        using var sequences = this._tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");
+
         using var generatorParams = new GeneratorParams(this._model);
+        this.SetGeneratorParams(generatorParams, options);

+        using var tokenizerStream = this._tokenizer.CreateStream();
+        using var generator = new Generator(this._model, generatorParams);
+        generator.AppendTokenSequences(sequences);
+
+        while (!generator.IsDone())
+        {
+            generator.GenerateNextToken();
+            var x = tokenizerStream.Decode(generator.GetSequence(0)[^1]);
+            yield return new GeneratedTextContent(x);
+        }
+
+        await Task.CompletedTask.ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public void Dispose()
+    {
+        this._model.Dispose();
+        this._tokenizer.Dispose();
+    }
+
+    private void SetGeneratorParams(GeneratorParams generatorParams, TextGenerationOptions? options)
+    {
         generatorParams.SetSearchOption("max_length", this.MaxTokenTotal);
         generatorParams.SetSearchOption("min_length", this._config.MinLength);
         generatorParams.SetSearchOption("num_return_sequences", this._config.ResultsPerPrompt);
@@ -145,49 +185,5 @@ public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(

             break;
         }
-
-        generatorParams.SetInputSequences(tokens);
-
-        using (var generator = new Generator(this._model, generatorParams))
-        {
-            List<int> outputTokens = [];
-
-            while (!generator.IsDone() && cancellationToken.IsCancellationRequested == false)
-            {
-                generator.ComputeLogits();
-                generator.GenerateNextToken();
-
-                outputTokens.AddRange(generator.GetSequence(0));
-
-                if (outputTokens.Count > 0 && this._tokenizer != null)
-                {
-                    var newToken = outputTokens[^1];
-                    yield return this._tokenizer.Decode([newToken]);
-                }
-            }
-        }
-
-        await Task.CompletedTask.ConfigureAwait(false);
-    }
-
-    /// <inheritdoc/>
-    public int CountTokens(string text)
-    {
-        // TODO: Implement with _tokenizer and remove _textTokenizer
-        return this._textTokenizer.CountTokens(text);
-    }
-
-    /// <inheritdoc/>
-    public IReadOnlyList<string> GetTokens(string text)
-    {
-        // TODO: Implement with _tokenizer and remove _textTokenizer
-        return this._textTokenizer.GetTokens(text);
-    }
-
-    /// <inheritdoc/>
-    public void Dispose()
-    {
-        this._model?.Dispose();
-        this._tokenizer?.Dispose();
     }
 }
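The rewrite tracks the OnnxRuntimeGenAI 0.6.0 API: GeneratorParams.SetInputSequences and Generator.ComputeLogits are gone, the prompt is now fed with Generator.AppendTokenSequences, and GenerateNextToken computes logits internally. A minimal standalone sketch of the new loop; the model path, prompt template, and max_length value are assumptions taken from the example above:

// Sketch of the 0.6.0 generation loop outside of Kernel Memory.
using Microsoft.ML.OnnxRuntimeGenAI;

using var model = new Model("/tmp/onnx/phi-4-onnx");
using var tokenizer = new Tokenizer(model);
using var sequences = tokenizer.Encode("<|user|>What's the current date?<|end|><|assistant|>");

using var generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("max_length", 2048);

using var tokenizerStream = tokenizer.CreateStream();
using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences); // replaces SetInputSequences + ComputeLogits from 0.5.x

while (!generator.IsDone())
{
    generator.GenerateNextToken(); // logits computed internally in 0.6.0
    Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
}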