Skip to content

Commit e976a5c

Browse files
authored
Adds Analyze Single vs Multi-thread benchmark (#215)
* Add benchmarks
* Delete defaultRulesPkd.json
* Switch to using thread-safe ConcurrentDictionary over unsafe HashSet
* Improve multithreaded reliability: switch to ConcurrentStack instead of List internally
* Cleanup
* Update MultiExtractor from OSSGadget current. Dramatically improves speed and reliability.
* Fix two edge cases in MultiExtractor.
* No need to check null, this no longer returns nulls.
* Add note about path for benchmarks
1 parent 82b996c commit e976a5c

25 files changed

+2147
-17466
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,4 @@ __pycache__/
265265
/UnitTest.Commands/output
266266
/RulesPacker/log.txt
267267
/AppInspector/ApplicationInspector.Commands.xml
268+
AppInspector/Resources/defaultRulesPkd.json

AppInspector.CLI/Writers/AnalyzeHtmlWriter.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public void PopulateTagGroups()
144144
{
145145
foreach (TagSearchPattern pattern in tagGroup.Patterns)
146146
{
147-
pattern.Detected = _appMetaData.UniqueTags.Any(v => v.Contains(pattern.SearchPattern));
147+
pattern.Detected = _appMetaData.UniqueTags.Any(v => v.Key.Contains(pattern.SearchPattern));
148148
//create dynamic "category" groups of tags with pattern relationship established from TagReportGroups.json
149149
//that can be used to populate reports with various attributes for each tag detected
150150
if (pattern.Detected)
@@ -382,7 +382,7 @@ private List<TagInfo> GetAllMatchingTagInfoList(TagGroup tagGroup, bool addNotFo
382382
/// <returns></returns>
383383
private List<TagInfo> GetTagInfoListByName()
384384
{
385-
List<string> orderedTags = _appMetaData.UniqueTags.ToList<string>();
385+
List<string> orderedTags = _appMetaData.UniqueTags.Keys.ToList<string>();
386386
orderedTags.Sort();
387387
HashSet<string> dupCheck = new HashSet<string>();
388388
List<TagInfo> result = new List<TagInfo>();
@@ -427,7 +427,7 @@ private List<TagInfo> GetTagInfoListByConfidence()
427427
RulesEngine.Confidence[] confidences = { Confidence.High, Confidence.Medium, Confidence.Low };
428428

429429

430-
foreach (string tag in _appMetaData.UniqueTags)
430+
foreach (string tag in _appMetaData.UniqueTags.Keys)
431431
{
432432
var searchPattern = new Regex(tag, RegexOptions.IgnoreCase);
433433
foreach (Confidence test in confidences)
@@ -470,7 +470,7 @@ private List<TagInfo> GetTagInfoListBySeverity()
470470
RulesEngine.Severity[] severities = { Severity.Critical, Severity.Important, Severity.Moderate, Severity.BestPractice, Severity.ManualReview };
471471

472472

473-
foreach (string tag in _appMetaData.UniqueTags)
473+
foreach (string tag in _appMetaData.UniqueTags.Keys)
474474
{
475475
// TODO: How frequently are these generated? Cache?
476476
var searchPattern = new Regex(tag, RegexOptions.IgnoreCase);

AppInspector.CLI/Writers/AnalyzeJsonWriter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public override void WriteResults(Result result, CLICommandOptions commandOption
4040

4141
if (cLIAnalyzeCmdOptions.SimpleTagsOnly)
4242
{
43-
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags);
43+
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags.Keys);
4444
keys.Sort();
4545
TagsFile tags = new TagsFile();
4646
tags.Tags = keys.ToArray();

AppInspector.CLI/Writers/AnalyzeTextWriter.cs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public override void WriteResults(Result result, CLICommandOptions commandOption
2626

2727
if (cLIAnalyzeCmdOptions.SimpleTagsOnly)
2828
{
29-
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags);
29+
List<string> keys = new List<string>(analyzeResult.Metadata.UniqueTags.Keys);
3030
keys.Sort();
3131

3232
foreach (string tag in keys)
@@ -66,17 +66,9 @@ public AnalyzeTextWriter(string formatString)
6666

6767
#region helpers
6868

69-
private string StringList(HashSet<string> data)
69+
private string StringList(ConcurrentDictionary<string, byte> data)
7070
{
71-
StringBuilder build = new StringBuilder();
72-
73-
foreach (string s in data)
74-
{
75-
build.Append(s);
76-
build.Append(" ");
77-
}
78-
79-
return build.ToString();
71+
return string.Join(' ', data.Keys);
8072
}
8173

8274
private string StringList(Dictionary<string, int> data)
@@ -166,7 +158,7 @@ public void WriteAppMeta(MetaData metaData)
166158
WriteOnce.General(string.Format("Unique matches: {0}", metaData.UniqueMatchesCount));
167159

168160
WriteOnce.General(MakeHeading("UniqueTags"));
169-
List<string> orderedTags = metaData.UniqueTags.ToList<string>();
161+
List<string> orderedTags = metaData.UniqueTags.Keys.ToList<string>();
170162
orderedTags.Sort();
171163

172164
foreach (string tag in orderedTags)
@@ -205,7 +197,7 @@ private void WriteDependencies(MetaData metaData)
205197
{
206198
WriteOnce.General(MakeHeading("Dependencies"));
207199

208-
foreach (string s in metaData.UniqueDependencies)
200+
foreach (string s in metaData.UniqueDependencies.Keys)
209201
{
210202
WriteOnce.General(s);
211203
}

AppInspector.sln

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
1313
EndProject
1414
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AppInspector.RulesEngine", "RulesEngine\AppInspector.RulesEngine.csproj", "{C19A98D2-629D-4F4D-87E4-3154416970BA}"
1515
EndProject
16-
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MultiExtractor", "MultiExtractor\MultiExtractor.csproj", "{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}"
17-
EndProject
1816
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AppInspector.CLI", "AppInspector.CLI\AppInspector.CLI.csproj", "{824ED27E-A4CF-46A6-A01F-98B0821EB61C}"
1917
EndProject
2018
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "RulesPacker", "RulesPacker", "{C464D0CE-5254-4EA5-87C9-C0C96E40C3CB}"
@@ -39,6 +37,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "RulesPacker", "RulesPacker"
3937
EndProject
4038
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTest.Commands", "UnitTest.Commands\UnitTest.Commands.csproj", "{181BD826-A428-41D9-8BEC-0D8EB2288DF5}"
4139
EndProject
40+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Benchmarks", "Benchmarks\Benchmarks.csproj", "{F031887C-EA60-4390-9940-765E99E69B8F}"
41+
EndProject
42+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Shared.MultiExtractor", "MultiExtractor\Shared.MultiExtractor.csproj", "{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}"
43+
EndProject
4244
Global
4345
GlobalSection(SolutionConfigurationPlatforms) = preSolution
4446
Debug|Any CPU = Debug|Any CPU
@@ -53,10 +55,6 @@ Global
5355
{C19A98D2-629D-4F4D-87E4-3154416970BA}.Debug|Any CPU.Build.0 = Debug|Any CPU
5456
{C19A98D2-629D-4F4D-87E4-3154416970BA}.Release|Any CPU.ActiveCfg = Release|Any CPU
5557
{C19A98D2-629D-4F4D-87E4-3154416970BA}.Release|Any CPU.Build.0 = Release|Any CPU
56-
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
57-
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
58-
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
59-
{7C07A2A2-508E-4BBE-873F-F60F9FB4A9D9}.Release|Any CPU.Build.0 = Release|Any CPU
6058
{824ED27E-A4CF-46A6-A01F-98B0821EB61C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
6159
{824ED27E-A4CF-46A6-A01F-98B0821EB61C}.Debug|Any CPU.Build.0 = Debug|Any CPU
6260
{824ED27E-A4CF-46A6-A01F-98B0821EB61C}.Release|Any CPU.ActiveCfg = Release|Any CPU
@@ -65,6 +63,14 @@ Global
6563
{181BD826-A428-41D9-8BEC-0D8EB2288DF5}.Debug|Any CPU.Build.0 = Debug|Any CPU
6664
{181BD826-A428-41D9-8BEC-0D8EB2288DF5}.Release|Any CPU.ActiveCfg = Release|Any CPU
6765
{181BD826-A428-41D9-8BEC-0D8EB2288DF5}.Release|Any CPU.Build.0 = Release|Any CPU
66+
{F031887C-EA60-4390-9940-765E99E69B8F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
67+
{F031887C-EA60-4390-9940-765E99E69B8F}.Debug|Any CPU.Build.0 = Debug|Any CPU
68+
{F031887C-EA60-4390-9940-765E99E69B8F}.Release|Any CPU.ActiveCfg = Release|Any CPU
69+
{F031887C-EA60-4390-9940-765E99E69B8F}.Release|Any CPU.Build.0 = Release|Any CPU
70+
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
71+
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Debug|Any CPU.Build.0 = Debug|Any CPU
72+
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Release|Any CPU.ActiveCfg = Release|Any CPU
73+
{9D6C861B-845F-4ADC-86ED-2F1E7BB4A229}.Release|Any CPU.Build.0 = Release|Any CPU
6874
EndGlobalSection
6975
GlobalSection(SolutionProperties) = preSolution
7076
HideSolutionNode = FALSE

AppInspector/AppInspector.Commands.csproj

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,7 @@
7676
</ItemGroup>
7777

7878
<ItemGroup>
79-
<ProjectReference Include="..\MultiExtractor\MultiExtractor.csproj">
80-
<ReferenceOutputAssembly>true</ReferenceOutputAssembly>
81-
<IncludeAssets>MultiExtractor.dll</IncludeAssets>
82-
</ProjectReference>
83-
79+
<ProjectReference Include="..\MultiExtractor\Shared.MultiExtractor.csproj"/>
8480
<ProjectReference Include="..\RulesEngine\AppInspector.RulesEngine.csproj">
8581
<ReferenceOutputAssembly>true</ReferenceOutputAssembly>
8682
<IncludeAssets>ApplicationInspector.RulesEngine.dll</IncludeAssets>

AppInspector/Commands/AnalyzeCommand.cs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Licensed under the MIT License. See LICENSE.txt in the project root for license information.
33

44
using Microsoft.ApplicationInspector.RulesEngine;
5-
using MultiExtractor;
5+
using Microsoft.CST.OpenSource.MultiExtractor;
66
using Newtonsoft.Json;
77
using NLog;
88
using System;
@@ -11,6 +11,7 @@
1111
using System.Linq;
1212
using System.Text;
1313
using System.Text.RegularExpressions;
14+
using System.Threading.Tasks;
1415

1516
namespace Microsoft.ApplicationInspector.Commands
1617
{
@@ -289,7 +290,6 @@ private void ConfigRules()
289290
public AnalyzeResult GetResult()
290291
{
291292
WriteOnce.SafeLog("AnalyzeCommand::Run", LogLevel.Trace);
292-
293293
WriteOnce.Operation(MsgHelp.FormatString(MsgHelp.ID.CMD_RUNNING, "Analyze"));
294294
AnalyzeResult analyzeResult = new AnalyzeResult()
295295
{
@@ -339,7 +339,7 @@ public AnalyzeResult GetResult()
339339
}
340340
else
341341
{
342-
_srcfileList.AsParallel().ForAll(filename => ProcessFile(filename));
342+
Parallel.ForEach(_srcfileList, filename => ProcessFile(filename));
343343
}
344344

345345
WriteOnce.General("\r" + MsgHelp.FormatString(MsgHelp.ID.ANALYZE_FILES_PROCESSED_PCNT, 100));
@@ -385,7 +385,7 @@ private void ProcessAsFile(string filename)
385385
if (FileChecksPassed(filename, ref languageInfo))
386386
{
387387
LastUpdated = File.GetLastWriteTime(filename);
388-
_metaDataHelper.Metadata.PackageTypes.Add(MsgHelp.GetString(MsgHelp.ID.ANALYZE_UNCOMPRESSED_FILETYPE));
388+
_ = _metaDataHelper.Metadata.PackageTypes.TryAdd(MsgHelp.GetString(MsgHelp.ID.ANALYZE_UNCOMPRESSED_FILETYPE),0);
389389

390390
string fileText = File.ReadAllText(filename);
391391
ProcessInMemory(filename, fileText, languageInfo);
@@ -602,7 +602,7 @@ private string ExtractDependency(string text, int startIndex, SearchPattern patt
602602
}
603603

604604
string finalResult = rawResult.Replace(";", "");
605-
_metaDataHelper.Metadata.UniqueDependencies.Add(finalResult);
605+
_ = _metaDataHelper.Metadata.UniqueDependencies.TryAdd(finalResult,0);
606606

607607
return System.Net.WebUtility.HtmlEncode(finalResult);
608608
}
@@ -645,11 +645,12 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
645645
}
646646

647647
LastUpdated = File.GetLastWriteTime(filePath);
648-
_metaDataHelper.Metadata.PackageTypes.Add(MsgHelp.GetString(MsgHelp.ID.ANALYZE_COMPRESSED_FILETYPE));
648+
_ = _metaDataHelper.Metadata.PackageTypes.TryAdd(MsgHelp.GetString(MsgHelp.ID.ANALYZE_COMPRESSED_FILETYPE),0);
649649

650650
try
651651
{
652-
IEnumerable<FileEntry> files = Extractor.ExtractFile(filePath).Where(x => x != null);
652+
var extractor = new Extractor();
653+
IEnumerable<FileEntry> files = extractor.ExtractFile(filePath,!_options.SingleThread);
653654

654655
if (_options.SingleThread)
655656
{
@@ -661,8 +662,9 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
661662
LanguageInfo languageInfo = new LanguageInfo();
662663
if (FileChecksPassed(file.FullPath, ref languageInfo, file.Content.Length))
663664
{
664-
byte[] streamByteArray = file.Content.ToArray();
665-
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray, 0, streamByteArray.Length), languageInfo);
665+
var streamByteArray = new byte[file.Content.Length];
666+
file.Content.Read(streamByteArray);
667+
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray), languageInfo);
666668
}
667669
}
668670
catch (Exception)
@@ -673,16 +675,17 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
673675
}
674676
else
675677
{
676-
files.AsParallel().ForAll(file =>
678+
Parallel.ForEach(files, file =>
677679
{
678680
try
679681
{
680682
//check uncompressed file passes standard checks
681683
LanguageInfo languageInfo = new LanguageInfo();
682684
if (FileChecksPassed(file.FullPath, ref languageInfo, file.Content.Length))
683685
{
684-
byte[] streamByteArray = file.Content.ToArray();
685-
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray, 0, streamByteArray.Length), languageInfo);
686+
var streamByteArray = new byte[file.Content.Length];
687+
file.Content.Read(streamByteArray);
688+
ProcessInMemory(file.FullPath, Encoding.UTF8.GetString(streamByteArray), languageInfo);
686689
}
687690
}
688691
catch (Exception)
@@ -712,7 +715,7 @@ private void UnZipAndProcess(string filePath, ArchiveFileType archiveFileType, b
712715
/// <returns></returns>
713716
private bool FileChecksPassed(string filePath, ref LanguageInfo languageInfo, long fileLength = 0)
714717
{
715-
_metaDataHelper.Metadata.FileExtensions.Add(Path.GetExtension(filePath).Replace('.', ' ').TrimStart());
718+
_ = _metaDataHelper.Metadata.FileExtensions.TryAdd(Path.GetExtension(filePath).Replace('.', ' ').TrimStart(),0);
716719

717720
// 1. Skip files written in unknown language
718721
if (!Language.FromFileName(filePath, ref languageInfo))

AppInspector/Commands/TagDiffCommand.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ public TagDiffResult GetResult()
222222
int sizeTags1 = analyze1.Metadata.UniqueTags.Count;
223223
string[] file1Tags = new string[sizeTags1];
224224

225-
foreach (string tag in analyze1.Metadata.UniqueTags.ToList<string>())
225+
foreach (string tag in analyze1.Metadata.UniqueTags.Keys.ToList<string>())
226226
{
227227
file1Tags[count1++] = tag;
228228
}
@@ -231,7 +231,7 @@ public TagDiffResult GetResult()
231231
int sizeTags2 = analyze2.Metadata.UniqueTags.Count;
232232
string[] file2Tags = new string[sizeTags2];
233233

234-
foreach (string tag in analyze2.Metadata.UniqueTags.ToList<string>())
234+
foreach (string tag in analyze2.Metadata.UniqueTags.Keys.ToList<string>())
235235
{
236236
file2Tags[count2++] = tag;
237237
}

AppInspector/Commands/TagTestCommand.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ public TagTestResult GetResult()
213213
int sizeTags = analyze1.Metadata.UniqueTags.Count;
214214
string[] tagsFound = new string[sizeTags];
215215

216-
foreach (string tag in analyze1.Metadata.UniqueTags.ToList<string>())
216+
foreach (string tag in analyze1.Metadata.UniqueTags.Keys.ToList<string>())
217217
{
218218
tagsFound[count++] = tag;
219219
}

0 commit comments

Comments
 (0)