forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Job completed in 17 minutes 55 seconds (remote runner delay: 51 seconds).
dotnet/runtime#118079
Using arguments: regexdiff
38 out of 18857 patterns have generated source code changes.
Examples of GeneratedRegex source diffs
"(?<!\\\\)?\".*?(?<!\\\\)\"" (89 uses)
[GeneratedRegex("(?<!\\\\)?\".*?(?<!\\\\)\"")]
/// <code>(?<!\\\\)?".*?(?<!\\\\)"</code><br/>
/// Explanation:<br/>
/// <code>
- /// ○ Optional (greedy).<br/>
- /// ○ Zero-width negative lookbehind.<br/>
- /// ○ Match '\\' right-to-left.<br/>
/// ○ Match '"'.<br/>
/// ○ Match a character other than '\n' lazily any number of times.<br/>
/// ○ Zero-width negative lookbehind.<br/>
int pos = base.runtextpos;
int matchStart = pos;
int lazyloop_pos = 0;
- int loop_iteration = 0, loop_starting_pos = 0;
- int stackpos = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
- // Optional (greedy).
- //{
- loop_starting_pos = pos;
- loop_iteration = 0;
-
- LoopBody:
- Utilities.StackPush(ref base.runstack!, ref stackpos, loop_starting_pos, pos);
-
- loop_starting_pos = pos;
- loop_iteration++;
-
- // Zero-width negative lookbehind.
- {
- slice = inputSpan.Slice(pos);
- int negativelookbehind__starting_pos = pos;
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- // Match '\\' right-to-left.
- if ((uint)(pos - 1) >= inputSpan.Length || inputSpan[pos - 1] != '\\')
- {
- goto NegativeLookaroundMatch;
- }
- pos--;
-
- goto LoopIterationNoMatch;
-
- NegativeLookaroundMatch:
- pos = negativelookbehind__starting_pos;
- slice = inputSpan.Slice(pos);
- }
-
-
- // The loop has an upper bound of 1. Continue iterating greedily if the upper bound hasn't
- // yet been reached (as long as the last iteration wasn't empty).
- if (pos != loop_starting_pos && loop_iteration == 0)
- {
- goto LoopBody;
- }
- goto LoopEnd;
-
- // The loop iteration failed. Put state back to the way it was before the iteration.
- LoopIterationNoMatch:
- if (--loop_iteration < 0)
- {
- // Unable to match the remainder of the expression after exhausting the loop.
- return false; // The input didn't match.
- }
- Utilities.StackPop(base.runstack!, ref stackpos, out pos, out loop_starting_pos);
- slice = inputSpan.Slice(pos);
- LoopEnd:;
- //}
-
// Match '"'.
if (slice.IsEmpty || slice[0] != '"')
{
- goto LoopIterationNoMatch;
+ return false; // The input didn't match.
}
// Match a character other than '\n' lazily any number of times.
slice = inputSpan.Slice(pos);
if (slice.IsEmpty || slice[0] == '\n')
{
- goto LoopIterationNoMatch;
+ return false; // The input didn't match.
}
pos++;
slice = inputSpan.Slice(pos);
// Zero-width negative lookbehind.
{
slice = inputSpan.Slice(pos);
- int negativelookbehind__starting_pos1 = pos;
+ int negativelookbehind__starting_pos = pos;
if (Utilities.s_hasTimeout)
{
base.CheckTimeout();
// Match '\\' right-to-left.
if ((uint)(pos - 1) >= inputSpan.Length || inputSpan[pos - 1] != '\\')
{
- goto NegativeLookaroundMatch1;
+ goto NegativeLookaroundMatch;
}
pos--;
goto LazyLoopBacktrack;
- NegativeLookaroundMatch1:
- pos = negativelookbehind__starting_pos1;
+ NegativeLookaroundMatch:
+ pos = negativelookbehind__starting_pos;
slice = inputSpan.Slice(pos);
}
/// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
-
- /// <summary>Pops 2 values from the backtracking stack.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static void StackPop(int[] stack, ref int pos, out int arg0, out int arg1)
- {
- arg0 = stack[--pos];
- arg1 = stack[--pos];
- }
-
- /// <summary>Pushes 2 values onto the backtracking stack.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static void StackPush(ref int[] stack, ref int pos, int arg0, int arg1)
- {
- // If there's space available for all 2 values, store them.
- int[] s = stack;
- int p = pos;
- if ((uint)(p + 1) < (uint)s.Length)
- {
- s[p] = arg0;
- s[p + 1] = arg1;
- pos += 2;
- return;
- }
-
- // Otherwise, resize the stack to make room and try again.
- WithResize(ref stack, ref pos, arg0, arg1);
-
- // <summary>Resize the backtracking stack array and push 2 values onto the stack.</summary>
- [MethodImpl(MethodImplOptions.NoInlining)]
- static void WithResize(ref int[] stack, ref int pos, int arg0, int arg1)
- {
- Array.Resize(ref stack, (pos + 1) * 2);
- StackPush(ref stack, ref pos, arg0, arg1);
- }
- }
}
}
"(?<!\\\\)?'.{1,2}?(?<!\\\\)'" (89 uses)
[GeneratedRegex("(?<!\\\\)?'.{1,2}?(?<!\\\\)'")] /// <code>(?<!\\\\)?'.{1,2}?(?<!\\\\)'</code><br/>
/// Explanation:<br/>
/// <code>
- /// ○ Optional (greedy).<br/>
- /// ○ Zero-width negative lookbehind.<br/>
- /// ○ Match '\\' right-to-left.<br/>
/// ○ Match '\''.<br/>
/// ○ Match a character other than '\n' lazily at least 1 and at most 2 times.<br/>
/// ○ Zero-width negative lookbehind.<br/>
int matchStart = pos;
int lazyloop_iteration = 0;
int lazyloop_pos = 0;
- int loop_iteration = 0, loop_starting_pos = 0;
- int stackpos = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
- // Optional (greedy).
- //{
- loop_starting_pos = pos;
- loop_iteration = 0;
-
- LoopBody:
- Utilities.StackPush(ref base.runstack!, ref stackpos, loop_starting_pos, pos);
-
- loop_starting_pos = pos;
- loop_iteration++;
-
- // Zero-width negative lookbehind.
- {
- slice = inputSpan.Slice(pos);
- int negativelookbehind__starting_pos = pos;
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- // Match '\\' right-to-left.
- if ((uint)(pos - 1) >= inputSpan.Length || inputSpan[pos - 1] != '\\')
- {
- goto NegativeLookaroundMatch;
- }
- pos--;
-
- goto LoopIterationNoMatch;
-
- NegativeLookaroundMatch:
- pos = negativelookbehind__starting_pos;
- slice = inputSpan.Slice(pos);
- }
-
-
- // The loop has an upper bound of 1. Continue iterating greedily if the upper bound hasn't
- // yet been reached (as long as the last iteration wasn't empty).
- if (pos != loop_starting_pos && loop_iteration == 0)
- {
- goto LoopBody;
- }
- goto LoopEnd;
-
- // The loop iteration failed. Put state back to the way it was before the iteration.
- LoopIterationNoMatch:
- if (--loop_iteration < 0)
- {
- // Unable to match the remainder of the expression after exhausting the loop.
- return false; // The input didn't match.
- }
- Utilities.StackPop(base.runstack!, ref stackpos, out pos, out loop_starting_pos);
- slice = inputSpan.Slice(pos);
- LoopEnd:;
- //}
-
// Match '\''.
if (slice.IsEmpty || slice[0] != '\'')
{
- goto LoopIterationNoMatch;
+ return false; // The input didn't match.
}
// Match a character other than '\n' lazily at least 1 and at most 2 times.
//{
if ((uint)slice.Length < 2 || slice[1] == '\n')
{
- goto LoopIterationNoMatch;
+ return false; // The input didn't match.
}
pos += 2;
LazyLoopBacktrack:
if (lazyloop_iteration >= 1)
{
- goto LoopIterationNoMatch;
+ return false; // The input didn't match.
}
lazyloop_iteration++;
if (Utilities.s_hasTimeout)
slice = inputSpan.Slice(pos);
if (slice.IsEmpty || slice[0] == '\n')
{
- goto LoopIterationNoMatch;
+ return false; // The input didn't match.
}
pos++;
slice = inputSpan.Slice(pos);
// Zero-width negative lookbehind.
{
slice = inputSpan.Slice(pos);
- int negativelookbehind__starting_pos1 = pos;
+ int negativelookbehind__starting_pos = pos;
if (Utilities.s_hasTimeout)
{
base.CheckTimeout();
// Match '\\' right-to-left.
if ((uint)(pos - 1) >= inputSpan.Length || inputSpan[pos - 1] != '\\')
{
- goto NegativeLookaroundMatch1;
+ goto NegativeLookaroundMatch;
}
pos--;
goto LazyLoopBacktrack;
- NegativeLookaroundMatch1:
- pos = negativelookbehind__starting_pos1;
+ NegativeLookaroundMatch:
+ pos = negativelookbehind__starting_pos;
slice = inputSpan.Slice(pos);
}
/// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
-
- /// <summary>Pops 2 values from the backtracking stack.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static void StackPop(int[] stack, ref int pos, out int arg0, out int arg1)
- {
- arg0 = stack[--pos];
- arg1 = stack[--pos];
- }
-
- /// <summary>Pushes 2 values onto the backtracking stack.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static void StackPush(ref int[] stack, ref int pos, int arg0, int arg1)
- {
- // If there's space available for all 2 values, store them.
- int[] s = stack;
- int p = pos;
- if ((uint)(p + 1) < (uint)s.Length)
- {
- s[p] = arg0;
- s[p + 1] = arg1;
- pos += 2;
- return;
- }
-
- // Otherwise, resize the stack to make room and try again.
- WithResize(ref stack, ref pos, arg0, arg1);
-
- // <summary>Resize the backtracking stack array and push 2 values onto the stack.</summary>
- [MethodImpl(MethodImplOptions.NoInlining)]
- static void WithResize(ref int[] stack, ref int pos, int arg0, int arg1)
- {
- Array.Resize(ref stack, (pos + 1) * 2);
- StackPush(ref stack, ref pos, arg0, arg1);
- }
- }
}
}
"(?<day>01|02|03|04|05|06|07|08|09|10|11|12|1 ..." (61 uses)
[GeneratedRegex("(?<day>01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)日?(?=\\b|t|まで|から)?", RegexOptions.ExplicitCapture | RegexOptions.Singleline)] /// ○ Match '3'.<br/>
/// ○ Match a character in the set [01].<br/>
/// ○ Match a character in the set [3-9].<br/>
- /// ○ Match '日' greedily, optionally.<br/>
- /// ○ Optional (greedy).<br/>
- /// ○ Zero-width positive lookahead.<br/>
- /// ○ Match with 4 alternative expressions, atomically.<br/>
- /// ○ Match if at a word boundary.<br/>
- /// ○ Match 't'.<br/>
- /// ○ Match the string "まで".<br/>
- /// ○ Match the string "から".<br/>
+ /// ○ Match '日' atomically, optionally.<br/>
/// </code>
/// </remarks>
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "42.42.42.42")]
int alternation_starting_pos1 = 0;
int alternation_starting_pos2 = 0;
int capture_starting_pos = 0;
- int charloop_capture_pos = 0;
- int charloop_starting_pos = 0, charloop_ending_pos = 0;
- int loop_iteration = 0, loop_starting_pos = 0;
- int stackpos = 0;
- int startingStackpos = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
// "day" capture group.
CaptureSkipBacktrack:;
//}
- // Match '日' greedily, optionally.
- //{
- charloop_starting_pos = pos;
-
+ // Match '日' atomically, optionally.
+ {
if (!slice.IsEmpty && slice[0] == '日')
{
slice = slice.Slice(1);
pos++;
}
-
- charloop_ending_pos = pos;
- goto CharLoopEnd;
-
- CharLoopBacktrack:
- UncaptureUntil(charloop_capture_pos);
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos >= charloop_ending_pos)
- {
- goto CaptureBacktrack;
- }
- pos = --charloop_ending_pos;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd:
- charloop_capture_pos = base.Crawlpos();
- //}
-
- // Optional (greedy).
- {
- startingStackpos = stackpos;
- loop_starting_pos = pos;
- loop_iteration = 0;
-
- LoopBody:
- Utilities.StackPush(ref base.runstack!, ref stackpos, base.Crawlpos(), loop_starting_pos, pos);
-
- loop_starting_pos = pos;
- loop_iteration++;
-
- // Zero-width positive lookahead.
- {
- int positivelookahead_starting_pos = pos;
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- // Match with 4 alternative expressions, atomically.
- {
- int alternation_starting_pos3 = pos;
-
- // Branch 0
- {
- // Match if at a word boundary.
- if (!Utilities.IsBoundary(inputSpan, pos))
- {
- goto AlternationBranch6;
- }
-
- goto AlternationMatch3;
-
- AlternationBranch6:
- pos = alternation_starting_pos3;
- slice = inputSpan.Slice(pos);
- }
-
- // Branch 1
- {
- // Match 't'.
- if (slice.IsEmpty || slice[0] != 't')
- {
- goto AlternationBranch7;
- }
-
- pos++;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch3;
-
- AlternationBranch7:
- pos = alternation_starting_pos3;
- slice = inputSpan.Slice(pos);
- }
-
- // Branch 2
- {
- // Match the string "まで".
- if (!slice.StartsWith("まで"))
- {
- goto AlternationBranch8;
- }
-
- pos += 2;
- slice = inputSpan.Slice(pos);
- goto AlternationMatch3;
-
- AlternationBranch8:
- pos = alternation_starting_pos3;
- slice = inputSpan.Slice(pos);
- }
-
- // Branch 3
- {
- // Match the string "から".
- if (!slice.StartsWith("から"))
- {
- goto LoopIterationNoMatch;
- }
-
- pos += 2;
- slice = inputSpan.Slice(pos);
- }
-
- AlternationMatch3:;
- }
-
- pos = positivelookahead_starting_pos;
- slice = inputSpan.Slice(pos);
- }
-
-
- // The loop has an upper bound of 1. Continue iterating greedily if the upper bound hasn't
- // yet been reached (as long as the last iteration wasn't empty).
- if (pos != loop_starting_pos && loop_iteration == 0)
- {
- goto LoopBody;
- }
- goto LoopEnd;
-
- // The loop iteration failed. Put state back to the way it was before the iteration.
- LoopIterationNoMatch:
- if (--loop_iteration < 0)
- {
- // Unable to match the remainder of the expression after exhausting the loop.
- goto CharLoopBacktrack;
- }
- Utilities.StackPop(base.runstack!, ref stackpos, out pos, out loop_starting_pos);
- UncaptureUntil(base.runstack![--stackpos]);
- slice = inputSpan.Slice(pos);
- LoopEnd:
- stackpos = startingStackpos; // Ensure any remaining backtracking state is removed.
}
// The input matched.
/// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
-
- /// <summary>Determines whether the specified index is a boundary.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)
- {
- int indexMinus1 = index - 1;
- return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=
- ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));
-
- static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\u200C' | ch == '\u200D');
- }
-
- /// <summary>Determines whether the character is part of the [\w] set.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static bool IsWordChar(char ch)
- {
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
- // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
- int chDiv8 = ch >> 3;
- return (uint)chDiv8 < (uint)ascii.Length ?
- (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
- (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
- }
-
- /// <summary>Pops 2 values from the backtracking stack.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static void StackPop(int[] stack, ref int pos, out int arg0, out int arg1)
- {
- arg0 = stack[--pos];
- arg1 = stack[--pos];
- }
-
- /// <summary>Pushes 3 values onto the backtracking stack.</summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static void StackPush(ref int[] stack, ref int pos, int arg0, int arg1, int arg2)
- {
- // If there's space available for all 3 values, store them.
- int[] s = stack;
- int p = pos;
- if ((uint)(p + 2) < (uint)s.Length)
- {
- s[p] = arg0;
- s[p + 1] = arg1;
- s[p + 2] = arg2;
- pos += 3;
- return;
- }
-
- // Otherwise, resize the stack to make room and try again.
- WithResize(ref stack, ref pos, arg0, arg1, arg2);
-
- // <summary>Resize the backtracking stack array and push 3 values onto the stack.</summary>
- [MethodImpl(MethodImplOptions.NoInlining)]
- static void WithResize(ref int[] stack, ref int pos, int arg0, int arg1, int arg2)
- {
- Array.Resize(ref stack, (pos + 2) * 2);
- StackPush(ref stack, ref pos, arg0, arg1, arg2);
- }
- }
}
}
For more diff examples, see https://gist.github.com/MihuBot/a78f3bd6a086ff6dbc537a93cce8b69b
Total bytes of base: 54274946
Total bytes of diff: 54187026
Total bytes of delta: -87920 (-0.16 % of base)
Total relative delta: -11.21
diff is an improvement.
relative diff is an improvement.
For a list of JIT diff regressions, see Regressions.md
For a list of JIT diff improvements, see Improvements.md
Sample source code for further analysis
// Local cache file for the per-pattern results; the download below is skipped when it already exists.
const string JsonPath = "RegexResults-1283.json";
if (!File.Exists(JsonPath))
{
    // Own the client with a using declaration so it is disposed deterministically.
    // Using declarations are disposed in reverse order of declaration, so the archive and the
    // response stream are released before the client that produced them.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/E2hp_ZI");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First() throws InvalidOperationException if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

// Deserialize the cached results. IncludeFields is enabled so that public fields are (de)serialized
// too; RegexEntry contains value tuples, whose elements are fields rather than properties.
using FileStream jsonFileStream = File.OpenRead(JsonPath);
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })!;
Console.WriteLine($"Working with {entries.Length} patterns");
/// <summary>
/// A regex pattern paired with the <see cref="RegexOptions"/> recorded for it.
/// </summary>
/// <param name="Pattern">The regex pattern text.</param>
/// <param name="Options">The regex options associated with the pattern.</param>
/// <param name="Count">Presumably the number of known uses of this pattern (the report lists e.g. "89 uses") — TODO confirm.</param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);
/// <summary>
/// One element of the array stored in <c>Results.json</c>: a known pattern together with
/// source text and diff strings recorded for it.
/// </summary>
sealed class RegexEntry
{
/// <summary>The pattern, options and count this entry describes.</summary>
public required KnownPattern Regex { get; set; }
/// <summary>Source text for the pattern; presumably the generator output on the main branch — TODO confirm.</summary>
public required string MainSource { get; set; }
/// <summary>Source text for the pattern; presumably the generator output with the PR applied — TODO confirm.</summary>
public required string PrSource { get; set; }
/// <summary>Optional diff text; presumably the complete diff between <see cref="MainSource"/> and <see cref="PrSource"/> — TODO confirm.</summary>
public string? FullDiff { get; set; }
/// <summary>Optional diff text; presumably an abbreviated form of <see cref="FullDiff"/> — TODO confirm.</summary>
public string? ShortDiff { get; set; }
/// <summary>Optional (name, values) pairs; presumably the char-based SearchValues emitted for the pattern — TODO confirm.</summary>
public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
/// <summary>Optional (values, comparison type) pairs; presumably the string-based SearchValues emitted for the pattern — TODO confirm.</summary>
public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}Artifacts:
- ShortExampleDiffs.md (27 KB)
- LongExampleDiffs.md (895 KB)
- Results.zip (42 MB)
- JitAnalyzeSummary.txt (40 KB)
- JitDiffRegressions.md (270 KB)
- JitDiffImprovements.md (893 KB)
- LongJitDiffImprovements.md (8 MB)
- jit-diffs.zip (391 MB)
Metadata
Metadata
Assignees
Labels
No labels