Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,15 @@ private unsafe int IcuIndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char>
}
else
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return IndexOfCoreNative(target, source, options, fromBeginning, matchLengthPtr);
#endif
// GetReference may return nullptr if the input span is defaulted. The native layer handles
// this appropriately; no workaround is needed on the managed side.

fixed (char* pSource = &MemoryMarshal.GetReference(source))
fixed (char* pTarget = &MemoryMarshal.GetReference(target))
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return IndexOfCoreNative(pTarget, target.Length, pSource, source.Length, options, fromBeginning, matchLengthPtr);
#endif
if (fromBeginning)
return Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr);
else
Expand Down Expand Up @@ -207,7 +206,7 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, Rea
InteropCall:
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return IndexOfCoreNative(b, target.Length, a, source.Length, options, fromBeginning, matchLengthPtr);
return IndexOfCoreNative(target, source, options, fromBeginning, matchLengthPtr);
#endif
if (fromBeginning)
return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
Expand Down Expand Up @@ -301,7 +300,7 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<
InteropCall:
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return IndexOfCoreNative(b, target.Length, a, source.Length, options, fromBeginning, matchLengthPtr);
return IndexOfCoreNative(target, source, options, fromBeginning, matchLengthPtr);
#endif
if (fromBeginning)
return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
Expand All @@ -328,13 +327,13 @@ private unsafe bool IcuStartsWith(ReadOnlySpan<char> source, ReadOnlySpan<char>
}
else
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeStartsWith(prefix, source, options);
#endif
fixed (char* pSource = &MemoryMarshal.GetReference(source)) // could be null (or otherwise unable to be dereferenced)
fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeStartsWith(pPrefix, prefix.Length, pSource, source.Length, options);
#endif
return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options, matchLengthPtr);
}
}
Expand Down Expand Up @@ -416,7 +415,7 @@ private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source,
InteropCall:
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeStartsWith(bp, prefix.Length, ap, source.Length, options);
return NativeStartsWith(prefix, source, options);
#endif
return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options, matchLengthPtr);
}
Expand Down Expand Up @@ -488,7 +487,7 @@ private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlyS
InteropCall:
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeStartsWith(bp, prefix.Length, ap, source.Length, options);
return NativeStartsWith(prefix, source, options);
#endif
return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options, matchLengthPtr);
}
Expand All @@ -512,13 +511,13 @@ private unsafe bool IcuEndsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> su
}
else
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeEndsWith(suffix, source, options);
#endif
fixed (char* pSource = &MemoryMarshal.GetReference(source)) // could be null (or otherwise unable to be dereferenced)
fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
{
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeEndsWith(pSuffix, suffix.Length, pSource, source.Length, options);
#endif
return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options, matchLengthPtr);
}
}
Expand Down Expand Up @@ -601,7 +600,7 @@ private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, R
InteropCall:
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeEndsWith(bp, suffix.Length, ap, source.Length, options);
return NativeEndsWith(suffix, source, options);
#endif
return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options, matchLengthPtr);
}
Expand Down Expand Up @@ -673,7 +672,7 @@ private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpa
InteropCall:
#if TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
if (GlobalizationMode.Hybrid)
return NativeEndsWith(bp, suffix.Length, ap, source.Length, options);
return NativeEndsWith(suffix, source, options);
#endif
return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options, matchLengthPtr);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ private unsafe int CompareStringNative(ReadOnlySpan<char> string1, ReadOnlySpan<

AssertComparisonSupported(options);

// Handle IgnoreSymbols preprocessing
if ((options & CompareOptions.IgnoreSymbols) != 0)
{
string1 = FilterSymbolsFromSpan(string1, out _);
string2 = FilterSymbolsFromSpan(string2, out _);

// Remove the flag before passing to native since we handled it here
options &= ~CompareOptions.IgnoreSymbols;
}

// GetReference may return nullptr if the input span is defaulted. The native layer handles
// this appropriately; no workaround is needed on the managed side.
int result;
Expand All @@ -39,38 +49,211 @@ private unsafe int CompareStringNative(ReadOnlySpan<char> string1, ReadOnlySpan<
return result;
}

/// <summary>
/// Searches <paramref name="source"/> for <paramref name="target"/> through
/// <c>Interop.Globalization.IndexOfNative</c>, handling <see cref="CompareOptions.IgnoreSymbols"/>
/// on the managed side by filtering both spans before the native call.
/// </summary>
/// <param name="target">The text to look for.</param>
/// <param name="source">The text to search in.</param>
/// <param name="options">Comparison options; <see cref="CompareOptions.IgnoreSymbols"/> is stripped before the native call.</param>
/// <param name="fromBeginning">Forwarded unchanged to the native search.</param>
/// <param name="matchLengthPtr">Optional pointer that receives the match length, expressed in UTF-16 units of the original (unfiltered) source.</param>
/// <returns>
/// The match position in the original <paramref name="source"/>, or the negative value reported by the native layer.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// The native layer reported <c>ERROR_MIXED_COMPOSITION_NOT_FOUND</c>.
/// </exception>
private unsafe int IndexOfCoreNative(ReadOnlySpan<char> target, ReadOnlySpan<char> source, CompareOptions options, bool fromBeginning, int* matchLengthPtr)
{
    AssertComparisonSupported(options);

    // We only implement managed preprocessing for IgnoreSymbols.
    bool ignoreSymbols = (options & CompareOptions.IgnoreSymbols) != 0;
    int originalSourceLength = source.Length; // captured before 'source' is replaced by its filtered copy
    int[]? sourceIndexMap = null; // maps each char index in filteredSource to original source char index

    // If we are ignoring symbols, preprocess the strings by removing specified Unicode categories.
    if (ignoreSymbols)
    {
        target = FilterSymbolsFromSpan(target, out _);
        source = FilterSymbolsFromSpan(source, out sourceIndexMap);

        // Remove the flag before passing to native since we handled it here
        options &= ~CompareOptions.IgnoreSymbols;
    }

    int nativeLocation;
    int nativeLength;
    fixed (char* pTarget = &MemoryMarshal.GetReference(target))
    fixed (char* pSource = &MemoryMarshal.GetReference(source))
    {
        Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, pTarget, target.Length, pSource, source.Length, options, fromBeginning);
        Debug.Assert(result.Location != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND);
        if (result.Location == (int)ErrorCodes.ERROR_MIXED_COMPOSITION_NOT_FOUND)
            throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions);
        nativeLocation = result.Location;
        nativeLength = result.Length;
    }

    // If not ignoring symbols / nothing found / an error code / no source index map (no ignorable symbols in source string), just propagate.
    if (!ignoreSymbols || nativeLocation < 0 || sourceIndexMap == null)
    {
        if (matchLengthPtr != null)
            *matchLengthPtr = nativeLength;
        return nativeLocation;
    }

    // A zero-length match (e.g. the target consisted only of ignorable symbols) has no last character
    // to map back: 'nativeLocation + nativeLength - 1' would be -1 at location 0, and the location itself
    // may equal the filtered length. Map the position directly and report an empty match instead of
    // indexing outside sourceIndexMap.
    if (nativeLength <= 0)
    {
        if (matchLengthPtr != null)
            *matchLengthPtr = 0;
        return nativeLocation < sourceIndexMap.Length
            ? sourceIndexMap[nativeLocation]
            : originalSourceLength;
    }

    // If ignoring symbols, map filtered indices back to original indices, expanding match length to include removed symbol chars inside the span.
    // nativeLocation is index into filtered source; nativeLength is length in filtered source UTF-16 code units.
    int originalStart = sourceIndexMap[nativeLocation];
    int filteredEnd = nativeLocation + nativeLength - 1;

    Debug.Assert(filteredEnd < sourceIndexMap.Length,
        $"Filtered end index {filteredEnd} should not exceed the length of the filtered string {sourceIndexMap.Length}. nativeLocation={nativeLocation}, nativeLength={nativeLength}");

    // Find the end position of the character at filteredEnd in the original string.
    int endCharStartPos = sourceIndexMap[filteredEnd];

    // Check if the previous position belongs to the same character (first unit of a surrogate pair)
    int firstUnit = (filteredEnd > 0 && sourceIndexMap[filteredEnd - 1] == endCharStartPos)
        ? filteredEnd - 1
        : filteredEnd;

    // Check if the next position belongs to the same character (second unit of a surrogate pair)
    int lastUnit = (filteredEnd + 1 < sourceIndexMap.Length && sourceIndexMap[filteredEnd + 1] == endCharStartPos)
        ? filteredEnd + 1
        : filteredEnd;

    // Both units of a surrogate pair share one map entry, so the run of equal entries is the character width.
    int endCharWidth = lastUnit - firstUnit + 1;
    int originalEnd = endCharStartPos + endCharWidth;
    int originalLength = originalEnd - originalStart;

    if (matchLengthPtr != null)
        *matchLengthPtr = originalLength;
    return originalStart;
}

/// <summary>
/// Determines whether the specified rune should be ignored when using CompareOptions.IgnoreSymbols.
/// </summary>
/// <param name="rune">The rune to check.</param>
/// <returns>
/// <c>true</c> if the rune should be ignored; otherwise, <c>false</c>.
/// </returns>
/// <remarks>
/// This method returns <c>true</c> for:
/// - All separator categories (SpaceSeparator, LineSeparator, ParagraphSeparator)
/// - All punctuation categories (ConnectorPunctuation through OtherPunctuation)
/// - The symbol categories MathSymbol, CurrencySymbol and ModifierSymbol
/// - Control and Format characters for which <see cref="Rune.IsWhiteSpace(Rune)"/> is <c>true</c>
///   (tab, line feed, vertical tab, form feed, carriage return, etc.)
/// NOTE(review): OtherSymbol follows ModifierSymbol in <see cref="UnicodeCategory"/> and is therefore
/// not covered by the range check below - confirm that this exclusion is intended.
/// </remarks>
private static bool IsIgnorableSymbol(Rune rune)
{
    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(rune.Value);

    // Check for separator categories (11-13)
    if (category >= UnicodeCategory.SpaceSeparator && category <= UnicodeCategory.ParagraphSeparator)
        return true;

    // Check for punctuation/symbol categories (18-27)
    if (category >= UnicodeCategory.ConnectorPunctuation && category <= UnicodeCategory.ModifierSymbol)
        return true;

    // For Control (14) and Format (15) categories, only include whitespace characters
    // This includes: tab (U+0009), LF (U+000A), VT (U+000B), FF (U+000C), CR (U+000D), NEL (U+0085)
    if (category == UnicodeCategory.Control || category == UnicodeCategory.Format)
        return Rune.IsWhiteSpace(rune);

    return false;
}

/// <summary>
/// Determines whether <paramref name="source"/> starts with <paramref name="prefix"/> by calling
/// <c>Interop.Globalization.StartsWithNative</c>.
/// </summary>
/// <param name="prefix">The prefix to look for.</param>
/// <param name="source">The text to inspect.</param>
/// <param name="options">
/// Comparison options. <see cref="CompareOptions.IgnoreSymbols"/> is handled here by filtering both
/// spans and is cleared before the native call.
/// </param>
/// <returns><c>true</c> when the native call returns a positive value; otherwise, <c>false</c>.</returns>
private unsafe bool NativeStartsWith(ReadOnlySpan<char> prefix, ReadOnlySpan<char> source, CompareOptions options)
{
    AssertComparisonSupported(options);

    // Handle IgnoreSymbols preprocessing
    if ((options & CompareOptions.IgnoreSymbols) != 0)
    {
        prefix = FilterSymbolsFromSpan(prefix, out _);
        source = FilterSymbolsFromSpan(source, out _);

        // Remove the flag before passing to native since we handled it here
        options &= ~CompareOptions.IgnoreSymbols;
    }

    int result;
    // Pin here (not in the caller) because filtering may have replaced the spans with new buffers.
    fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
    fixed (char* pSource = &MemoryMarshal.GetReference(source))
    {
        result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, prefix.Length, pSource, source.Length, options);
    }
    Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND);

    return result > 0;
}

/// <summary>
/// Determines whether <paramref name="source"/> ends with <paramref name="suffix"/> by calling
/// <c>Interop.Globalization.EndsWithNative</c>.
/// </summary>
/// <param name="suffix">The suffix to look for.</param>
/// <param name="source">The text to inspect.</param>
/// <param name="options">
/// Comparison options. <see cref="CompareOptions.IgnoreSymbols"/> is handled here by filtering both
/// spans and is cleared before the native call.
/// </param>
/// <returns><c>true</c> when the native call returns a positive value; otherwise, <c>false</c>.</returns>
private unsafe bool NativeEndsWith(ReadOnlySpan<char> suffix, ReadOnlySpan<char> source, CompareOptions options)
{
    AssertComparisonSupported(options);

    // Handle IgnoreSymbols preprocessing
    if ((options & CompareOptions.IgnoreSymbols) != 0)
    {
        suffix = FilterSymbolsFromSpan(suffix, out _);
        source = FilterSymbolsFromSpan(source, out _);

        // Remove the flag before passing to native since we handled it here
        options &= ~CompareOptions.IgnoreSymbols;
    }

    int result;
    // Pin here (not in the caller) because filtering may have replaced the spans with new buffers.
    fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
    fixed (char* pSource = &MemoryMarshal.GetReference(source))
    {
        result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, suffix.Length, pSource, source.Length, options);
    }
    Debug.Assert(result != (int)ErrorCodes.ERROR_COMPARISON_OPTIONS_NOT_FOUND);

    return result > 0;
}

/// <summary>
/// Filters out ignorable symbol characters from the input span.
/// </summary>
/// <param name="input">The input span to filter.</param>
/// <param name="indexMap">
/// When this method returns, contains a mapping array where each index in the filtered output
/// maps to the corresponding character start position in the original input span (both UTF-16
/// units of a surrogate pair map to the index of the pair's first unit).
/// This parameter is passed uninitialized and will be null if no symbols were removed.
/// </param>
/// <returns>
/// A read-only span with ignorable symbols removed. If no symbols were found, returns the
/// original input span unchanged.
/// </returns>
/// <remarks>
/// Nothing is allocated when the input contains no ignorable symbol: a first pass only looks for
/// the first removable character, and buffers are created only once one is found.
/// </remarks>
private static ReadOnlySpan<char> FilterSymbolsFromSpan(ReadOnlySpan<char> input, out int[]? indexMap)
{
    int length = input.Length;

    // Pass 1: find the first ignorable character without allocating anything.
    int firstIgnorable = 0;
    while (firstIgnorable < length)
    {
        Rune.DecodeFromUtf16(input.Slice(firstIgnorable), out Rune probe, out int probeWidth);
        if (IsIgnorableSymbol(probe))
        {
            break;
        }

        firstIgnorable += probeWidth;
    }

    if (firstIgnorable >= length)
    {
        // No symbols removed; return original span and no mapping.
        indexMap = null;
        return input;
    }

    // At least one UTF-16 unit is dropped, so 'length - 1' slots are always enough.
    char[] keptChars = new char[length - 1];
    int[] mapping = new int[length - 1];
    int keptCount = 0;

    // Pass 2: copy every kept character and record where it started in the original input.
    for (int i = 0; i < length;)
    {
        Rune.DecodeFromUtf16(input.Slice(i), out Rune rune, out int consumed);

        // Everything before firstIgnorable is already known to be kept.
        if (i < firstIgnorable || !IsIgnorableSymbol(rune))
        {
            // Copy the UTF-16 units and map each filtered position to the start of the original character
            for (int j = 0; j < consumed; j++)
            {
                keptChars[keptCount] = input[i + j];
                mapping[keptCount] = i;
                keptCount++;
            }
        }

        i += consumed;
    }

    // Callers rely on indexMap.Length being the filtered length, so trim the map to the exact size.
    Array.Resize(ref mapping, keptCount);
    indexMap = mapping;
    return new ReadOnlySpan<char>(keptChars, 0, keptCount);
}

private static void AssertComparisonSupported(CompareOptions options)
Expand All @@ -80,7 +263,7 @@ private static void AssertComparisonSupported(CompareOptions options)
}

// Compare options treated as supported by the native helpers in this file. IgnoreSymbols is part of
// the set because those helpers strip ignorable symbols on the managed side and clear the flag
// before calling into the native layer.
private const CompareOptions SupportedCompareOptions = CompareOptions.None | CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace |
    CompareOptions.IgnoreWidth | CompareOptions.StringSort | CompareOptions.IgnoreKanaType | CompareOptions.IgnoreSymbols;

/// <summary>
/// Builds a message by formatting the
/// <c>SR.PlatformNotSupported_HybridGlobalizationWithCompareOptions</c> resource with the given options.
/// </summary>
/// <param name="options">The compare options inserted into the message.</param>
/// <returns>The formatted message text.</returns>
private static string GetPNSE(CompareOptions options)
{
    string message = SR.Format(SR.PlatformNotSupported_HybridGlobalizationWithCompareOptions, options);
    return message;
}
Expand Down
Loading
Loading