From 7b1c2f1b76cae0e088f1e11b726855d4079992bd Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Tue, 17 Jun 2025 16:34:00 -0600 Subject: [PATCH 1/3] Stop passing original string for error reporting --- .../Formatting/Date+HTTPFormatStyle.swift | 58 +++++++-------- .../DateComponents+ISO8601FormatStyle.swift | 74 +++++++++---------- .../Formatting/FormatParsingUtilities.swift | 14 +++- 3 files changed, 77 insertions(+), 69 deletions(-) diff --git a/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift b/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift index f3860f457..7077f87e4 100644 --- a/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift +++ b/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift @@ -68,7 +68,7 @@ extension Date { let result = v.withUTF8 { buffer -> (Int, Date)? in let view = BufferView(unsafeBufferPointer: buffer)! - guard let comps = try? componentsStyle.components(from: value, in: view) else { + guard let comps = try? componentsStyle.components(in: view) else { return nil } @@ -324,7 +324,7 @@ extension DateComponents { let result = v.withUTF8 { buffer -> (Int, DateComponents)? in let view = BufferView(unsafeBufferPointer: buffer)! - guard let comps = try? components(from: value, in: view) else { + guard let comps = try? components(in: view) else { return nil } @@ -338,8 +338,8 @@ extension DateComponents { let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0) return (endIndex, result.1) } - - fileprivate func components(from inputString: String, in view: borrowing BufferView) throws -> ComponentsParseResult { + + fileprivate func components(in view: borrowing BufferView) throws -> ComponentsParseResult { // https://www.rfc-editor.org/rfc/rfc9110.html#http.date // , :: GMT @@ -348,7 +348,7 @@ extension DateComponents { // Despite the spec, we allow the weekday name to be optional. 
guard let maybeWeekday1 = it.peek() else { - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now)) + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now)) } if isASCIIDigit(maybeWeekday1) { @@ -356,9 +356,9 @@ extension DateComponents { } else { // Anything else must be a day-name (Mon, Tue, ... Sun) guard let weekday1 = it.next(), let weekday2 = it.next(), let weekday3 = it.next() else { - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now)) + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now)) } - + dc.weekday = switch (weekday1, weekday2, weekday3) { case (UInt8(ascii: "S"), UInt8(ascii: "u"), UInt8(ascii: "n")): 1 @@ -375,20 +375,20 @@ extension DateComponents { case (UInt8(ascii: "S"), UInt8(ascii: "a"), UInt8(ascii: "t")): 7 default: - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Malformed weekday name") + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Malformed weekday name") } // Move past , and space to weekday - try it.expectCharacter(UInt8(ascii: ","), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing , after weekday") - try it.expectCharacter(UInt8(ascii: " "), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing space after weekday") + try it.expectCharacter(UInt8(ascii: ","), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing , after weekday") + try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing space after weekday") } - dc.day = try it.digits(minDigits: 2, maxDigits: 2, input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now), 
extendedDescription: "Missing or malformed day") - try it.expectCharacter(UInt8(ascii: " "), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + dc.day = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing or malformed day") + try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) // month-name (Jan, Feb, ... Dec) guard let month1 = it.next(), let month2 = it.next(), let month3 = it.next() else { - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing month") + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing month") } dc.month = switch (month1, month2, month3) { @@ -417,32 +417,32 @@ extension DateComponents { case (UInt8(ascii: "D"), UInt8(ascii: "e"), UInt8(ascii: "c")): 12 default: - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Month \(String(describing: dc.month)) is out of bounds") + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Month \(String(describing: dc.month)) is out of bounds") } - try it.expectCharacter(UInt8(ascii: " "), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) - dc.year = try it.digits(minDigits: 4, maxDigits: 4, input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) - try it.expectCharacter(UInt8(ascii: " "), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + dc.year = try it.digits(minDigits: 4, maxDigits: 4, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: 
Date.HTTPFormatStyle().format(Date.now)) - let hour = try it.digits(minDigits: 2, maxDigits: 2, input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + let hour = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) if hour < 0 || hour > 23 { - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Hour \(hour) is out of bounds") + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Hour \(hour) is out of bounds") } dc.hour = hour - try it.expectCharacter(UInt8(ascii: ":"), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) - let minute = try it.digits(minDigits: 2, maxDigits: 2, input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + let minute = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) if minute < 0 || minute > 59 { - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Minute \(minute) is out of bounds") + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Minute \(minute) is out of bounds") } dc.minute = minute - try it.expectCharacter(UInt8(ascii: ":"), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) - let second = try it.digits(minDigits: 2, maxDigits: 2, input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + let second = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) // second '60' is supported in the spec for leap seconds, but 
Foundation does not support leap seconds. 60 is adjusted to 59. if second < 0 || second > 60 { - throw parseError(inputString, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Second \(second) is out of bounds") + throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Second \(second) is out of bounds") } // Foundation does not support leap seconds. We convert 60 seconds into 59 seconds. if second == 60 { @@ -450,12 +450,12 @@ extension DateComponents { } else { dc.second = second } - try it.expectCharacter(UInt8(ascii: " "), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now)) + try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) // "GMT" - try it.expectCharacter(UInt8(ascii: "G"), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") - try it.expectCharacter(UInt8(ascii: "M"), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") - try it.expectCharacter(UInt8(ascii: "T"), input: inputString, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") + try it.expectCharacter(UInt8(ascii: "G"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") + try it.expectCharacter(UInt8(ascii: "M"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") + try it.expectCharacter(UInt8(ascii: "T"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") // Time zone is always GMT, calendar is always Gregorian dc.timeZone = .gmt diff --git a/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift 
b/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift index 7bf1d3dc0..280fbcbfd 100644 --- a/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift +++ b/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift @@ -407,7 +407,7 @@ extension DateComponents.ISO8601FormatStyle { var components: DateComponents } - private func components(from inputString: String, fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: borrowing BufferView) throws -> ComponentsParseResult { + private func components(fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: borrowing BufferView) throws -> ComponentsParseResult { let fields = formatFields var it = view.makeIterator() @@ -429,7 +429,7 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.year) { let max = dateSeparator == .omitted ? 4 : nil - let value = try it.digits(maxDigits: max, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) if fields.contains(.weekOfYear) { yearForWeekOfYear = value } else { @@ -444,30 +444,30 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.month) { if needsSeparator && dateSeparator == .dash { - try it.expectCharacter(UInt8(ascii: "-"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: "-"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) } // parse month digits let max = dateSeparator == .omitted ? 
2 : nil - let value = try it.digits(maxDigits: max, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) guard _calendar.maximumRange(of: .month)!.contains(value) else { - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } month = value needsSeparator = true } else if fields.contains(.weekOfYear) { if needsSeparator && dateSeparator == .dash { - try it.expectCharacter(UInt8(ascii: "-"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: "-"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) } // parse W - try it.expectCharacter(UInt8(ascii: "W"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: "W"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) // parse week of year digits let max = dateSeparator == .omitted ? 
2 : nil - let value = try it.digits(maxDigits: max, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) guard _calendar.maximumRange(of: .weekOfYear)!.contains(value) else { - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } weekOfYear = value @@ -479,26 +479,26 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.day) { if needsSeparator && dateSeparator == .dash { - try it.expectCharacter(UInt8(ascii: "-"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: "-"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) } if fields.contains(.weekOfYear) { // parse day of week ('ee') // ISO8601 "1" is Monday. For our date components, 2 is Monday. Add 1 to account for difference. let max = dateSeparator == .omitted ? 2 : nil - let value = (try it.digits(maxDigits: max, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) % 7) + 1 - + let value = (try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) % 7) + 1 + guard _calendar.maximumRange(of: .weekday)!.contains(value) else { - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } weekday = value } else if fields.contains(.month) { // parse day of month ('dd') let max = dateSeparator == .omitted ? 
2 : nil - let value = try it.digits(maxDigits: max, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) guard _calendar.maximumRange(of: .day)!.contains(value) else { - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } day = value @@ -506,9 +506,9 @@ extension DateComponents.ISO8601FormatStyle { } else { // parse 3 digit day of year ('DDD') let max = dateSeparator == .omitted ? 3 : nil - let value = try it.digits(maxDigits: max, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) guard _calendar.maximumRange(of: .dayOfYear)!.contains(value) else { - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } dayOfYear = value @@ -522,24 +522,24 @@ extension DateComponents.ISO8601FormatStyle { switch dateTimeSeparator { case .standard: // parse T - try it.expectCharacter(UInt8(ascii: "T"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: "T"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) case .space: // parse any number of spaces - try it.expectOneOrMoreCharacters(UInt8(ascii: " "), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectOneOrMoreCharacters(UInt8(ascii: " "), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) } } switch timeSeparator { case .colon: - hour = try it.digits(input: inputString, onFailure: 
Date.ISO8601FormatStyle(self).format(Date.now)) - try it.expectCharacter(UInt8(ascii: ":"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - minute = try it.digits(input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - try it.expectCharacter(UInt8(ascii: ":"), input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - second = try it.digits(input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + hour = try it.digits(input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + minute = try it.digits(input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + second = try it.digits(input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) case .omitted: - hour = try it.digits(maxDigits: 2, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - minute = try it.digits(maxDigits: 2, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - second = try it.digits(maxDigits: 2, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + hour = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + minute = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + second = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) } // When parsing, fractional seconds are always optional (as of Swift 6.2). 
@@ -547,7 +547,7 @@ extension DateComponents.ISO8601FormatStyle { if let next = it.peek(), next == UInt8(ascii: ".") { // Looks like a fractional seconds let _ = it.next() // consume the period - let fractionalSeconds = try it.digits(nanoseconds: true, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let fractionalSeconds = try it.digits(nanoseconds: true, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) nanosecond = fractionalSeconds } @@ -562,7 +562,7 @@ extension DateComponents.ISO8601FormatStyle { guard let plusOrMinusOrZ = it.next() else { // Expected time zone - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } @@ -606,7 +606,7 @@ extension DateComponents.ISO8601FormatStyle { positive = false } else { // Expected time zone, found garbage - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } if !skipDigits { @@ -614,8 +614,8 @@ extension DateComponents.ISO8601FormatStyle { // parse Time Zone: ISO8601 extended hms?, with Z // examples: -08:00, -07:52:58, Z - let hours = try it.digits(maxDigits: 2, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - + let hours = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + // Expect a colon, or a minutes value, or the end. 
let expectMinutes: Bool if let next = it.peek() { @@ -641,8 +641,8 @@ extension DateComponents.ISO8601FormatStyle { tzOffset = hours * 3600 } else { // Continue on - let minutes = try it.digits(maxDigits: 2, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - + let minutes = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + if let maybeColon = it.peek(), maybeColon == UInt8(ascii: ":") { // Throw it away it.advance() @@ -650,7 +650,7 @@ extension DateComponents.ISO8601FormatStyle { if let secondsTens = it.peek(), isASCIIDigit(secondsTens) { // We have seconds - let seconds = try it.digits(maxDigits: 2, input: inputString, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + let seconds = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) tzOffset = (hours * 3600) + (minutes * 60) + seconds } else { // If the next character is missing, that's allowed - the time can be something like just -0852 and then the string can end @@ -664,7 +664,7 @@ extension DateComponents.ISO8601FormatStyle { } else { guard let parsedTimeZone = TimeZone(secondsFromGMT: positive ? tzOffset : -tzOffset) else { // Out of range time zone - throw parseError(inputString, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) } timeZone = parsedTimeZone @@ -740,7 +740,7 @@ extension DateComponents.ISO8601FormatStyle : ParseStrategy { let result = v.withUTF8 { buffer -> (Int, DateComponents)? in let view = BufferView(unsafeBufferPointer: buffer)! - guard let comps = try? components(from: value, fillMissingUnits: fillMissingUnits, defaultTimeZone: timeZone, in: view) else { + guard let comps = try? 
components(fillMissingUnits: fillMissingUnits, defaultTimeZone: timeZone, in: view) else { return nil } diff --git a/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift b/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift index a7142388f..83c97c7d9 100644 --- a/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift +++ b/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift @@ -10,6 +10,14 @@ // //===----------------------------------------------------------------------===// +internal // NOTE: internal because BufferView is internal, `parseError` below is `package` +func parseError( + _ value: BufferView, exampleFormattedString: String?, extendedDescription: String? = nil +) -> CocoaError { + // TODO: change to UTF8Span, and prototype string append and interpolation taking UTF8Span + parseError(String(decoding: value, as: UTF8.self), exampleFormattedString: exampleFormattedString, extendedDescription: extendedDescription) +} + package func parseError(_ value: String, exampleFormattedString: String?, extendedDescription: String? = nil) -> CocoaError { let errorStr: String if let exampleFormattedString = exampleFormattedString { @@ -25,13 +33,13 @@ func isASCIIDigit(_ x: UInt8) -> Bool { } extension BufferViewIterator { - mutating func expectCharacter(_ expected: UInt8, input: String, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws { + mutating func expectCharacter(_ expected: UInt8, input: BufferView, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws { guard let parsed = next(), parsed == expected else { throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) } } - mutating func expectOneOrMoreCharacters(_ expected: UInt8, input: String, onFailure: @autoclosure () -> (String), extendedDescription: String? 
= nil) throws { + mutating func expectOneOrMoreCharacters(_ expected: UInt8, input: BufferView, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws { guard let parsed = next(), parsed == expected else { throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) } @@ -47,7 +55,7 @@ extension BufferViewIterator { } } - mutating func digits(minDigits: Int? = nil, maxDigits: Int? = nil, nanoseconds: Bool = false, input: String, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws -> Int { + mutating func digits(minDigits: Int? = nil, maxDigits: Int? = nil, nanoseconds: Bool = false, input: BufferView, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws -> Int { // Consume all leading zeros, parse until we no longer see a digit var result = 0 var count = 0 From 983c099363dcd2441b0d8d172a0275a5dc5712f5 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Wed, 18 Jun 2025 08:35:06 -0600 Subject: [PATCH 2/3] Fully disentangle diagnostics from iterating --- .../Formatting/Date+HTTPFormatStyle.swift | 86 ++++++++---- .../DateComponents+ISO8601FormatStyle.swift | 132 ++++++++++++------ .../Formatting/FormatParsingUtilities.swift | 81 +++++++---- .../JSON/BufferViewIterator.swift | 5 + 4 files changed, 208 insertions(+), 96 deletions(-) diff --git a/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift b/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift index 7077f87e4..1cd4a3d4a 100644 --- a/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift +++ b/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift @@ -343,12 +343,17 @@ extension DateComponents { // https://www.rfc-editor.org/rfc/rfc9110.html#http.date // , :: GMT + // Produce an error message to throw + func error(_ extendedDescription: String? 
= nil) -> CocoaError { + parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: extendedDescription) + } + var it = view.makeIterator() var dc = DateComponents() // Despite the spec, we allow the weekday name to be optional. guard let maybeWeekday1 = it.peek() else { - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now)) + throw error() } if isASCIIDigit(maybeWeekday1) { @@ -356,7 +361,7 @@ extension DateComponents { } else { // Anything else must be a day-name (Mon, Tue, ... Sun) guard let weekday1 = it.next(), let weekday2 = it.next(), let weekday3 = it.next() else { - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now)) + throw error() } dc.weekday = switch (weekday1, weekday2, weekday3) { @@ -375,20 +380,30 @@ extension DateComponents { case (UInt8(ascii: "S"), UInt8(ascii: "a"), UInt8(ascii: "t")): 7 default: - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Malformed weekday name") + throw error("Malformed weekday name") } // Move past , and space to weekday - try it.expectCharacter(UInt8(ascii: ","), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing , after weekday") - try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing space after weekday") + guard it.matchByte(UInt8(ascii: ",")) else { + throw error("Missing , after weekday") + } + guard it.matchByte(UInt8(ascii: " ")) else { + throw error("Missing space after weekday") + } } - dc.day = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing or malformed day") - try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard let day = it.parseNumber(minDigits: 2, 
maxDigits: 2) else { + throw error("Missing or malformed day") + } + dc.day = day + + guard it.matchByte(UInt8(ascii: " ")) else { + throw error() + } // month-name (Jan, Feb, ... Dec) guard let month1 = it.next(), let month2 = it.next(), let month3 = it.next() else { - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing month") + throw error("Missing month") } dc.month = switch (month1, month2, month3) { @@ -417,32 +432,50 @@ extension DateComponents { case (UInt8(ascii: "D"), UInt8(ascii: "e"), UInt8(ascii: "c")): 12 default: - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Month \(String(describing: dc.month)) is out of bounds") + throw error("Month \(String(describing: dc.month)) is out of bounds") + } + + guard it.matchByte(UInt8(ascii: " ")) else { + throw error() } - try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard let year = it.parseNumber(minDigits: 4, maxDigits: 4) else { + throw error() + } + dc.year = year - dc.year = try it.digits(minDigits: 4, maxDigits: 4, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) - try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard it.matchByte(UInt8(ascii: " ")) else { + throw error() + } - let hour = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard let hour = it.parseNumber(minDigits: 2, maxDigits: 2) else { + throw error() + } if hour < 0 || hour > 23 { - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Hour \(hour) is out of bounds") + throw error("Hour \(hour) is out of bounds") } dc.hour = hour - try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) - let minute = try 
it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard it.matchByte(UInt8(ascii: ":")) else { + throw error() + } + guard let minute = it.parseNumber(minDigits: 2, maxDigits: 2) else { + throw error() + } if minute < 0 || minute > 59 { - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Minute \(minute) is out of bounds") + throw error("Minute \(minute) is out of bounds") } dc.minute = minute - try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) - let second = try it.digits(minDigits: 2, maxDigits: 2, input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard it.matchByte(UInt8(ascii: ":")) else { + throw error() + } + guard let second = it.parseNumber(minDigits: 2, maxDigits: 2) else { + throw error() + } // second '60' is supported in the spec for leap seconds, but Foundation does not support leap seconds. 60 is adjusted to 59. if second < 0 || second > 60 { - throw parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Second \(second) is out of bounds") + throw error("Second \(second) is out of bounds") } // Foundation does not support leap seconds. We convert 60 seconds into 59 seconds. 
if second == 60 { @@ -450,12 +483,17 @@ extension DateComponents { } else { dc.second = second } - try it.expectCharacter(UInt8(ascii: " "), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now)) + guard it.matchByte(UInt8(ascii: " ")) else { + throw error() + } // "GMT" - try it.expectCharacter(UInt8(ascii: "G"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") - try it.expectCharacter(UInt8(ascii: "M"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") - try it.expectCharacter(UInt8(ascii: "T"), input: view, onFailure: Date.HTTPFormatStyle().format(Date.now), extendedDescription: "Missing GMT time zone") + guard it.matchByte(UInt8(ascii: "G")), + it.matchByte(UInt8(ascii: "M")), + it.matchByte(UInt8(ascii: "T")) + else { + throw error("Missing GMT time zone") + } // Time zone is always GMT, calendar is always Gregorian dc.timeZone = .gmt diff --git a/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift b/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift index 280fbcbfd..95327f750 100644 --- a/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift +++ b/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift @@ -409,7 +409,12 @@ extension DateComponents.ISO8601FormatStyle { private func components(fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: borrowing BufferView) throws -> ComponentsParseResult { let fields = formatFields - + + // Produce an error message to throw + func error(_ extendedDescription: String? 
= nil) -> CocoaError { + parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now), extendedDescription: extendedDescription) + } + var it = view.makeIterator() var needsSeparator = false @@ -429,7 +434,9 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.year) { let max = dateSeparator == .omitted ? 4 : nil - let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let value = it.parseNumber(maxDigits: max) else { + throw error() + } if fields.contains(.weekOfYear) { yearForWeekOfYear = value } else { @@ -444,30 +451,38 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.month) { if needsSeparator && dateSeparator == .dash { - try it.expectCharacter(UInt8(ascii: "-"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard it.matchByte(UInt8(ascii: "-")) else { + throw error() + } } // parse month digits let max = dateSeparator == .omitted ? 2 : nil - let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - guard _calendar.maximumRange(of: .month)!.contains(value) else { - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let value = it.parseNumber(maxDigits: max), + _calendar.maximumRange(of: .month)!.contains(value) + else { + throw error() } month = value needsSeparator = true } else if fields.contains(.weekOfYear) { if needsSeparator && dateSeparator == .dash { - try it.expectCharacter(UInt8(ascii: "-"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard it.matchByte(UInt8(ascii: "-")) else { + throw error() + } } // parse W - try it.expectCharacter(UInt8(ascii: "W"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard it.matchByte(UInt8(ascii: "W")) else { + throw error() + } // parse week of year digits let max = dateSeparator == .omitted ? 
2 : nil - let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - guard _calendar.maximumRange(of: .weekOfYear)!.contains(value) else { - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let value = it.parseNumber(maxDigits: max), + _calendar.maximumRange(of: .weekOfYear)!.contains(value) + else { + throw error() } weekOfYear = value @@ -479,26 +494,32 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.day) { if needsSeparator && dateSeparator == .dash { - try it.expectCharacter(UInt8(ascii: "-"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard it.matchByte(UInt8(ascii: "-")) else { + throw error() + } } if fields.contains(.weekOfYear) { // parse day of week ('ee') // ISO8601 "1" is Monday. For our date components, 2 is Monday. Add 1 to account for difference. let max = dateSeparator == .omitted ? 2 : nil - let value = (try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) % 7) + 1 + guard let n = it.parseNumber(maxDigits: max) else { + throw error() + } + let value = (n % 7) + 1 guard _calendar.maximumRange(of: .weekday)!.contains(value) else { - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw error() } weekday = value } else if fields.contains(.month) { // parse day of month ('dd') let max = dateSeparator == .omitted ? 
2 : nil - let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - guard _calendar.maximumRange(of: .day)!.contains(value) else { - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let value = it.parseNumber(maxDigits: max), + _calendar.maximumRange(of: .day)!.contains(value) + else { + throw error() } day = value @@ -506,9 +527,10 @@ extension DateComponents.ISO8601FormatStyle { } else { // parse 3 digit day of year ('DDD') let max = dateSeparator == .omitted ? 3 : nil - let value = try it.digits(maxDigits: max, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - guard _calendar.maximumRange(of: .dayOfYear)!.contains(value) else { - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let value = it.parseNumber(maxDigits: max), + _calendar.maximumRange(of: .dayOfYear)!.contains(value) + else { + throw error() } dayOfYear = value @@ -522,32 +544,48 @@ extension DateComponents.ISO8601FormatStyle { switch dateTimeSeparator { case .standard: // parse T - try it.expectCharacter(UInt8(ascii: "T"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard it.matchByte(UInt8(ascii: "T")) else { + throw error() + } case .space: // parse any number of spaces - try it.expectOneOrMoreCharacters(UInt8(ascii: " "), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let _ = it.matchOneOrMore(UInt8(ascii: " ")) else { + throw error() + } } } switch timeSeparator { case .colon: - hour = try it.digits(input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - minute = try it.digits(input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - try it.expectCharacter(UInt8(ascii: ":"), input: view, onFailure: 
Date.ISO8601FormatStyle(self).format(Date.now)) - second = try it.digits(input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let hrs = it.parseNumber(), + it.matchByte(UInt8(ascii: ":")), + let mins = it.parseNumber(), + it.matchByte(UInt8(ascii: ":")), + let secs = it.parseNumber() + else { + throw error() + } + (hour, minute, second) = (hrs, mins, secs) + case .omitted: - hour = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - minute = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - second = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + // With the separator omitted the three fields are adjacent (HHmmss), + // so no ":" is matched between them; each field is capped at two digits. + guard let hrs = it.parseNumber(maxDigits: 2), + let mins = it.parseNumber(maxDigits: 2), + let secs = it.parseNumber(maxDigits: 2) + else { + throw error() + } + (hour, minute, second) = (hrs, mins, secs) } // When parsing, fractional seconds are always optional (as of Swift 6.2). // Peek ahead and see if the next character is a period or not. If not, just continue on. - if let next = it.peek(), next == UInt8(ascii: ".") { + if it.matchByte(UInt8(ascii: ".")) { // Looks like a fractional seconds - let _ = it.next() // consume the period - let fractionalSeconds = try it.digits(nanoseconds: true, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let fractionalSeconds = it.parseNumber(nanoseconds: true) else { + throw error() + } nanosecond = fractionalSeconds } @@ -557,15 +595,14 @@ extension DateComponents.ISO8601FormatStyle { if fields.contains(.timeZone) { // For compatibility with ICU implementation, if the dateTimeSeparator is a space, consume any number (including zero) of spaces here. 
if dateTimeSeparator == .space { - it.expectZeroOrMoreCharacters(UInt8(ascii: " ")) + it.matchZeroOrMore(UInt8(ascii: " ")) } guard let plusOrMinusOrZ = it.next() else { // Expected time zone - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw error() } - if plusOrMinusOrZ == UInt8(ascii: "Z") || plusOrMinusOrZ == UInt8(ascii: "z") { timeZone = .gmt } else { @@ -606,7 +643,7 @@ extension DateComponents.ISO8601FormatStyle { positive = false } else { // Expected time zone, found garbage - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw error() } if !skipDigits { @@ -614,7 +651,9 @@ extension DateComponents.ISO8601FormatStyle { // parse Time Zone: ISO8601 extended hms?, with Z // examples: -08:00, -07:52:58, Z - let hours = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let hours = it.parseNumber(maxDigits: 2) else { + throw error() + } // Expect a colon, or a minutes value, or the end. 
let expectMinutes: Bool @@ -641,16 +680,17 @@ extension DateComponents.ISO8601FormatStyle { tzOffset = hours * 3600 } else { // Continue on - let minutes = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) - - if let maybeColon = it.peek(), maybeColon == UInt8(ascii: ":") { - // Throw it away - it.advance() + guard let minutes = it.parseNumber(maxDigits: 2) else { + throw error() } - if let secondsTens = it.peek(), isASCIIDigit(secondsTens) { + _ = it.matchByte(UInt8(ascii: ":")) + + if it.peek(isASCIIDigit) != nil { // We have seconds - let seconds = try it.digits(maxDigits: 2, input: view, onFailure: Date.ISO8601FormatStyle(self).format(Date.now)) + guard let seconds = it.parseNumber(maxDigits: 2) else { + throw error() + } tzOffset = (hours * 3600) + (minutes * 60) + seconds } else { // If the next character is missing, that's allowed - the time can be something like just -0852 and then the string can end @@ -664,7 +704,7 @@ extension DateComponents.ISO8601FormatStyle { } else { guard let parsedTimeZone = TimeZone(secondsFromGMT: positive ? 
tzOffset : -tzOffset) else { // Out of range time zone - throw parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now)) + throw error() } timeZone = parsedTimeZone diff --git a/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift b/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift index 83c97c7d9..f1b5d013f 100644 --- a/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift +++ b/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift @@ -32,54 +32,85 @@ func isASCIIDigit(_ x: UInt8) -> Bool { x >= UInt8(ascii: "0") && x <= UInt8(ascii: "9") } +/** + + Fundamental operations: + - Peek: return the next portion of input, if it exists and matches the given criteria + - Match: like peek, but also consumes the portion of input + - Parse: like match, but produces a value by interpreting the portion of input + + Notes on return types: + `peek(_:(UInt8) -> Bool) -> UInt8?` is more descriptive than returning a `Bool`, but slighlty less ergonomic if you only care about the `Bool`. If we don't return the `UInt8`, some callers may need to store it from the function somehow or else double-load it. + Match functions have different return types, depending on whether they always succeed, whether they match a variable length, etc. Since they also advance as part of matching, the return lengths are dicardable. They can also be retroactively calculated by the caller, we just return it because we can. + Finally, the parse functions just return the value, as there's no way to have a discardable return value alongside a non-discardable one. Again, lengths can be retroactively calculated by the caller based on the iterator's new offset. + */ extension BufferViewIterator { - mutating func expectCharacter(_ expected: UInt8, input: BufferView, onFailure: @autoclosure () -> (String), extendedDescription: String? 
= nil) throws { - guard let parsed = next(), parsed == expected else { - throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) + // Returns the next byte if there is one and it + // matches the predicate, otherwise false + func peek(_ f: (UInt8) -> Bool) -> UInt8? { + guard let b = peek(), f(b) else { + return nil } + return b } - - mutating func expectOneOrMoreCharacters(_ expected: UInt8, input: BufferView, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws { - guard let parsed = next(), parsed == expected else { - throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) + + mutating func matchByte(_ expected: UInt8) -> Bool { + if peek() == expected { + _uncheckedAdvance() + return true } - - while let parsed = peek(), parsed == expected { - advance() + return false + } + + mutating func matchPredicate(_ f: (UInt8) -> Bool) -> UInt8? { + guard let b = peek(f) else { + return nil } + _uncheckedAdvance() + return b } - - mutating func expectZeroOrMoreCharacters(_ expected: UInt8) { - while let parsed = peek(), parsed == expected { - advance() + + @discardableResult + mutating func matchZeroOrMore(_ expected: UInt8) -> Int { + var count = 0 + while matchByte(expected) { + count += 1 } + return count + } + + @discardableResult + mutating func matchOneOrMore(_ expected: UInt8) -> Int? { + let c = matchZeroOrMore(expected) + return c == 0 ? nil : c } - - mutating func digits(minDigits: Int? = nil, maxDigits: Int? = nil, nanoseconds: Bool = false, input: BufferView, onFailure: @autoclosure () -> (String), extendedDescription: String? = nil) throws -> Int { + + // TODO: I think it would be cleaner to separate out + // nanosecond handling here... + mutating func parseNumber(minDigits: Int? = nil, maxDigits: Int? = nil, nanoseconds: Bool = false) -> Int? 
{ // Consume all leading zeros, parse until we no longer see a digit var result = 0 var count = 0 // Cap at 10 digits max to avoid overflow let max = min(maxDigits ?? 10, 10) - while let next = peek(), isASCIIDigit(next) { + while let next = matchPredicate(isASCIIDigit) { let digit = Int(next - UInt8(ascii: "0")) result *= 10 result += digit - advance() count += 1 if count >= max { break } } - + guard count > 0 else { // No digits actually found - throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) + return nil } - + if let minDigits, count < minDigits { // Too few digits found - throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) + return nil } - + if nanoseconds { // Keeps us in the land of integers if count == 1 { return result * 100_000_000 } @@ -91,13 +122,11 @@ extension BufferViewIterator { if count == 7 { return result * 100 } if count == 8 { return result * 10 } if count == 9 { return result } - throw parseError(input, exampleFormattedString: onFailure(), extendedDescription: extendedDescription) + return nil } return result } - - } // Formatting helpers diff --git a/Sources/FoundationEssentials/JSON/BufferViewIterator.swift b/Sources/FoundationEssentials/JSON/BufferViewIterator.swift index 0ca685dc5..cc1d715f2 100644 --- a/Sources/FoundationEssentials/JSON/BufferViewIterator.swift +++ b/Sources/FoundationEssentials/JSON/BufferViewIterator.swift @@ -49,4 +49,9 @@ extension BufferViewIterator: IteratorProtocol { guard curPointer < endPointer else { return } curPointer = curPointer.advanced(by: MemoryLayout.stride) } + + mutating func _uncheckedAdvance() { + assert(curPointer < endPointer) + curPointer = curPointer.advanced(by: MemoryLayout.stride) + } } From 7a9dc5c021bdba2a70318f026c873b423552a15e Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Thu, 19 Jun 2025 10:54:28 -0600 Subject: [PATCH 3/3] Move to UTF8Span from BufferView --- 
.../Formatting/Date+HTTPFormatStyle.swift | 60 ++++------ .../DateComponents+ISO8601FormatStyle.swift | 37 +++---- .../Formatting/FormatParsingUtilities.swift | 103 +++++++++++++++--- 3 files changed, 129 insertions(+), 71 deletions(-) diff --git a/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift b/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift index 1cd4a3d4a..4adde9c1c 100644 --- a/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift +++ b/Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift @@ -60,32 +60,23 @@ extension Date { } fileprivate func parse(_ value: String, in range: Range) -> (String.Index, Date)? { - var v = value[range] + let v = value[range] guard !v.isEmpty else { return nil } - - let result = v.withUTF8 { buffer -> (Int, Date)? in - let view = BufferView(unsafeBufferPointer: buffer)! - guard let comps = try? componentsStyle.components(in: view) else { - return nil - } - - // HTTP dates are always GMT - guard let date = Calendar(identifier: .gregorian).date(from: comps.components) else { - return nil - } - - return (comps.consumed, date) + guard #available(FoundationSpan 6.2, *) else { + fatalError("Span unavailable") } - - guard let result else { + + guard let comps = try? componentsStyle.components(in: v.utf8Span), + let date = Calendar(identifier: .gregorian).date(from: comps.components) + else { return nil } - - let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0) - return (endIndex, result.1) + + let endIndex = value.utf8.index(v.startIndex, offsetBy: comps.consumed) + return (endIndex, date) } } } @@ -316,30 +307,27 @@ extension DateComponents { } private func parse(_ value: String, in range: Range) -> (String.Index, DateComponents)? { - var v = value[range] + let v = value[range] guard !v.isEmpty else { return nil } - - let result = v.withUTF8 { buffer -> (Int, DateComponents)? in - let view = BufferView(unsafeBufferPointer: buffer)! - guard let comps = try? 
components(in: view) else { - return nil - } - - return (comps.consumed, comps.components) + guard #available(FoundationSpan 6.2, *) else { + fatalError("Span unavailable") } - - guard let result else { + + guard let comps = try? components(in: v.utf8Span) else { return nil } - - let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0) - return (endIndex, result.1) + + let endIndex = value.utf8.index(v.startIndex, offsetBy: comps.consumed) + return (endIndex, comps.components) } - fileprivate func components(in view: borrowing BufferView) throws -> ComponentsParseResult { + @available(FoundationSpan 6.2, *) + fileprivate func components( + in view: UTF8Span + ) throws -> ComponentsParseResult { // https://www.rfc-editor.org/rfc/rfc9110.html#http.date // , :: GMT @@ -348,7 +336,7 @@ extension DateComponents { parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: extendedDescription) } - var it = view.makeIterator() + var it = view.makeCursor() var dc = DateComponents() // Despite the spec, we allow the weekday name to be optional. @@ -500,7 +488,7 @@ extension DateComponents { dc.calendar = Calendar(identifier: .gregorian) // Would be nice to see this functionality on BufferView, but for now we calculate it ourselves. 
- let utf8CharactersRead = it.curPointer - view.startIndex._rawValue + let utf8CharactersRead = it.currentOffset return ComponentsParseResult(consumed: utf8CharactersRead, components: dc) } diff --git a/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift b/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift index 95327f750..88fb48fdd 100644 --- a/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift +++ b/Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift @@ -407,7 +407,8 @@ extension DateComponents.ISO8601FormatStyle { var components: DateComponents } - private func components(fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: borrowing BufferView) throws -> ComponentsParseResult { + @available(FoundationSpan 6.2, *) + private func components(fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: UTF8Span) throws -> ComponentsParseResult { let fields = formatFields // Produce an error message to throw @@ -415,7 +416,7 @@ extension DateComponents.ISO8601FormatStyle { parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now), extendedDescription: extendedDescription) } - var it = view.makeIterator() + var it = view.makeCursor() var needsSeparator = false // Keep these fields local and set them in the DateComponents once for improved performance @@ -618,7 +619,7 @@ extension DateComponents.ISO8601FormatStyle { if let next = it.peek(), (next == UInt8(ascii: "+") || next == UInt8(ascii: "-")) { if next == UInt8(ascii: "+") { positive = true } else { positive = false } - it.advance() + it.uncheckedAdvance() } else { positive = true tzOffset = 0 @@ -631,7 +632,7 @@ extension DateComponents.ISO8601FormatStyle { if let next = it.peek(), (next == UInt8(ascii: "+") || next == UInt8(ascii: "-")) { if next == UInt8(ascii: "+") { positive = true } else { positive = false } - it.advance() + it.uncheckedAdvance() } else { 
positive = true tzOffset = 0 @@ -660,8 +661,8 @@ extension DateComponents.ISO8601FormatStyle { if let next = it.peek() { if next == UInt8(ascii: ":") { // Throw it away - it.advance() - + it.uncheckedAdvance() + // But we should have minutes after this expectMinutes = true } else if isASCIIDigit(next) { @@ -732,7 +733,7 @@ extension DateComponents.ISO8601FormatStyle { rawDayOfYear: dayOfYear) // Would be nice to see this functionality on BufferView, but for now we calculate it ourselves. - let utf8CharactersRead = it.curPointer - view.startIndex._rawValue + let utf8CharactersRead = it.currentOffset return ComponentsParseResult(consumed: utf8CharactersRead, components: dc) } } @@ -772,27 +773,21 @@ extension DateComponents.ISO8601FormatStyle : ParseStrategy { } internal func parse(_ value: String, fillMissingUnits: Bool, in range: Range) -> (String.Index, DateComponents)? { - var v = value[range] + let v = value[range] guard !v.isEmpty else { return nil } - - let result = v.withUTF8 { buffer -> (Int, DateComponents)? in - let view = BufferView(unsafeBufferPointer: buffer)! - guard let comps = try? components(fillMissingUnits: fillMissingUnits, defaultTimeZone: timeZone, in: view) else { - return nil - } - - return (comps.consumed, comps.components) + guard #available(FoundationSpan 6.2, *) else { + fatalError("Span unavailable") } - - guard let result else { + + guard let comps = try? 
components(fillMissingUnits: fillMissingUnits, defaultTimeZone: timeZone, in: v.utf8Span) else { return nil } - - let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0) - return (endIndex, result.1) + + let endIndex = value.utf8.index(v.startIndex, offsetBy: comps.consumed) + return (endIndex, comps.components) } } diff --git a/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift b/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift index f1b5d013f..a18c4c9bf 100644 --- a/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift +++ b/Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift @@ -18,6 +18,15 @@ func parseError( parseError(String(decoding: value, as: UTF8.self), exampleFormattedString: exampleFormattedString, extendedDescription: extendedDescription) } +@available(FoundationSpan 6.2, *) +func parseError( + _ value: UTF8Span, exampleFormattedString: String?, extendedDescription: String? = nil +) -> CocoaError { + // TODO: change to UTF8Span, and prototype string append and interpolation taking UTF8Span + parseError(String(copying: value), exampleFormattedString: exampleFormattedString, extendedDescription: extendedDescription) +} + + package func parseError(_ value: String, exampleFormattedString: String?, extendedDescription: String? = nil) -> CocoaError { let errorStr: String if let exampleFormattedString = exampleFormattedString { @@ -32,19 +41,66 @@ func isASCIIDigit(_ x: UInt8) -> Bool { x >= UInt8(ascii: "0") && x <= UInt8(ascii: "9") } -/** - Fundamental operations: - - Peek: return the next portion of input, if it exists and matches the given criteria - - Match: like peek, but also consumes the portion of input - - Parse: like match, but produces a value by interpreting the portion of input +@available(FoundationSpan 6.2, *) +extension UTF8Span { + // This is just an iterator style type, though for UTF8 we can + // load scalars and Characters, presumably. 
+ // + // NOTE: I'm calling this "Cursor" temporarily as "Iterator" might + // end up being taken for other reasons. + struct Cursor: ~Escapable { + var span: UTF8Span + var currentOffset: Int + + @lifetime(copy span) + init(_ span: UTF8Span) { + self.span = span + self.currentOffset = 0 + } + } + + @lifetime(copy self) // copy or borrow? + func makeCursor() -> Cursor { + .init(self) + } +} + +@available(FoundationSpan 6.2, *) +extension UTF8Span.Cursor { + @lifetime(self: copy self) + mutating func uncheckedAdvance() { + assert(self.currentOffset < span.count) + self.currentOffset += 1 + } + + func peek() -> UInt8? { + guard !isEmpty else { return nil } + return span.span[unchecked: self.currentOffset] + } - Notes on return types: - `peek(_:(UInt8) -> Bool) -> UInt8?` is more descriptive than returning a `Bool`, but slighlty less ergonomic if you only care about the `Bool`. If we don't return the `UInt8`, some callers may need to store it from the function somehow or else double-load it. - Match functions have different return types, depending on whether they always succeed, whether they match a variable length, etc. Since they also advance as part of matching, the return lengths are dicardable. They can also be retroactively calculated by the caller, we just return it because we can. - Finally, the parse functions just return the value, as there's no way to have a discardable return value alongside a non-discardable one. Again, lengths can be retroactively calculated by the caller based on the iterator's new offset. - */ -extension BufferViewIterator { + @lifetime(self: copy self) + mutating func next() -> UInt8? 
{ + guard !isEmpty else { return nil } + defer { uncheckedAdvance() } + return peek() + } + + var isEmpty: Bool { self.currentOffset >= span.count } + + @lifetime(self: copy self) + mutating func consume(_ byte: UInt8) -> Bool { + guard peek() == byte else { + return false + } + uncheckedAdvance() + return true + } + +} + +@available(FoundationSpan 6.2, *) +extension UTF8Span.Cursor { // Returns the next byte if there is one and it // matches the predicate, otherwise false func peek(_ f: (UInt8) -> Bool) -> UInt8? { @@ -54,22 +110,38 @@ extension BufferViewIterator { return b } + @lifetime(self: copy self) mutating func matchByte(_ expected: UInt8) -> Bool { if peek() == expected { - _uncheckedAdvance() + uncheckedAdvance() return true } return false } + @lifetime(self: copy self) mutating func matchPredicate(_ f: (UInt8) -> Bool) -> UInt8? { guard let b = peek(f) else { return nil } - _uncheckedAdvance() + uncheckedAdvance() return b } + /** + NOTE: We want a `match(anyOf:)` operation that takes an Array or Set + literal (or String literal, clearly delineated to mean ASCII), but is guaranteed not to actually materialize a runtime managed object. + + For example, that would handle this pattern from ISO8601: + ``` + if let next = it.peek(), (next == UInt8(ascii: "+") || next == UInt8(ascii: "-")) { + if next == UInt8(ascii: "+") { positive = true } + else { positive = false } + it.uncheckedAdvance() + ``` + */ + + @lifetime(self: copy self) @discardableResult mutating func matchZeroOrMore(_ expected: UInt8) -> Int { var count = 0 @@ -79,6 +151,7 @@ extension BufferViewIterator { return count } + @lifetime(self: copy self) @discardableResult mutating func matchOneOrMore(_ expected: UInt8) -> Int? { let c = matchZeroOrMore(expected) @@ -87,16 +160,18 @@ extension BufferViewIterator { // TODO: I think it would be cleaner to separate out // nanosecond handling here... + @lifetime(self: copy self) mutating func parseNumber(minDigits: Int? = nil, maxDigits: Int? 
= nil, nanoseconds: Bool = false) -> Int? { // Consume all leading zeros, parse until we no longer see a digit var result = 0 var count = 0 // Cap at 10 digits max to avoid overflow let max = min(maxDigits ?? 10, 10) - while let next = matchPredicate(isASCIIDigit) { + while let next = peek(), isASCIIDigit(next) { let digit = Int(next - UInt8(ascii: "0")) result *= 10 result += digit + uncheckedAdvance() count += 1 if count >= max { break } }