diff --git a/stdlib/private/StdlibUnicodeUnittest/GraphemeBreaking.swift b/stdlib/private/StdlibUnicodeUnittest/GraphemeBreaking.swift
index 2cd26d6ae5708..c8c23482c6740 100644
--- a/stdlib/private/StdlibUnicodeUnittest/GraphemeBreaking.swift
+++ b/stdlib/private/StdlibUnicodeUnittest/GraphemeBreaking.swift
@@ -15,56 +15,47 @@
 #if _runtime(_ObjC)
 import Foundation
 
-func parseGraphemeBreakTests(
-  _ data: String,
-  into result: inout [(String, Int)]
-) {
-  for line in data.split(separator: "\n") {
+public struct GraphemeBreakTest {
+  public let string: String // every scalar of the test line, in order
+  public let pieces: [[Unicode.Scalar]] // the same scalars, split at each ÷
+
+  init?(line: some StringProtocol) {
     // Only look at actual tests
-    guard line.hasPrefix("÷") else {
-      continue
-    }
+    if !line.hasPrefix("÷") { return nil }
 
     let info = line.split(separator: "#")
     let components = info[0].split(separator: " ")
 
     var string = ""
-    var count = 0
-
-    for i in components.indices {
-      guard i != 0 else {
-        continue
-      }
-
-      let scalar: Unicode.Scalar
-
-      // If we're an odd index, this is a scalar.
-      if i & 0x1 == 1 {
-        scalar = Unicode.Scalar(UInt32(components[i], radix: 16)!)!
-
+    var clusters: [[Unicode.Scalar]] = []
+    var current: [Unicode.Scalar] = []
+
+    for token in components {
+      if token == "÷" {
+        // Grapheme break opportunity: close the cluster gathered so far.
+        // Nothing is recorded when no scalar has been gathered yet.
+        if !current.isEmpty {
+          clusters.append(current)
+          current = []
+        }
+      } else if token != "×", !token.isEmpty {
+        // Anything other than × (no break opportunity) or an empty token
+        // is parsed as a hexadecimal scalar value.
+        guard let value = UInt32(token, radix: 16) else { return nil }
+        guard let scalar = Unicode.Scalar(value) else { return nil }
         string.unicodeScalars.append(scalar)
-      } else {
-        // Otherwise, it is a grapheme breaking operator.
-
-        // If this is a break, record the +1 count. Otherwise it is × which is
-        // not a break.
-        if components[i] == "÷" {
-          count += 1
-        }
+        current.append(scalar)
       }
     }
-
-    result.append((string, count))
+    if !current.isEmpty { clusters.append(current) }
+    self.string = string
+    self.pieces = clusters
   }
 }
 
-public let graphemeBreakTests: [(String, Int)] = {
-  var result: [(String, Int)] = []
-
+public let graphemeBreakTests: [GraphemeBreakTest] = {
   let testFile = readInputFile("GraphemeBreakTest.txt")
-
-  parseGraphemeBreakTests(testFile, into: &result)
-
-  return result
+  let lines = testFile.split(separator: "\n")
+  return lines.compactMap { line in GraphemeBreakTest(line: line) }
 }()
 #endif
diff --git a/stdlib/public/core/StringGraphemeBreaking.swift b/stdlib/public/core/StringGraphemeBreaking.swift
index 3646636a9cd25..b602485b72da4 100644
--- a/stdlib/public/core/StringGraphemeBreaking.swift
+++ b/stdlib/public/core/StringGraphemeBreaking.swift
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
+// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -436,6 +436,117 @@ internal struct _GraphemeBreakingState {
   var shouldBreakRI = false
 }
 
+extension Unicode {
+  /// An incremental recognizer of character (that is, extended grapheme
+  /// cluster) boundaries in a stream of Unicode scalars.
+  ///
+  /// Pass the scalars of a text to `hasBreak(before:)` one at a time, in
+  /// order. Each call returns `true` exactly when a grapheme break precedes
+  /// the scalar that was just passed in.
+  ///
+  /// The breaks reported here are guaranteed to agree with the way `String`
+  /// divides its contents into `Character` values.
+  @available(SwiftStdlib 5.8, *)
+  public // SPI(Foundation) FIXME: We need API for this
+  struct _CharacterRecognizer {
+    internal var _previous: Unicode.Scalar
+    internal var _state: _GraphemeBreakingState
+
+    /// Decides, where possible, whether a grapheme break separates `scalar1`
+    /// from `scalar2` without looking at anything that precedes `scalar1`.
+    /// Returns `nil` when that cannot be determined from the two scalars
+    /// alone. Use it as an optional fast (but incomplete) check before
+    /// starting a full state machine session.
+    @_effects(releasenone)
+    public static func quickBreak(
+      between scalar1: Unicode.Scalar,
+      and scalar2: Unicode.Scalar
+    ) -> Bool? {
+      // CR followed by LF is never split, whatever came before.
+      let isCRLF = scalar1.value == 0xD && scalar2.value == 0xA
+      guard !isCRLF else { return false }
+      // Beyond that, only a break can be confirmed from two scalars alone;
+      // every other pair is left undecided.
+      let isDefiniteBreak = _hasGraphemeBreakBetween(scalar1, scalar2)
+      return isDefiniteBreak ? true : nil
+    }
+
+    /// Creates a recognizer positioned at the _start of text_ (sot).
+    ///
+    /// A recognizer in this state has not seen any scalars yet, and it
+    /// reports a grapheme break before the first scalar that it is
+    /// given.
+    public init() {
+      // NUL stands in for the (nonexistent) scalar before the start of text,
+      // sparing us a special case for the empty state. Being a control
+      // character, it forces a break ahead of the first real scalar under
+      // rule GB5, which has the same effect as rule GB1.
+      _previous = Unicode.Scalar(0 as UInt8)
+      _state = _GraphemeBreakingState()
+    }
+
+    /// Advances the state machine by one scalar and reports whether that
+    /// scalar begins a new extended grapheme cluster.
+    ///
+    /// The first call on a freshly initialized recognizer always returns
+    /// `true`. No information is carried across character boundaries: when
+    /// this method returns `true`, `self` is left in the same state as a newly
+    /// initialized recognizer that has been fed only `next`.
+    ///
+    /// - Parameter next: The scalar that follows those already processed.
+    /// - Returns: `true` if a grapheme break precedes `next`.
+    @_effects(releasenone)
+    public mutating func hasBreak(
+      before next: Unicode.Scalar
+    ) -> Bool {
+      let isBoundary = _state.shouldBreak(between: _previous, and: next)
+      // Nothing is remembered across a boundary: start over with fresh state.
+      if isBoundary { _state = _GraphemeBreakingState() }
+      // `next` becomes the left-hand scalar of the following query.
+      _previous = next
+      return isBoundary
+    }
+
+    /// Decodes scalars from the given UTF-8 buffer, beginning at `start`, and
+    /// feeds them to the recognizer until one of them is found to begin a new
+    /// character. That scalar is still consumed; its range of code unit
+    /// offsets in `buffer` is returned. If no grapheme break is found before
+    /// the end of the buffer, the result is `nil`.
+    ///
+    /// On return, the recognizer reflects every scalar it was fed, up to and
+    /// including the returned one. To look for further breaks, continue
+    /// feeding it the data that follows.
+    ///
+    /// - Parameter buffer: A buffer containing valid UTF-8 data, starting and
+    ///    ending on Unicode scalar boundaries.
+    ///
+    /// - Parameter start: A valid index into `buffer`, addressing the first
+    ///    code unit of a UTF-8 scalar in the buffer, or the end.
+    ///
+    /// - Returns: The index range of the first scalar at or after `start`
+    ///    that is preceded by a grapheme break, or `nil` if the recognizer
+    ///    reports no break in the rest of the buffer.
+    ///
+    /// - Warning: This function does not validate that the buffer contains
+    ///    valid UTF-8 data; its behavior is undefined if given invalid input.
+    @_effects(releasenone)
+    public mutating func _firstBreak(
+      inUncheckedUnsafeUTF8Buffer buffer: UnsafeBufferPointer<UInt8>,
+      startingAt start: Int = 0
+    ) -> Range<Int>? {
+      var offset = start
+      while offset < buffer.endIndex {
+        let (scalar, length) = _decodeScalar(buffer, startingAt: offset)
+        let scalarRange = Range(_uncheckedBounds: (offset, offset &+ length))
+        // The scalar has been consumed by `hasBreak` either way.
+        if hasBreak(before: scalar) { return scalarRange }
+        offset = scalarRange.upperBound
+      }
+      return nil
+    }
+  }
+}
+
 extension _StringGuts {
   // Returns the stride of the grapheme cluster starting at offset `index`,
   // assuming it is on a grapheme cluster boundary.
@@ -459,7 +570,7 @@ extension _StringGuts {
 
     while true {
       guard let (scalar2, nextIndex) = nextScalar(index) else { break }
-      if shouldBreak(between: scalar, and: scalar2, at: index, with: &state) {
+      if state.shouldBreak(between: scalar, and: scalar2) {
         break
       }
       index = nextIndex
@@ -505,7 +616,7 @@ extension _StringGuts {
   }
 }
 
-extension _StringGuts {
+extension _GraphemeBreakingState {
   // Return true if there is an extended grapheme cluster boundary between two
   // scalars, based on state information previously collected about preceding
   // scalars.
@@ -517,11 +628,9 @@ extension _StringGuts {
   //
   // This is based on the Unicode Annex #29 for [Grapheme Cluster Boundary
   // Rules](https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules).
-  internal func shouldBreak(
+  internal mutating func shouldBreak(
     between scalar1: Unicode.Scalar,
-    and scalar2: Unicode.Scalar,
-    at index: Int,
-    with state: inout _GraphemeBreakingState
+    and scalar2: Unicode.Scalar
   ) -> Bool {
     // GB3
     if scalar1.value == 0xD, scalar2.value == 0xA {
@@ -545,8 +654,8 @@ extension _StringGuts {
     var enterIndicSequence = false
 
     defer {
-      state.isInEmojiSequence = enterEmojiSequence
-      state.isInIndicSequence = enterIndicSequence
+      self.isInEmojiSequence = enterEmojiSequence
+      self.isInIndicSequence = enterIndicSequence
     }
 
     switch (x, y) {
@@ -591,14 +700,14 @@ extension _StringGuts {
       // continue the grapheme cluster by combining more scalars later. If we're
       // not currently in an emoji sequence, but our lhs scalar is a pictograph,
       // then that's a signal that it's the start of an emoji sequence.
-      if state.isInEmojiSequence || x == .extendedPictographic {
+      if self.isInEmojiSequence || x == .extendedPictographic {
         enterEmojiSequence = true
       }
 
       // If we're currently in an indic sequence (or if our lhs is a linking
       // consonant), then this check and everything underneath ensures that
       // we continue being in one and may check if this extend is a Virama.
-      if state.isInIndicSequence || scalar1._isLinkingConsonant {
+      if self.isInIndicSequence || scalar1._isLinkingConsonant {
         if y == .extend {
           let extendNormData = Unicode._NormData(scalar2, fastUpperbound: 0x300)
 
@@ -611,7 +720,7 @@ extension _StringGuts {
         enterIndicSequence = true
 
         if scalar2._isVirama {
-          state.hasSeenVirama = true
+          self.hasSeenVirama = true
         }
       }
 
@@ -627,32 +736,34 @@ extension _StringGuts {
 
     // GB11
     case (.zwj, .extendedPictographic):
-      return !state.isInEmojiSequence
+      return !self.isInEmojiSequence
 
     // GB12 & GB13
     case (.regionalIndicator, .regionalIndicator):
       defer {
-        state.shouldBreakRI.toggle()
+        self.shouldBreakRI.toggle()
       }
 
-      return state.shouldBreakRI
+      return self.shouldBreakRI
 
     // GB999
     default:
       // GB9c
       if
-        state.isInIndicSequence,
-        state.hasSeenVirama,
+        self.isInIndicSequence,
+        self.hasSeenVirama,
         scalar2._isLinkingConsonant
       {
-        state.hasSeenVirama = false
+        self.hasSeenVirama = false
         return false
       }
 
       return true
     }
   }
+}
 
+extension _StringGuts {
   // Return true if there is an extended grapheme cluster boundary between two
   // scalars, with no previous knowledge about preceding scalars.
   //
diff --git a/test/stdlib/CharacterRecognizer.swift b/test/stdlib/CharacterRecognizer.swift
new file mode 100644
index 0000000000000..6bacbaaacac54
--- /dev/null
+++ b/test/stdlib/CharacterRecognizer.swift
@@ -0,0 +1,116 @@
+// RUN: %empty-directory(%t)
+// RUN: %target-run-stdlib-swift %S/Inputs/
+
+// REQUIRES: executable_test
+// REQUIRES: objc_interop
+// REQUIRES: optimized_stdlib
+
+import Swift
+import StdlibUnittest
+import StdlibUnicodeUnittest
+
+var suite = TestSuite("CharacterRecognizer")
+defer { runAllTests() }
+
+if #available(SwiftStdlib 5.8, *) {
+  suite.test("Unicode test data/hasBreak") {
+    for testCase in graphemeBreakTests {
+      var recognizer = Unicode._CharacterRecognizer()
+      var clusters: [[Unicode.Scalar]] = []
+      for scalar in testCase.string.unicodeScalars {
+        // `hasBreak` is listed first so that it runs for every scalar. A
+        // reported break (or having no open cluster yet) starts a new
+        // cluster; any other scalar joins the cluster being built.
+        if recognizer.hasBreak(before: scalar) || clusters.isEmpty {
+          clusters.append([scalar])
+        } else {
+          clusters[clusters.endIndex - 1].append(scalar)
+        }
+      }
+      expectEqual(clusters, testCase.pieces,
+        "string: \(testCase.string.debugDescription)")
+    }
+  }
+}
+
+/// Decodes `buffer` as UTF-8, flagging invalid input as a test failure.
+func scalars(in buffer: some Sequence<UInt8>) -> [Unicode.Scalar] {
+  var decoded: [Unicode.Scalar] = []
+  var codeUnits = buffer.makeIterator()
+  var decoder = UTF8()
+  while true {
+    let outcome = decoder.decode(&codeUnits)
+    if case .scalarValue(let scalar) = outcome { decoded.append(scalar) }
+    if case .emptyInput = outcome { return decoded }
+    if case .error = outcome { expectTrue(false, "Invalid scalar") }
+  }
+}
+
+if #available(SwiftStdlib 5.8, *) {
+  suite.test("Unicode test data/_firstBreak") {
+    for testCase in graphemeBreakTests {
+      var recognizer = Unicode._CharacterRecognizer()
+      var clusters: [[Unicode.Scalar]] = []
+      var utf8Source = testCase.string
+      utf8Source.withUTF8 { buffer in
+        var clusterStart = buffer.startIndex
+        var searchStart = clusterStart
+        while
+          searchStart < buffer.endIndex,
+          let found = recognizer._firstBreak(
+            inUncheckedUnsafeUTF8Buffer: buffer, startingAt: searchStart)
+        {
+          // The break before `found` completes the cluster collected so far.
+          if clusterStart < found.lowerBound {
+            let completed = buffer[clusterStart..<found.lowerBound]
+            clusters.append(scalars(in: completed))
+          }
+          clusterStart = found.lowerBound
+          searchStart = found.upperBound
+        }
+        clusters.append(scalars(in: buffer[clusterStart...]))
+      }
+      expectEqual(clusters, testCase.pieces,
+        "string: \(testCase.string.debugDescription)")
+    }
+  }
+}
+
+let sampleString = #"""
+    The powerful programming language that is also easy to learn.
+    손쉽게 학습할 수 있는 강력한 프로그래밍 언어.
+    🪙 A 🥞 short 🍰 piece 🫘 of 🌰 text 👨‍👨‍👧‍👧 with 👨‍👩‍👦 some 🚶🏽 emoji 🇺🇸🇨🇦 characters 🧈
+    some🔩times 🛺 placed 🎣 in 🥌 the 🆘 mid🔀dle 🇦🇶or🏁 around 🏳️‍🌈 a 🍇 w🍑o🥒r🥨d
+    Unicode is such fun!
+    U̷n̷i̷c̷o̴d̴e̷ ̶i̸s̷ ̸s̵u̵c̸h̷ ̸f̵u̷n̴!̵
+    U̴̡̲͋̾n̵̻̳͌ì̶̠̕c̴̭̈͘ǫ̷̯͋̊d̸͖̩̈̈́ḛ̴́ ̴̟͎͐̈i̴̦̓s̴̜̱͘ ̶̲̮̚s̶̙̞͘u̵͕̯̎̽c̵̛͕̜̓h̶̘̍̽ ̸̜̞̿f̵̤̽ṷ̴͇̎͘ń̷͓̒!̷͍̾̚
+    U̷̢̢̧̨̼̬̰̪͓̞̠͔̗̼̙͕͕̭̻̗̮̮̥̣͉̫͉̬̲̺͍̺͊̂ͅ\#
+    n̶̨̢̨̯͓̹̝̲̣̖̞̼̺̬̤̝̊̌́̑̋̋͜͝ͅ\#
+    ḭ̸̦̺̺͉̳͎́͑\#
+    c̵̛̘̥̮̙̥̟̘̝͙̤̮͉͔̭̺̺̅̀̽̒̽̏̊̆͒͌̂͌̌̓̈́̐̔̿̂͑͠͝͝ͅ\#
+    ö̶̱̠̱̤̙͚͖̳̜̰̹̖̣̻͎͉̞̫̬̯͕̝͔̝̟̘͔̙̪̭̲́̆̂͑̌͂̉̀̓́̏̎̋͗͛͆̌̽͌̄̎̚͝͝͝͝ͅ\#
+    d̶̨̨̡̡͙̟͉̱̗̝͙͍̮͍̘̮͔͑\#
+    e̶̢͕̦̜͔̘̘̝͈̪̖̺̥̺̹͉͎͈̫̯̯̻͑͑̿̽͂̀̽͋́̎̈́̈̿͆̿̒̈́̽̔̇͐͛̀̓͆̏̾̀̌̈́̆̽̕ͅ
+    """#
+
+if #available(SwiftStdlib 5.8, *) {
+  suite.test("Consistency with Swift String's behavior/hasBreak") {
+    // Reference answer: the positions at which `String` itself starts a
+    // new `Character` in the sample text.
+    let expectedBreaks = Array(sampleString.indices)
+
+    // Feed the same text to the recognizer one scalar at a time, keeping
+    // the positions at which it reports a break.
+    var recognizer = Unicode._CharacterRecognizer()
+    let scalarView = sampleString.unicodeScalars
+    let actualBreaks = scalarView.indices.filter { position in
+      recognizer.hasBreak(before: scalarView[position])
+    }
+
+    expectEqual(actualBreaks, expectedBreaks,
+      """
+      actualBreaks: \(actualBreaks.map { $0._description })
+      expectedBreaks: \(expectedBreaks.map { $0._description })
+      """)
+  }
+}
diff --git a/test/stdlib/StringIndex.swift b/test/stdlib/StringIndex.swift
index 6d101e7383d63..c50e00ec262e9 100644
--- a/test/stdlib/StringIndex.swift
+++ b/test/stdlib/StringIndex.swift
@@ -656,8 +656,8 @@ suite.test("Fully exhaustive index interchange")
 
 #if _runtime(_ObjC)
 suite.test("Fully exhaustive index interchange/GraphemeBreakTests") {
-  for string in graphemeBreakTests.map { $0.0 } {
-    fullyExhaustiveIndexInterchange(string)
+  for input in graphemeBreakTests.map(\.string) {
+    fullyExhaustiveIndexInterchange(input)
   }
 }
 #endif
diff --git a/validation-test/stdlib/StringGraphemeBreaking.swift b/validation-test/stdlib/StringGraphemeBreaking.swift
index 3ae6c5147cca4..549918afe154a 100644
--- a/validation-test/stdlib/StringGraphemeBreaking.swift
+++ b/validation-test/stdlib/StringGraphemeBreaking.swift
@@ -10,47 +10,59 @@ import StdlibUnicodeUnittest
 import Foundation
 
 let StringGraphemeBreaking = TestSuite("StringGraphemeBreaking")
+defer { runAllTests() }
 
 extension String {
-  var backwardsCount: Int {
-    var c = 0
-    var index = endIndex
-    while index != startIndex {
-      c += 1
-      formIndex(before: &index)
+  var forwardPieces: [[Unicode.Scalar]] {
+    var clusters: [[Unicode.Scalar]] = []
+    var lower = startIndex
+    while lower < endIndex {
+      let upper = index(after: lower)
+      clusters.append([Unicode.Scalar](self[lower..<upper].unicodeScalars))
+      lower = upper
     }
-    return c
+    return clusters
+  }
+
+  var backwardPieces: [[Unicode.Scalar]] {
+    var clusters: [[Unicode.Scalar]] = []
+    var upper = endIndex
+    while upper > startIndex {
+      let lower = index(before: upper)
+      clusters.append([Unicode.Scalar](self[lower..<upper].unicodeScalars))
+      upper = lower
+    }
+    // Collected back to front; flip into reading order.
+    return clusters.reversed()
   }
 }
 
 if #available(SwiftStdlib 5.6, *) {
   StringGraphemeBreaking.test("grapheme breaking") {
-    for graphemeBreakTest in graphemeBreakTests {
+    for testCase in graphemeBreakTests {
       expectEqual(
-        graphemeBreakTest.1,
-        graphemeBreakTest.0.count,
-        "string: \(String(reflecting: graphemeBreakTest.0))")
+        testCase.string.forwardPieces, testCase.pieces,
+        "string: \(testCase.string.debugDescription) (forward)")
       expectEqual(
-        graphemeBreakTest.1,
-        graphemeBreakTest.0.backwardsCount,
-        "string: \(String(reflecting: graphemeBreakTest.0))")
+        testCase.string.backwardPieces, testCase.pieces,
+        "string: \(testCase.string.debugDescription) (backward)")
     }
   }
 }
 
 // The most simple subclass of NSString that CoreFoundation does not know
 // about.
-class NonContiguousNSString : NSString {
+class NonContiguousNSString: NSString {
   required init(coder aDecoder: NSCoder) {
     fatalError("don't call this initializer")
   }
   required init(itemProviderData data: Data, typeIdentifier: String) throws {
-    fatalError("don't call this initializer")    
+    fatalError("don't call this initializer")
   }
 
-  override init() { 
+  override init() {
     _value = []
-    super.init() 
+    super.init()
   }
 
   init(_ value: [UInt16]) {
@@ -80,33 +92,19 @@ extension _StringGuts {
   func _isForeign() -> Bool
 }
 
-func getUTF16Array(from string: String) -> [UInt16] {
-  var result: [UInt16] = []
-
-  for cp in string.utf16 {
-    result.append(cp)
-  }
-
-  return result
-}
-
 if #available(SwiftStdlib 5.6, *) {
   StringGraphemeBreaking.test("grapheme breaking foreign") {
-    for graphemeBreakTest in graphemeBreakTests {
-      let foreignTest = NonContiguousNSString(
-        getUTF16Array(from: graphemeBreakTest.0)
-      )
-      let test = foreignTest as String
+    for testCase in graphemeBreakTests {
+      let utf16Units = [UInt16](testCase.string.utf16)
+      let bridged = NonContiguousNSString(utf16Units) as String
 
-      expectTrue(test._guts._isForeign())
+      expectTrue(bridged._guts._isForeign())
       expectEqual(
-        graphemeBreakTest.1, test.count,
-        "string: \(String(reflecting: graphemeBreakTest.0))")
+        bridged.forwardPieces, testCase.pieces,
+        "string: \(testCase.string.debugDescription) (forward)")
       expectEqual(
-        graphemeBreakTest.1, test.backwardsCount,
-        "string: \(String(reflecting: graphemeBreakTest.0))")
+        bridged.backwardPieces, testCase.pieces,
+        "string: \(testCase.string.debugDescription) (backward)")
     }
   }
 }
-
-runAllTests()