From 33a2553294b4fcf13ce5d8d15cc5de2fe9526419 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Wed, 21 Jun 2023 16:20:59 -0400 Subject: [PATCH 1/5] Add Grouped and Keyed --- Guides/Grouped.md | 68 +++++++++++++++++ Guides/Keyed.md | 74 +++++++++++++++++++ README.md | 2 + Sources/Algorithms/Grouped.swift | 25 +++++++ Sources/Algorithms/Keyed.swift | 47 ++++++++++++ Tests/SwiftAlgorithmsTests/GroupedTests.swift | 46 ++++++++++++ Tests/SwiftAlgorithmsTests/KeyedTests.swift | 74 +++++++++++++++++++ 7 files changed, 336 insertions(+) create mode 100644 Guides/Grouped.md create mode 100644 Guides/Keyed.md create mode 100644 Sources/Algorithms/Grouped.swift create mode 100644 Sources/Algorithms/Keyed.swift create mode 100644 Tests/SwiftAlgorithmsTests/GroupedTests.swift create mode 100644 Tests/SwiftAlgorithmsTests/KeyedTests.swift diff --git a/Guides/Grouped.md b/Guides/Grouped.md new file mode 100644 index 00000000..3df54438 --- /dev/null +++ b/Guides/Grouped.md @@ -0,0 +1,68 @@ +# Grouped + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/Grouped.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/GroupedTests.swift)] + +Groups up elements of a sequence into a new Dictionary, whose values are Arrays of grouped elements, each keyed by the result of the given closure. + +```swift +let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Avocado", "Coconut"] +let fruitsByLetter = fruits.grouped(by: { $0.first! }) +// Results in: +// [ +// "B": ["Banana"], +// "A": ["Apricot", "Apple", "Avocado"], +// "C": ["Cherry", "Coconut"], +// ] +``` + +If you wish to achieve a similar effect but for single values (instead of Arrays of grouped values), see [`keyed(by:)`](Keyed.md). + +## Detailed Design + +The `grouped(by:)` method is declared as a `Sequence` extension returning +`[GroupKey: [Element]]`. + +```swift +extension Sequence { + public func grouped( + by keyForValue: (Element) throws -> GroupKey + ) rethrows -> [GroupKey: [Element]] +} +``` + +### Complexity + +Calling `grouped(by:)` is an O(_n_) operation. + +### Comparison with other languages + +| Language | Grouping API | +|---------------|--------------| +| Java | [`groupingBy`](https://docs.oracle.com/en/java/javase/20/docs/api/java.base/java/util/stream/Collectors.html#groupingBy(java.util.function.Function)) | +| Kotlin | [`groupBy`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/group-by.html) | +| C# | [`GroupBy`](https://learn.microsoft.com/en-us/dotnet/api/system.linq.enumerable.groupby?view=net-7.0#system-linq-enumerable-groupby) | +| Rust | [`group_by`](https://doc.rust-lang.org/std/primitive.slice.html#method.group_by) | +| Ruby | [`group_by`](https://ruby-doc.org/3.2.2/Enumerable.html#method-i-group_by) | +| Python | [`groupby`](https://docs.python.org/3/library/itertools.html#itertools.groupby) | +| PHP (Laravel) | [`groupBy`](https://laravel.com/docs/10.x/collections#method-groupby) | + +#### Naming + +All the surveyed languages name this operation with a variant of "grouped" or "grouping". The past tense `grouped(by:)` best fits [Swift's API Design Guidelines](https://www.swift.org/documentation/api-design-guidelines/). + +#### Customization points + +Java and C# are interesting in that they provide multiple overloads with several points of customization: + +1. Changing the type of the groups. + 1. E.g. the groups can be Sets instead of Arrays. + 1. Akin to calling `.transformValues { group in Set(group) }` on the resultant dictionary, but avoiding the intermediate allocation of Arrays of each group. +2. Picking which elements end up in the groupings. + 1. The default is the elements of the input sequence, but can be changed. + 2. Akin to calling `.transformValues { group in group.map(someTransform) }` on the resultant dictionary, but avoiding the intermediate allocation of Arrays of each group. +3. Changing the type of the outermost collection. + 1. E.g using an `OrderedDictionary`, `SortedDictionary` or `TreeDictionary` instead of the default (hashed, unordered) `Dictionary`. + 2. There's no great way to achieve this with the `grouped(by:)`. One could wrap the resultant dictionary in an initializer to one of the other dictionary types, but that isn't sufficient: Once the `Dictionary` loses the ordering, there's no way to get it back when constructing one of the ordered dictionary variants. + +It is not clear which of these points of customization are worth supporting, or what the best way to express them might be. diff --git a/Guides/Keyed.md b/Guides/Keyed.md new file mode 100644 index 00000000..b9fd022e --- /dev/null +++ b/Guides/Keyed.md @@ -0,0 +1,74 @@ +# Keyed + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/Keyed.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/KeyedTests.swift)] + +Stores the elements of a sequence as the values of a Dictionary, keyed by the result of the given closure. + +```swift +let fruits = ["Apple", "Banana", "Cherry"] +let fruitByLetter = fruits.keyed(by: { $0.first! }) +// Results in: +// [ +// "A": "Apple", +// "B": "Banana", +// "C": "Cherry", +// ] +``` + +Duplicate keys will trigger a runtime error by default. To handle this, you can provide a closure which specifies which value to keep: + +```swift +let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] +let fruitsByLetter = fruits.keyed( + by: { $0.first! }, + uniquingKeysWith: { old, new in new } // Always pick the latest fruit +) +// Results in: +// [ +// "A": "Avocado", +// "B": "Blackberry"], +// "C": "Coconut", +// ] +``` + +## Detailed Design + +The `keyed(by:)` method is declared as a `Sequence` extension returning `[Key: Element]`. + +```swift +extension Sequence { + public func keyed( + by keyForValue: (Element) throws -> Key, + uniquingKeysWith combine: ((Element, Element) throws -> Element)? = nil + ) rethrows -> [Key: Element] +} +``` + +### Complexity + +Calling `keyed(by:)` is an O(_n_) operation. + +### Comparison with other languages + +| Language | "Keying" API | +|---------------|-------------| +| Java | [`toMap`](https://docs.oracle.com/en/java/javase/20/docs/api/java.base/java/util/stream/Collectors.html#toMap(java.util.function.Function,java.util.function.Function)) | +| Kotlin | [`associatedBy`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/associate-by.html) | +| C# | [`ToDictionary`](https://learn.microsoft.com/en-us/dotnet/api/system.linq.enumerable.todictionary?view=net-7.0#system-linq-enumerable-todictionary) | +| Ruby (ActiveSupport) | [`index_by`](https://rubydoc.info/gems/activesupport/7.0.5/Enumerable#index_by-instance_method) | +| PHP (Laravel) | [`keyBy`](https://laravel.com/docs/10.x/collections#method-keyby) | + +#### Rejected alternative names + +1. Java's `toMap` is referring to `Map`/`HashMap`, their naming for Dictionaries and other associative collections. It's easy to confuse with the transformation function, `Sequence.map(_:)`. +2. C#'s `toXXX()` naming doesn't suite Swift well, which tends to prefer `Foo.init` over `toFoo()` methods. +3. Ruby's `index_by` naming doesn't fit Swift well, where "index" is a specific term (e.g. the `associatedtype Index` on `Collection`). There is also a [`index(by:)`](Index.md) method in swift-algorithms, is specifically to do with matching elements up with their indices, and not any arbitrary derived value. + +#### Alternative names + +Kotlin's `associatedBy` naming is a good alterative, and matches the past tense of [Swift's API Design Guidelines](https://www.swift.org/documentation/api-design-guidelines/), though perhaps we'd spell it `associated(by:)`. + +#### Customization points + +Java and C# are interesting in that they provide overloads that let you customize the type of the outermost collection. E.g. using an `OrderedDictionary` instead of the default (hashed, unordered) `Dictionary`. diff --git a/README.md b/README.md index 270055fb..059e51f0 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,10 @@ Read more about the package, and the intent behind it, in the [announcement on s - [`adjacentPairs()`](https://github.com/apple/swift-algorithms/blob/main/Guides/AdjacentPairs.md): Lazily iterates over tuples of adjacent elements. - [`chunked(by:)`, `chunked(on:)`, `chunks(ofCount:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chunked.md): Eager and lazy operations that break a collection into chunks based on either a binary predicate or when the result of a projection changes or chunks of a given count. - [`firstNonNil(_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/FirstNonNil.md): Returns the first non-`nil` result from transforming a sequence's elements. +- [`grouped(by:)](https://github.com/apple/swift-algorithms/blob/main/Guides/Grouped.md): Group up elements using the given closure, returning a Dictionary of those groups, keyed by the results of the closure. - [`indexed()`](https://github.com/apple/swift-algorithms/blob/main/Guides/Indexed.md): Iterate over tuples of a collection's indices and elements. - [`interspersed(with:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Intersperse.md): Place a value between every two elements of a sequence. +- [`keyed(by:)`, `keyed(by:uniquingKeysBy:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Keyed.md): Returns a Dictionary that associates elements of a sequence with the keys returned by the given closure. - [`partitioningIndex(where:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Partition.md): Returns the starting index of the partition of a collection that matches a predicate. - [`reductions(_:)`, `reductions(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Reductions.md): Returns all the intermediate states of reducing the elements of a sequence or collection. - [`split(maxSplits:omittingEmptySubsequences:whereSeparator)`, `split(separator:maxSplits:omittingEmptySubsequences)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Split.md): Lazy versions of the Standard Library's eager operations that split sequences and collections into subsequences separated by the specified separator element. diff --git a/Sources/Algorithms/Grouped.swift b/Sources/Algorithms/Grouped.swift new file mode 100644 index 00000000..34d284da --- /dev/null +++ b/Sources/Algorithms/Grouped.swift @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2021 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension Sequence { + /// Groups up elements of `self` into a new Dictionary, + /// whose values are Arrays of grouped elements, + /// each keyed by the group key returned by the given closure. + /// - Parameters: + /// - keyForValue: A closure that returns a key for each element in + /// `self`. + /// - Returns: A dictionary containing grouped elements of self, keyed by + /// the keys derived by the `keyForValue` closure. + @inlinable + public func grouped(by keyForValue: (Element) throws -> GroupKey) rethrows -> [GroupKey: [Element]] { + try Dictionary(grouping: self, by: keyForValue) + } +} diff --git a/Sources/Algorithms/Keyed.swift b/Sources/Algorithms/Keyed.swift new file mode 100644 index 00000000..f4d45750 --- /dev/null +++ b/Sources/Algorithms/Keyed.swift @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension Sequence { + /// Creates a new Dictionary from the elements of `self`, keyed by the + /// results returned by the given `keyForValue` closure. As the dictionary is + /// built, the initializer calls the `combine` closure with the current and + /// new values for any duplicate keys. Pass a closure as `combine` that + /// returns the value to use in the resulting dictionary: The closure can + /// choose between the two values, combine them to produce a new value, or + /// even throw an error. + /// + /// If no `combine` closure is provided, deriving the same duplicate key for + /// more than one element of self results in a runtime error. + /// + /// - Parameters: + /// - keyForValue: A closure that returns a key for each element in + /// `self`. + /// - combine: A closure that is called with the values for any duplicate + /// keys that are encountered. The closure returns the desired value for + /// the final dictionary. + @inlinable + public func keyed( + by keyForValue: (Element) throws -> Key, + // TODO: pass `Key` into `combine`: (Key, Element, Element) throws -> Element + uniquingKeysWith combine: ((Element, Element) throws -> Element)? = nil + ) rethrows -> [Key: Element] { + // Note: This implementation is a bit convoluted, but it's just aiming to reuse the existing stdlib logic, + // to ensure consistent behaviour, error messages, etc. + // If this API ends up in the stdlib itself, it could just call the underlying `_NativeDictionary` methods. + try withoutActuallyEscaping(keyForValue) { keyForValue in + if let combine { + return try Dictionary(self.lazy.map { (try keyForValue($0), $0) }, uniquingKeysWith: combine) + } else { + return try Dictionary(uniqueKeysWithValues: self.lazy.map { (try keyForValue($0), $0) } ) + } + } + } +} diff --git a/Tests/SwiftAlgorithmsTests/GroupedTests.swift b/Tests/SwiftAlgorithmsTests/GroupedTests.swift new file mode 100644 index 00000000..579016c4 --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/GroupedTests.swift @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class GroupedTests: XCTestCase { + private class SampleError: Error {} + + // Based on https://github.com/apple/swift/blob/4d1d8a9de5ebc132a17aee9fc267461facf89bf8/validation-test/stdlib/Dictionary.swift#L1974-L1988 + + func testGroupedBy() { + let r = 0..<10 + + let d1 = r.grouped(by: { $0 % 3 }) + XCTAssertEqual(3, d1.count) + XCTAssertEqual(d1[0]!, [0, 3, 6, 9]) + XCTAssertEqual(d1[1]!, [1, 4, 7]) + XCTAssertEqual(d1[2]!, [2, 5, 8]) + + let d2 = r.grouped(by: { $0 }) + XCTAssertEqual(10, d2.count) + + let d3 = (0..<0).grouped(by: { $0 }) + XCTAssertEqual(0, d3.count) + } + + func testThrowingFromKeyFunction() { + let input = ["Apple", "Banana", "Cherry"] + let error = SampleError() + + XCTAssertThrowsError( + try input.grouped(by: { (_: String) -> Character in throw error }) + ) { thrownError in + XCTAssertIdentical(error, thrownError as? SampleError) + } + } +} diff --git a/Tests/SwiftAlgorithmsTests/KeyedTests.swift b/Tests/SwiftAlgorithmsTests/KeyedTests.swift new file mode 100644 index 00000000..68dbd94e --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/KeyedTests.swift @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class KeyedTests: XCTestCase { + private class SampleError: Error {} + + func testUniqueKeys() { + let d = ["Apple", "Banana", "Cherry"].keyed(by: { $0.first! }) + XCTAssertEqual(d.count, 3) + XCTAssertEqual(d["A"]!, "Apple") + XCTAssertEqual(d["B"]!, "Banana") + XCTAssertEqual(d["C"]!, "Cherry") + XCTAssertNil(d["D"]) + } + + func testEmpty() { + let d = EmptyCollection().keyed(by: { $0.first! }) + XCTAssertEqual(d.count, 0) + } + + func testNonUniqueKeys() throws { + throw XCTSkip(""" + TODO: What's the XCTest equivalent to `expectCrashLater()`? + + https://github.com/apple/swift/blob/4d1d8a9de5ebc132a17aee9fc267461facf89bf8/validation-test/stdlib/Dictionary.swift#L1914 + """) + } + + func testNonUniqueKeysWithMergeFunction() { + let d = ["Apple", "Avocado", "Banana", "Cherry", "Coconut"].keyed( + by: { $0.first! }, + uniquingKeysWith: { older, newer in "\(older)-\(newer)"} + ) + + XCTAssertEqual(d.count, 3) + XCTAssertEqual(d["A"]!, "Apple-Avocado") + XCTAssertEqual(d["B"]!, "Banana") + XCTAssertEqual(d["C"]!, "Cherry-Coconut") + XCTAssertNil(d["D"]) + } + + func testThrowingFromKeyFunction() { + let input = ["Apple", "Banana", "Cherry"] + let error = SampleError() + + XCTAssertThrowsError( + try input.keyed(by: { (_: String) -> Character in throw error }) + ) { thrownError in + XCTAssertIdentical(error, thrownError as? SampleError) + } + } + + func testThrowingFromCombineFunction() { + let input = ["Apple", "Avocado", "Banana", "Cherry"] + let error = SampleError() + + XCTAssertThrowsError( + try input.keyed(by: { $0.first! }, uniquingKeysWith: { _, _ in throw error }) + ) { thrownError in + XCTAssertIdentical(error, thrownError as? SampleError) + } + } +} From f3b1dd82dda50b9598d17e6f3527604fd9826d58 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Wed, 21 Jun 2023 16:30:54 -0400 Subject: [PATCH 2/5] Add `Key` to `combine` closure --- Guides/Keyed.md | 4 +- Sources/Algorithms/Keyed.swift | 43 ++++++++++++++++----- Tests/SwiftAlgorithmsTests/KeyedTests.swift | 18 ++++++++- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/Guides/Keyed.md b/Guides/Keyed.md index b9fd022e..11bedebf 100644 --- a/Guides/Keyed.md +++ b/Guides/Keyed.md @@ -22,7 +22,7 @@ Duplicate keys will trigger a runtime error by default. To handle this, you can let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] let fruitsByLetter = fruits.keyed( by: { $0.first! }, - uniquingKeysWith: { old, new in new } // Always pick the latest fruit + uniquingKeysWith: { key, old, new in new } // Always pick the latest fruit ) // Results in: // [ @@ -40,7 +40,7 @@ The `keyed(by:)` method is declared as a `Sequence` extension returning `[Key: E extension Sequence { public func keyed( by keyForValue: (Element) throws -> Key, - uniquingKeysWith combine: ((Element, Element) throws -> Element)? = nil + uniquingKeysWith combine: ((Key, Element, Element) throws -> Element)? = nil ) rethrows -> [Key: Element] } ``` diff --git a/Sources/Algorithms/Keyed.swift b/Sources/Algorithms/Keyed.swift index f4d45750..40a76109 100644 --- a/Sources/Algorithms/Keyed.swift +++ b/Sources/Algorithms/Keyed.swift @@ -30,18 +30,41 @@ extension Sequence { @inlinable public func keyed( by keyForValue: (Element) throws -> Key, - // TODO: pass `Key` into `combine`: (Key, Element, Element) throws -> Element - uniquingKeysWith combine: ((Element, Element) throws -> Element)? = nil + uniquingKeysWith combine: ((Key, Element, Element) throws -> Element)? = nil ) rethrows -> [Key: Element] { - // Note: This implementation is a bit convoluted, but it's just aiming to reuse the existing stdlib logic, - // to ensure consistent behaviour, error messages, etc. - // If this API ends up in the stdlib itself, it could just call the underlying `_NativeDictionary` methods. - try withoutActuallyEscaping(keyForValue) { keyForValue in - if let combine { - return try Dictionary(self.lazy.map { (try keyForValue($0), $0) }, uniquingKeysWith: combine) - } else { - return try Dictionary(uniqueKeysWithValues: self.lazy.map { (try keyForValue($0), $0) } ) + var result = [Key: Element]() + + if combine != nil { + // We have a `combine` closure. Use it to resolve duplicate keys. + + for element in self { + let key = try keyForValue(element) + + if let oldValue = result.updateValue(element, forKey: key) { + // Can't use a conditional binding to unwrap this, because the newly bound variable + // doesn't play nice with the `rethrows` system. + let valueToKeep = try combine!(key, oldValue, element) + + // This causes a second look-up for the same key. The standard library can avoid that + // by calling `mutatingFind` to get access to the bucket where the value will end up, + // and updating in place. + // Swift Algorithms doesn't have access to that API, so we make due. + // When this gets merged into the standard library, we should optimize this. + result[key] = valueToKeep + } + } + } else { + // There's no `combine` closure. Duplicate keys are disallowed. + + for element in self { + let key = try keyForValue(element) + + guard result.updateValue(element, forKey: key) == nil else { + fatalError("Duplicate values for key: '\(key)'") + } } } + + return result } } diff --git a/Tests/SwiftAlgorithmsTests/KeyedTests.swift b/Tests/SwiftAlgorithmsTests/KeyedTests.swift index 68dbd94e..0ba39ff5 100644 --- a/Tests/SwiftAlgorithmsTests/KeyedTests.swift +++ b/Tests/SwiftAlgorithmsTests/KeyedTests.swift @@ -38,9 +38,18 @@ final class KeyedTests: XCTestCase { } func testNonUniqueKeysWithMergeFunction() { + var combineCallHistory = [(key: Character, current: String, new: String)]() + let expectedCallHistory = [ + (key: "A", current: "Apple", new: "Avocado"), + (key: "C", current: "Cherry", new: "Coconut"), + ] + let d = ["Apple", "Avocado", "Banana", "Cherry", "Coconut"].keyed( by: { $0.first! }, - uniquingKeysWith: { older, newer in "\(older)-\(newer)"} + uniquingKeysWith: { key, older, newer in + combineCallHistory.append((key, older, newer)) + return "\(older)-\(newer)" + } ) XCTAssertEqual(d.count, 3) @@ -48,6 +57,11 @@ final class KeyedTests: XCTestCase { XCTAssertEqual(d["B"]!, "Banana") XCTAssertEqual(d["C"]!, "Cherry-Coconut") XCTAssertNil(d["D"]) + + XCTAssertEqual( + combineCallHistory.map(String.init(describing:)), // quick/dirty workaround: tuples aren't Equatable + expectedCallHistory.map(String.init(describing:)) + ) } func testThrowingFromKeyFunction() { @@ -66,7 +80,7 @@ final class KeyedTests: XCTestCase { let error = SampleError() XCTAssertThrowsError( - try input.keyed(by: { $0.first! }, uniquingKeysWith: { _, _ in throw error }) + try input.keyed(by: { $0.first! }, uniquingKeysWith: { _, _, _ in throw error }) ) { thrownError in XCTAssertIdentical(error, thrownError as? SampleError) } From 8c7df22f9e8db0782be8e5bd2458d87e3360678d Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Thu, 6 Jul 2023 21:40:21 -0400 Subject: [PATCH 3/5] Split `keyed(by:)` into two overloads --- Guides/Keyed.md | 10 ++- Sources/Algorithms/Keyed.swift | 82 +++++++++++++-------- Tests/SwiftAlgorithmsTests/KeyedTests.swift | 17 +++-- 3 files changed, 67 insertions(+), 42 deletions(-) diff --git a/Guides/Keyed.md b/Guides/Keyed.md index 11bedebf..93ab8d8e 100644 --- a/Guides/Keyed.md +++ b/Guides/Keyed.md @@ -7,7 +7,7 @@ Stores the elements of a sequence as the values of a Dictionary, keyed by the re ```swift let fruits = ["Apple", "Banana", "Cherry"] -let fruitByLetter = fruits.keyed(by: { $0.first! }) +let fruitByLetter = try! fruits.keyed(by: { $0.first! }) // Results in: // [ // "A": "Apple", @@ -16,7 +16,7 @@ let fruitByLetter = fruits.keyed(by: { $0.first! }) // ] ``` -Duplicate keys will trigger a runtime error by default. To handle this, you can provide a closure which specifies which value to keep: +Duplicate keys throw an error by default. Alternatively, you can provide a closure which specifies which value to keep: ```swift let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] @@ -34,10 +34,14 @@ let fruitsByLetter = fruits.keyed( ## Detailed Design -The `keyed(by:)` method is declared as a `Sequence` extension returning `[Key: Element]`. +The `keyed(by:)` and `keyed(by:uniquingKeysWith:)` methods are declared in an `Sequence` extension, both returning `[Key: Element]`. ```swift extension Sequence { + public func keyed( + by keyForValue: (Element) throws -> Key + ) throws -> [Key: Element] + public func keyed( by keyForValue: (Element) throws -> Key, uniquingKeysWith combine: ((Key, Element, Element) throws -> Element)? = nil diff --git a/Sources/Algorithms/Keyed.swift b/Sources/Algorithms/Keyed.swift index 40a76109..6f54e9ef 100644 --- a/Sources/Algorithms/Keyed.swift +++ b/Sources/Algorithms/Keyed.swift @@ -9,7 +9,45 @@ // //===----------------------------------------------------------------------===// +public struct KeysAreNotUnique: Error { + public let key: Key + public let previousElement: Element + public let conflictingElement: Element + + @inlinable + public init(key: Key, previousElement: Element, conflictingElement: Element) { + self.key = key + self.previousElement = previousElement + self.conflictingElement = conflictingElement + } +} + extension Sequence { + /// Creates a new Dictionary from the elements of `self`, keyed by the + /// results returned by the given `keyForValue` closure. Deriving the + /// same duplicate key for more than one element of `self` will cause + /// an error to be thrown. + /// + /// - Parameters: + /// - keyForValue: A closure that returns a key for each element in `self`. + /// - Throws: `KeysAreNotUnique ` if two values map to the same key (via `keyForValue`). + @inlinable + public func keyed( + by keyForValue: (Element) throws -> Key + ) throws -> [Key: Element] { + var result = [Key: Element]() + + for element in self { + let key = try keyForValue(element) + + if let previousElement = result.updateValue(element, forKey: key) { + throw KeysAreNotUnique(key: key, previousElement: previousElement, conflictingElement: element) + } + } + + return result + } + /// Creates a new Dictionary from the elements of `self`, keyed by the /// results returned by the given `keyForValue` closure. As the dictionary is /// built, the initializer calls the `combine` closure with the current and @@ -18,50 +56,30 @@ extension Sequence { /// choose between the two values, combine them to produce a new value, or /// even throw an error. /// - /// If no `combine` closure is provided, deriving the same duplicate key for - /// more than one element of self results in a runtime error. - /// /// - Parameters: - /// - keyForValue: A closure that returns a key for each element in - /// `self`. + /// - keyForValue: A closure that returns a key for each element in `self`. /// - combine: A closure that is called with the values for any duplicate /// keys that are encountered. The closure returns the desired value for /// the final dictionary. @inlinable public func keyed( by keyForValue: (Element) throws -> Key, - uniquingKeysWith combine: ((Key, Element, Element) throws -> Element)? = nil + uniquingKeysWith combine: (Key, Element, Element) throws -> Element ) rethrows -> [Key: Element] { var result = [Key: Element]() - if combine != nil { - // We have a `combine` closure. Use it to resolve duplicate keys. - - for element in self { - let key = try keyForValue(element) - - if let oldValue = result.updateValue(element, forKey: key) { - // Can't use a conditional binding to unwrap this, because the newly bound variable - // doesn't play nice with the `rethrows` system. - let valueToKeep = try combine!(key, oldValue, element) - - // This causes a second look-up for the same key. The standard library can avoid that - // by calling `mutatingFind` to get access to the bucket where the value will end up, - // and updating in place. - // Swift Algorithms doesn't have access to that API, so we make due. - // When this gets merged into the standard library, we should optimize this. - result[key] = valueToKeep - } - } - } else { - // There's no `combine` closure. Duplicate keys are disallowed. + for element in self { + let key = try keyForValue(element) - for element in self { - let key = try keyForValue(element) + if let oldValue = result.updateValue(element, forKey: key) { + let valueToKeep = try combine(key, oldValue, element) - guard result.updateValue(element, forKey: key) == nil else { - fatalError("Duplicate values for key: '\(key)'") - } + // This causes a second look-up for the same key. The standard library can avoid that + // by calling `mutatingFind` to get access to the bucket where the value will end up, + // and updating in place. + // Swift Algorithms doesn't have access to that API, so we make due. + // When this gets merged into the standard library, we should optimize this. + result[key] = valueToKeep } } diff --git a/Tests/SwiftAlgorithmsTests/KeyedTests.swift b/Tests/SwiftAlgorithmsTests/KeyedTests.swift index 0ba39ff5..6c740649 100644 --- a/Tests/SwiftAlgorithmsTests/KeyedTests.swift +++ b/Tests/SwiftAlgorithmsTests/KeyedTests.swift @@ -16,7 +16,7 @@ final class KeyedTests: XCTestCase { private class SampleError: Error {} func testUniqueKeys() { - let d = ["Apple", "Banana", "Cherry"].keyed(by: { $0.first! }) + let d = try! ["Apple", "Banana", "Cherry"].keyed(by: { $0.first! }) XCTAssertEqual(d.count, 3) XCTAssertEqual(d["A"]!, "Apple") XCTAssertEqual(d["B"]!, "Banana") @@ -25,16 +25,19 @@ final class KeyedTests: XCTestCase { } func testEmpty() { - let d = EmptyCollection().keyed(by: { $0.first! }) + let d = try! EmptyCollection().keyed(by: { $0.first! }) XCTAssertEqual(d.count, 0) } func testNonUniqueKeys() throws { - throw XCTSkip(""" - TODO: What's the XCTest equivalent to `expectCrashLater()`? - - https://github.com/apple/swift/blob/4d1d8a9de5ebc132a17aee9fc267461facf89bf8/validation-test/stdlib/Dictionary.swift#L1914 - """) + XCTAssertThrowsError( + try ["Apple", "Avocado", "Banana", "Cherry"].keyed(by: { $0.first! }) + ) { thrownError in + let e = thrownError as! KeysAreNotUnique + XCTAssertEqual(e.key, "A") + XCTAssertEqual(e.previousElement, "Apple") + XCTAssertEqual(e.conflictingElement, "Avocado") + } } func testNonUniqueKeysWithMergeFunction() { From 4ca655136812c6672599db99b3fb93285c6ce803 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Thu, 6 Jul 2023 21:49:16 -0400 Subject: [PATCH 4/5] =?UTF-8?q?Rename=20uniquingKeysWith=20=E2=86=92=20res?= =?UTF-8?q?olvingConflictsWith?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Guides/Keyed.md | 6 ++--- README.md | 2 +- Sources/Algorithms/Keyed.swift | 26 +++++++-------------- Tests/SwiftAlgorithmsTests/KeyedTests.swift | 10 ++++---- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/Guides/Keyed.md b/Guides/Keyed.md index 93ab8d8e..d51393f4 100644 --- a/Guides/Keyed.md +++ b/Guides/Keyed.md @@ -22,7 +22,7 @@ Duplicate keys throw an error by default. Alternatively, you can provide a closu let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] let fruitsByLetter = fruits.keyed( by: { $0.first! }, - uniquingKeysWith: { key, old, new in new } // Always pick the latest fruit + resolvingConflictsWith: { key, old, new in new } // Always pick the latest fruit ) // Results in: // [ @@ -34,7 +34,7 @@ let fruitsByLetter = fruits.keyed( ## Detailed Design -The `keyed(by:)` and `keyed(by:uniquingKeysWith:)` methods are declared in an `Sequence` extension, both returning `[Key: Element]`. +The `keyed(by:)` and `keyed(by:resolvingConflictsWith:)` methods are declared in an `Sequence` extension, both returning `[Key: Element]`. ```swift extension Sequence { @@ -44,7 +44,7 @@ extension Sequence { public func keyed( by keyForValue: (Element) throws -> Key, - uniquingKeysWith combine: ((Key, Element, Element) throws -> Element)? = nil + resolvingConflictsWith resolve: ((Key, Element, Element) throws -> Element)? = nil ) rethrows -> [Key: Element] } ``` diff --git a/README.md b/README.md index 059e51f0..4218cdf9 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Read more about the package, and the intent behind it, in the [announcement on s - [`grouped(by:)](https://github.com/apple/swift-algorithms/blob/main/Guides/Grouped.md): Group up elements using the given closure, returning a Dictionary of those groups, keyed by the results of the closure. - [`indexed()`](https://github.com/apple/swift-algorithms/blob/main/Guides/Indexed.md): Iterate over tuples of a collection's indices and elements. - [`interspersed(with:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Intersperse.md): Place a value between every two elements of a sequence. -- [`keyed(by:)`, `keyed(by:uniquingKeysBy:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Keyed.md): Returns a Dictionary that associates elements of a sequence with the keys returned by the given closure. +- [`keyed(by:)`, `keyed(by:resolvingConflictsBy:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Keyed.md): Returns a Dictionary that associates elements of a sequence with the keys returned by the given closure. - [`partitioningIndex(where:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Partition.md): Returns the starting index of the partition of a collection that matches a predicate. - [`reductions(_:)`, `reductions(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Reductions.md): Returns all the intermediate states of reducing the elements of a sequence or collection. - [`split(maxSplits:omittingEmptySubsequences:whereSeparator)`, `split(separator:maxSplits:omittingEmptySubsequences)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Split.md): Lazy versions of the Standard Library's eager operations that split sequences and collections into subsequences separated by the specified separator element. diff --git a/Sources/Algorithms/Keyed.swift b/Sources/Algorithms/Keyed.swift index 6f54e9ef..5d908297 100644 --- a/Sources/Algorithms/Keyed.swift +++ b/Sources/Algorithms/Keyed.swift @@ -35,36 +35,28 @@ extension Sequence { public func keyed( by keyForValue: (Element) throws -> Key ) throws -> [Key: Element] { - var result = [Key: Element]() - - for element in self { - let key = try keyForValue(element) - - if let previousElement = result.updateValue(element, forKey: key) { - throw KeysAreNotUnique(key: key, previousElement: previousElement, conflictingElement: element) - } - } - - return result + try self.keyed(by: keyForValue, resolvingConflictsWith: { + throw KeysAreNotUnique(key: $0, previousElement: $1, conflictingElement: $2) + }) } /// Creates a new Dictionary from the elements of `self`, keyed by the /// results returned by the given `keyForValue` closure. As the dictionary is - /// built, the initializer calls the `combine` closure with the current and - /// new values for any duplicate keys. Pass a closure as `combine` that + /// built, the initializer calls the `resolve` closure with the current and + /// new values for any duplicate keys. Pass a closure as `resolve` that /// returns the value to use in the resulting dictionary: The closure can /// choose between the two values, combine them to produce a new value, or /// even throw an error. /// /// - Parameters: /// - keyForValue: A closure that returns a key for each element in `self`. - /// - combine: A closure that is called with the values for any duplicate + /// - resolve: A closure that is called with the values for any duplicate /// keys that are encountered. The closure returns the desired value for /// the final dictionary. @inlinable public func keyed( by keyForValue: (Element) throws -> Key, - uniquingKeysWith combine: (Key, Element, Element) throws -> Element + resolvingConflictsWith resolve: (Key, Element, Element) throws -> Element ) rethrows -> [Key: Element] { var result = [Key: Element]() @@ -72,12 +64,12 @@ extension Sequence { let key = try keyForValue(element) if let oldValue = result.updateValue(element, forKey: key) { - let valueToKeep = try combine(key, oldValue, element) + let valueToKeep = try resolve(key, oldValue, element) // This causes a second look-up for the same key. The standard library can avoid that // by calling `mutatingFind` to get access to the bucket where the value will end up, // and updating in place. - // Swift Algorithms doesn't have access to that API, so we make due. + // Swift Algorithms doesn't have access to that API, so we make do. // When this gets merged into the standard library, we should optimize this. result[key] = valueToKeep } diff --git a/Tests/SwiftAlgorithmsTests/KeyedTests.swift b/Tests/SwiftAlgorithmsTests/KeyedTests.swift index 6c740649..04397f7a 100644 --- a/Tests/SwiftAlgorithmsTests/KeyedTests.swift +++ b/Tests/SwiftAlgorithmsTests/KeyedTests.swift @@ -41,7 +41,7 @@ final class KeyedTests: XCTestCase { } func testNonUniqueKeysWithMergeFunction() { - var combineCallHistory = [(key: Character, current: String, new: String)]() + var resolveCallHistory = [(key: Character, current: String, new: String)]() let expectedCallHistory = [ (key: "A", current: "Apple", new: "Avocado"), (key: "C", current: "Cherry", new: "Coconut"), @@ -49,8 +49,8 @@ final class KeyedTests: XCTestCase { let d = ["Apple", "Avocado", "Banana", "Cherry", "Coconut"].keyed( by: { $0.first! }, - uniquingKeysWith: { key, older, newer in - combineCallHistory.append((key, older, newer)) + resolvingConflictsWith: { key, older, newer in + resolveCallHistory.append((key, older, newer)) return "\(older)-\(newer)" } ) @@ -62,7 +62,7 @@ final class KeyedTests: XCTestCase { XCTAssertNil(d["D"]) XCTAssertEqual( - combineCallHistory.map(String.init(describing:)), // quick/dirty workaround: tuples aren't Equatable + resolveCallHistory.map(String.init(describing:)), // quick/dirty workaround: tuples aren't Equatable expectedCallHistory.map(String.init(describing:)) ) } @@ -83,7 +83,7 @@ final class KeyedTests: XCTestCase { let error = SampleError() XCTAssertThrowsError( - try input.keyed(by: { $0.first! }, uniquingKeysWith: { _, _, _ in throw error }) + try input.keyed(by: { $0.first! }, resolvingConflictsWith: { _, _, _ in throw error }) ) { thrownError in XCTAssertIdentical(error, thrownError as? SampleError) } From 1a707601e1ce5f6396bb45411381e3f32c9b72a0 Mon Sep 17 00:00:00 2001 From: Alexander Momchilov Date: Sun, 5 Nov 2023 11:44:27 -0500 Subject: [PATCH 5/5] Keep latest instead of throwing on collision --- Guides/Keyed.md | 22 ++++++++--------- Sources/Algorithms/Keyed.swift | 27 +++++---------------- Tests/SwiftAlgorithmsTests/KeyedTests.swift | 17 ++++++------- 3 files changed, 24 insertions(+), 42 deletions(-) mode change 100644 => 100755 Sources/Algorithms/Keyed.swift diff --git a/Guides/Keyed.md b/Guides/Keyed.md index d51393f4..b2b72b41 100644 --- a/Guides/Keyed.md +++ b/Guides/Keyed.md @@ -6,29 +6,29 @@ Stores the elements of a sequence as the values of a Dictionary, keyed by the result of the given closure. ```swift -let fruits = ["Apple", "Banana", "Cherry"] -let fruitByLetter = try! fruits.keyed(by: { $0.first! }) +let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] +let fruitByLetter = fruits.keyed(by: { $0.first! }) // Results in: // [ -// "A": "Apple", -// "B": "Banana", -// "C": "Cherry", +// "A": "Avocado", +// "B": "Blackberry", +// "C": "Coconut", // ] ``` -Duplicate keys throw an error by default. Alternatively, you can provide a closure which specifies which value to keep: +On a key-collision, the latest element is kept by default. Alternatively, you can provide a closure which specifies which value to keep: ```swift let fruits = ["Apricot", "Banana", "Apple", "Cherry", "Blackberry", "Avocado", "Coconut"] let fruitsByLetter = fruits.keyed( by: { $0.first! }, - resolvingConflictsWith: { key, old, new in new } // Always pick the latest fruit + resolvingConflictsWith: { key, old, new in old } // Always pick the first fruit ) // Results in: // [ -// "A": "Avocado", -// "B": "Blackberry"], -// "C": "Coconut", +// "A": "Apricot", +// "B": "Banana", +// "C": "Cherry", // ] ``` @@ -40,7 +40,7 @@ The `keyed(by:)` and `keyed(by:resolvingConflictsWith:)` methods are declared in extension Sequence { public func keyed( by keyForValue: (Element) throws -> Key - ) throws -> [Key: Element] + ) rethrows -> [Key: Element] public func keyed( by keyForValue: (Element) throws -> Key, diff --git a/Sources/Algorithms/Keyed.swift b/Sources/Algorithms/Keyed.swift old mode 100644 new mode 100755 index 5d908297..0446ef69 --- a/Sources/Algorithms/Keyed.swift +++ b/Sources/Algorithms/Keyed.swift @@ -9,35 +9,20 @@ // //===----------------------------------------------------------------------===// -public struct KeysAreNotUnique: Error { - public let key: Key - public let previousElement: Element - public let conflictingElement: Element - - @inlinable - public init(key: Key, previousElement: Element, conflictingElement: Element) { - self.key = key - self.previousElement = previousElement - self.conflictingElement = conflictingElement - } -} - extension Sequence { /// Creates a new Dictionary from the elements of `self`, keyed by the - /// results returned by the given `keyForValue` closure. Deriving the - /// same duplicate key for more than one element of `self` will cause - /// an error to be thrown. + /// results returned by the given `keyForValue` closure. + /// + /// If the key derived for a new element collides with an existing key from a previous element, + /// the latest value will be kept. /// /// - Parameters: /// - keyForValue: A closure that returns a key for each element in `self`. - /// - Throws: `KeysAreNotUnique ` if two values map to the same key (via `keyForValue`). @inlinable public func keyed( by keyForValue: (Element) throws -> Key - ) throws -> [Key: Element] { - try self.keyed(by: keyForValue, resolvingConflictsWith: { - throw KeysAreNotUnique(key: $0, previousElement: $1, conflictingElement: $2) - }) + ) rethrows -> [Key: Element] { + return try self.keyed(by: keyForValue, resolvingConflictsWith: { _, old, new in new }) } /// Creates a new Dictionary from the elements of `self`, keyed by the diff --git a/Tests/SwiftAlgorithmsTests/KeyedTests.swift b/Tests/SwiftAlgorithmsTests/KeyedTests.swift index 04397f7a..53b1c989 100644 --- a/Tests/SwiftAlgorithmsTests/KeyedTests.swift +++ b/Tests/SwiftAlgorithmsTests/KeyedTests.swift @@ -16,7 +16,7 @@ final class KeyedTests: XCTestCase { private class SampleError: Error {} func testUniqueKeys() { - let d = try! ["Apple", "Banana", "Cherry"].keyed(by: { $0.first! }) + let d = ["Apple", "Banana", "Cherry"].keyed(by: { $0.first! }) XCTAssertEqual(d.count, 3) XCTAssertEqual(d["A"]!, "Apple") XCTAssertEqual(d["B"]!, "Banana") @@ -25,19 +25,16 @@ final class KeyedTests: XCTestCase { } func testEmpty() { - let d = try! EmptyCollection().keyed(by: { $0.first! }) + let d = EmptyCollection().keyed(by: { $0.first! }) XCTAssertEqual(d.count, 0) } func testNonUniqueKeys() throws { - XCTAssertThrowsError( - try ["Apple", "Avocado", "Banana", "Cherry"].keyed(by: { $0.first! }) - ) { thrownError in - let e = thrownError as! KeysAreNotUnique - XCTAssertEqual(e.key, "A") - XCTAssertEqual(e.previousElement, "Apple") - XCTAssertEqual(e.conflictingElement, "Avocado") - } + let d = ["Apple", "Avocado", "Banana", "Cherry"].keyed(by: { $0.first! }) + XCTAssertEqual(d.count, 3) + XCTAssertEqual(d["A"]!, "Avocado", "On a key-collision, keyed(by:) should take the latest value.") + XCTAssertEqual(d["B"]!, "Banana") + XCTAssertEqual(d["C"]!, "Cherry") } func testNonUniqueKeysWithMergeFunction() {