From 9c3010fe2950b02231c2a0ccf3130cabbf443c68 Mon Sep 17 00:00:00 2001 From: Justin Mazzocchi <2831158+jzzocc@users.noreply.github.com> Date: Sat, 5 Sep 2020 17:28:07 -0700 Subject: [PATCH] Refactoring --- .../CodableBloomFilter/BloomFilter.swift | 45 +++---------------- .../DeterministicHasher.swift | 36 +++++++++++++++ .../DeterministicallyHashable.swift | 2 +- .../Data+DeterministicallyHashable.swift | 2 +- .../String+DeterministicallyHashable.swift | 2 +- .../CodableBloomFilterTests.swift | 2 +- 6 files changed, 47 insertions(+), 42 deletions(-) create mode 100644 CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift index de463e0..c811b0d 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift @@ -7,24 +7,19 @@ import Foundation // This implementation uses deterministic hashing functions so it can be serialized / deserialized public struct BloomFilter { - public let hashes: [Hash] + public let hashers: [DeterministicHasher] public let bits: Int private var data: BitArray - public init(hashes: [Hash], bits: Int) { - self.hashes = hashes + public init(hashes: [DeterministicHasher], bits: Int) { + self.hashers = hashes self.bits = bits data = BitArray(count: bits) } } public extension BloomFilter { - enum Hash: String, Codable { - case djb2 - case sdbm - } - mutating func insert(_ newMember: T) { for index in indices(newMember) { data[index] = true @@ -38,7 +33,7 @@ public extension BloomFilter { extension BloomFilter: Codable { private enum CodingKeys: String, CodingKey { - case hashes + case hashers case bits case data } @@ -46,7 +41,7 @@ extension BloomFilter: Codable { public init(from decoder: Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) - hashes = try container.decode([Hash].self, forKey: .hashes) + hashers = try container.decode([DeterministicHasher].self, forKey: .hashers) bits = try container.decode(Int.self, forKey: .bits) data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits) } @@ -54,7 +49,7 @@ extension BloomFilter: Codable { public func encode(to encoder: Encoder) throws { var container = encoder.container(keyedBy: CodingKeys.self) - try container.encode(hashes, forKey: .hashes) + try container.encode(hashers, forKey: .hashers) try container.encode(bits, forKey: .bits) try container.encode(data.data, forKey: .data) } @@ -62,32 +57,6 @@ extension BloomFilter: Codable { private extension BloomFilter { func indices(_ member: T) -> [Int] { - hashes.map { abs($0.apply(member)) % bits } - } -} - -// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73 - -private extension BloomFilter.Hash { - func apply(_ member: T) -> Int { - Array(member.deterministicallyHashableData) - .map(Int.init) - .reduce(initial, then) - } - - var initial: Int { - switch self { - case .djb2: return 5381 - case .sdbm: return 0 - } - } - - func then(result: Int, next: Int) -> Int { - switch self { - case .djb2: - return (result << 5) &+ result &+ next - case .sdbm: - return next &+ (result << 6) &+ (result << 16) - result - } + hashers.map { abs($0.apply(member)) % bits } } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift new file mode 100644 index 0000000..0121b84 --- /dev/null +++ b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift @@ -0,0 +1,36 @@ +// Copyright © 2020 Metabolist. All rights reserved. + +import Foundation + +public enum DeterministicHasher: String, Codable { + case djb2 + case sdbm +} + +extension DeterministicHasher { + func apply(_ hashable: DeterministicallyHashable) -> Int { + Array(hashable.hashableData) + .map(Int.init) + .reduce(initial, then) + } +} + +// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73 + +private extension DeterministicHasher { + var initial: Int { + switch self { + case .djb2: return 5381 + case .sdbm: return 0 + } + } + + func then(result: Int, next: Int) -> Int { + switch self { + case .djb2: + return (result << 5) &+ result &+ next + case .sdbm: + return next &+ (result << 6) &+ (result << 16) - result + } + } +} diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift index 0b4d598..33f42e5 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift @@ -3,5 +3,5 @@ import Foundation public protocol DeterministicallyHashable { - var deterministicallyHashableData: Data { get } + var hashableData: Data { get } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift index 02e3450..39eebde 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift @@ -3,5 +3,5 @@ import Foundation extension Data: DeterministicallyHashable { - public var deterministicallyHashableData: Data { self } + public var hashableData: Data { self } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift index 1e36b67..dfa7683 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift @@ -3,5 +3,5 @@ import Foundation extension String: DeterministicallyHashable { - public var deterministicallyHashableData: Data { Data(utf8) } + public var hashableData: Data { Data(utf8) } } diff --git a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift index 4a75c05..5bf90bd 100644 --- a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift +++ b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift @@ -16,7 +16,7 @@ final class CodableBloomFilterTests: XCTestCase { func testCoding() throws { var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64) - let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8) + let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashers":["djb2","sdbm"]}"#.utf8) sut.insert("lol") sut.insert("ok")