Refactoring

This commit is contained in:
Justin Mazzocchi 2020-09-05 17:28:07 -07:00
parent 414f979f94
commit 9c3010fe29
No known key found for this signature in database
GPG key ID: E223E6937AAFB01C
6 changed files with 47 additions and 42 deletions

View file

@ -7,24 +7,19 @@ import Foundation
// This implementation uses deterministic hashing functions so it can be serialized / deserialized // This implementation uses deterministic hashing functions so it can be serialized / deserialized
public struct BloomFilter<T: DeterministicallyHashable> { public struct BloomFilter<T: DeterministicallyHashable> {
public let hashes: [Hash] public let hashers: [DeterministicHasher]
public let bits: Int public let bits: Int
private var data: BitArray private var data: BitArray
public init(hashes: [Hash], bits: Int) { public init(hashes: [DeterministicHasher], bits: Int) {
self.hashes = hashes self.hashers = hashes
self.bits = bits self.bits = bits
data = BitArray(count: bits) data = BitArray(count: bits)
} }
} }
public extension BloomFilter { public extension BloomFilter {
enum Hash: String, Codable {
case djb2
case sdbm
}
mutating func insert(_ newMember: T) { mutating func insert(_ newMember: T) {
for index in indices(newMember) { for index in indices(newMember) {
data[index] = true data[index] = true
@ -38,7 +33,7 @@ public extension BloomFilter {
extension BloomFilter: Codable { extension BloomFilter: Codable {
private enum CodingKeys: String, CodingKey { private enum CodingKeys: String, CodingKey {
case hashes case hashers
case bits case bits
case data case data
} }
@ -46,7 +41,7 @@ extension BloomFilter: Codable {
public init(from decoder: Decoder) throws { public init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self) let container = try decoder.container(keyedBy: CodingKeys.self)
hashes = try container.decode([Hash].self, forKey: .hashes) hashers = try container.decode([DeterministicHasher].self, forKey: .hashers)
bits = try container.decode(Int.self, forKey: .bits) bits = try container.decode(Int.self, forKey: .bits)
data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits) data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
} }
@ -54,7 +49,7 @@ extension BloomFilter: Codable {
public func encode(to encoder: Encoder) throws { public func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self) var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(hashes, forKey: .hashes) try container.encode(hashers, forKey: .hashers)
try container.encode(bits, forKey: .bits) try container.encode(bits, forKey: .bits)
try container.encode(data.data, forKey: .data) try container.encode(data.data, forKey: .data)
} }
@ -62,32 +57,6 @@ extension BloomFilter: Codable {
private extension BloomFilter { private extension BloomFilter {
func indices(_ member: T) -> [Int] { func indices(_ member: T) -> [Int] {
hashes.map { abs($0.apply(member)) % bits } hashers.map { abs($0.apply(member)) % bits }
}
}
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
private extension BloomFilter.Hash {
func apply(_ member: T) -> Int {
Array(member.deterministicallyHashableData)
.map(Int.init)
.reduce(initial, then)
}
var initial: Int {
switch self {
case .djb2: return 5381
case .sdbm: return 0
}
}
func then(result: Int, next: Int) -> Int {
switch self {
case .djb2:
return (result << 5) &+ result &+ next
case .sdbm:
return next &+ (result << 6) &+ (result << 16) - result
}
} }
} }

View file

@ -0,0 +1,36 @@
// Copyright © 2020 Metabolist. All rights reserved.
import Foundation
/// A hash algorithm selectable by a stable string raw value (`"djb2"` or `"sdbm"`),
/// which is also the form used when the value is encoded or decoded.
public enum DeterministicHasher: String, Codable {
    case djb2, sdbm
}
extension DeterministicHasher {
    /// Hashes the bytes of `hashable.hashableData` with this algorithm.
    ///
    /// - Parameter hashable: The value whose `hashableData` bytes are hashed.
    /// - Returns: The result of folding every byte, in order, into `initial`
    ///   using `then(result:next:)`.
    func apply(_ hashable: DeterministicallyHashable) -> Int {
        var hash = initial
        for byte in hashable.hashableData {
            // Each byte is widened to `Int` before being mixed in.
            hash = then(result: hash, next: Int(byte))
        }
        return hash
    }
}
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
/// Per-algorithm seed value and mixing step used to fold input bytes into a hash.
private extension DeterministicHasher {
    /// Starting accumulator value for the fold over the input bytes.
    var initial: Int {
        switch self {
        case .djb2: return 5381
        case .sdbm: return 0
        }
    }

    /// Folds one more byte value into the running hash.
    ///
    /// - Parameters:
    ///   - result: The hash accumulated so far.
    ///   - next: The next input value to mix in.
    /// - Returns: The updated hash. Every addition and subtraction wraps on
    ///   overflow, so this never traps regardless of input length.
    func then(result: Int, next: Int) -> Int {
        switch self {
        case .djb2:
            // Equivalent to result * 33 + next, with wrapping arithmetic.
            return (result << 5) &+ result &+ next
        case .sdbm:
            // Equivalent to result * 65599 + next. The subtraction must be the
            // wrapping `&-`: a plain `-` traps at runtime once the accumulator
            // has grown into the full 64-bit range. Results are unchanged for
            // every input that did not previously overflow.
            return next &+ (result << 6) &+ (result << 16) &- result
        }
    }
}

View file

@ -3,5 +3,5 @@
import Foundation import Foundation
public protocol DeterministicallyHashable { public protocol DeterministicallyHashable {
var deterministicallyHashableData: Data { get } var hashableData: Data { get }
} }

View file

@ -3,5 +3,5 @@
import Foundation import Foundation
extension Data: DeterministicallyHashable { extension Data: DeterministicallyHashable {
public var deterministicallyHashableData: Data { self } public var hashableData: Data { self }
} }

View file

@ -3,5 +3,5 @@
import Foundation import Foundation
extension String: DeterministicallyHashable { extension String: DeterministicallyHashable {
public var deterministicallyHashableData: Data { Data(utf8) } public var hashableData: Data { Data(utf8) }
} }

View file

@ -16,7 +16,7 @@ final class CodableBloomFilterTests: XCTestCase {
func testCoding() throws { func testCoding() throws {
var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 64) var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 64)
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8) let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashers":["djb2","sdbm"]}"#.utf8)
sut.insert("lol") sut.insert("lol")
sut.insert("ok") sut.insert("ok")