mirror of
https://github.com/metabolist/metatext.git
synced 2024-11-22 08:10:59 +00:00
Refactoring
This commit is contained in:
parent
414f979f94
commit
9c3010fe29
6 changed files with 47 additions and 42 deletions
|
@ -7,24 +7,19 @@ import Foundation
|
|||
// This implementation uses deterministic hashing functions so it can be serialized / deserialized
|
||||
|
||||
public struct BloomFilter<T: DeterministicallyHashable> {
|
||||
public let hashes: [Hash]
|
||||
public let hashers: [DeterministicHasher]
|
||||
public let bits: Int
|
||||
|
||||
private var data: BitArray
|
||||
|
||||
public init(hashes: [Hash], bits: Int) {
|
||||
self.hashes = hashes
|
||||
public init(hashes: [DeterministicHasher], bits: Int) {
|
||||
self.hashers = hashes
|
||||
self.bits = bits
|
||||
data = BitArray(count: bits)
|
||||
}
|
||||
}
|
||||
|
||||
public extension BloomFilter {
|
||||
enum Hash: String, Codable {
|
||||
case djb2
|
||||
case sdbm
|
||||
}
|
||||
|
||||
mutating func insert(_ newMember: T) {
|
||||
for index in indices(newMember) {
|
||||
data[index] = true
|
||||
|
@ -38,7 +33,7 @@ public extension BloomFilter {
|
|||
|
||||
extension BloomFilter: Codable {
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case hashes
|
||||
case hashers
|
||||
case bits
|
||||
case data
|
||||
}
|
||||
|
@ -46,7 +41,7 @@ extension BloomFilter: Codable {
|
|||
public init(from decoder: Decoder) throws {
|
||||
let container = try decoder.container(keyedBy: CodingKeys.self)
|
||||
|
||||
hashes = try container.decode([Hash].self, forKey: .hashes)
|
||||
hashers = try container.decode([DeterministicHasher].self, forKey: .hashers)
|
||||
bits = try container.decode(Int.self, forKey: .bits)
|
||||
data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
|
||||
}
|
||||
|
@ -54,7 +49,7 @@ extension BloomFilter: Codable {
|
|||
public func encode(to encoder: Encoder) throws {
|
||||
var container = encoder.container(keyedBy: CodingKeys.self)
|
||||
|
||||
try container.encode(hashes, forKey: .hashes)
|
||||
try container.encode(hashers, forKey: .hashers)
|
||||
try container.encode(bits, forKey: .bits)
|
||||
try container.encode(data.data, forKey: .data)
|
||||
}
|
||||
|
@ -62,32 +57,6 @@ extension BloomFilter: Codable {
|
|||
|
||||
private extension BloomFilter {
|
||||
func indices(_ member: T) -> [Int] {
|
||||
hashes.map { abs($0.apply(member)) % bits }
|
||||
}
|
||||
}
|
||||
|
||||
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
||||
|
||||
private extension BloomFilter.Hash {
|
||||
func apply(_ member: T) -> Int {
|
||||
Array(member.deterministicallyHashableData)
|
||||
.map(Int.init)
|
||||
.reduce(initial, then)
|
||||
}
|
||||
|
||||
var initial: Int {
|
||||
switch self {
|
||||
case .djb2: return 5381
|
||||
case .sdbm: return 0
|
||||
}
|
||||
}
|
||||
|
||||
func then(result: Int, next: Int) -> Int {
|
||||
switch self {
|
||||
case .djb2:
|
||||
return (result << 5) &+ result &+ next
|
||||
case .sdbm:
|
||||
return next &+ (result << 6) &+ (result << 16) - result
|
||||
}
|
||||
hashers.map { abs($0.apply(member)) % bits }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright © 2020 Metabolist. All rights reserved.
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Identifies one of the supported deterministic hash functions.
///
/// The enum is backed by `String` raw values equal to the case names
/// (`"djb2"`, `"sdbm"`) and is `Codable`, so a hasher is encoded as its name.
public enum DeterministicHasher: String, Codable {
    case djb2, sdbm
}
|
||||
|
||||
extension DeterministicHasher {
    /// Hashes the bytes exposed by `hashable.hashableData` with this hasher.
    ///
    /// Starts from `initial` and folds each byte, widened to `Int`, into the
    /// running value with `then(result:next:)`, in order.
    ///
    /// - Parameter hashable: The value whose `hashableData` bytes are hashed.
    /// - Returns: The final accumulated hash; `initial` when there are no bytes.
    func apply(_ hashable: DeterministicallyHashable) -> Int {
        hashable.hashableData.reduce(initial) { accumulated, byte in
            then(result: accumulated, next: Int(byte))
        }
    }
}
|
||||
|
||||
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
||||
|
||||
private extension DeterministicHasher {
    /// Seed value the fold in `apply(_:)` starts from.
    var initial: Int {
        switch self {
        case .djb2: return 5381
        case .sdbm: return 0
        }
    }

    /// Folds the next input value into the running hash.
    ///
    /// All arithmetic uses the wrapping operators (`&+`, `&-`), so the hash
    /// wraps around on overflow instead of trapping.
    ///
    /// - Parameters:
    ///   - result: The hash accumulated so far.
    ///   - next: The next input value (a byte widened to `Int`).
    /// - Returns: The updated hash.
    func then(result: Int, next: Int) -> Int {
        switch self {
        case .djb2:
            // result * 33 + next
            return (result << 5) &+ result &+ next
        case .sdbm:
            // result * 65599 + next. The subtraction must wrap as well: a plain
            // `-` traps at runtime once the running value has overflowed.
            return next &+ (result << 6) &+ (result << 16) &- result
        }
    }
}
|
|
@ -3,5 +3,5 @@
|
|||
import Foundation
|
||||
|
||||
public protocol DeterministicallyHashable {
|
||||
var deterministicallyHashableData: Data { get }
|
||||
var hashableData: Data { get }
|
||||
}
|
||||
|
|
|
@ -3,5 +3,5 @@
|
|||
import Foundation
|
||||
|
||||
extension Data: DeterministicallyHashable {
|
||||
public var deterministicallyHashableData: Data { self }
|
||||
public var hashableData: Data { self }
|
||||
}
|
||||
|
|
|
@ -3,5 +3,5 @@
|
|||
import Foundation
|
||||
|
||||
extension String: DeterministicallyHashable {
|
||||
public var deterministicallyHashableData: Data { Data(utf8) }
|
||||
public var hashableData: Data { Data(utf8) }
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ final class CodableBloomFilterTests: XCTestCase {
|
|||
|
||||
func testCoding() throws {
|
||||
var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 64)
|
||||
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
|
||||
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashers":["djb2","sdbm"]}"#.utf8)
|
||||
|
||||
sut.insert("lol")
|
||||
sut.insert("ok")
|
||||
|
|
Loading…
Reference in a new issue