Refactoring

This commit is contained in:
Justin Mazzocchi 2020-09-07 23:49:58 -07:00
parent 17506f5bd9
commit 6e0dcd6398
No known key found for this signature in database
GPG key ID: E223E6937AAFB01C
9 changed files with 122 additions and 49 deletions

View file

@ -8,11 +8,7 @@ struct BitArray {
private var bytes: [UInt8] private var bytes: [UInt8]
init(byteCount: Int) { init(byteCount: Int) {
self.bytes = [UInt8](repeating: 0, count: byteCount) bytes = [UInt8](repeating: 0, count: byteCount)
}
init(data: Data) {
bytes = Array(data)
} }
} }
@ -40,9 +36,7 @@ extension BitArray {
extension BitArray: Codable { extension BitArray: Codable {
init(from decoder: Decoder) throws { init(from decoder: Decoder) throws {
let container = try decoder.singleValueContainer() bytes = Array(try decoder.singleValueContainer().decode(Data.self))
bytes = Array(try container.decode(Data.self))
} }
func encode(to encoder: Encoder) throws { func encode(to encoder: Encoder) throws {

View file

@ -6,14 +6,19 @@ import Foundation
// https://khanlou.com/2018/09/bloom-filters/ // https://khanlou.com/2018/09/bloom-filters/
// This implementation uses deterministic hashing functions so it can conform to Codable // This implementation uses deterministic hashing functions so it can conform to Codable
/// Errors thrown while constructing a `BloomFilter`.
///
/// Declared `public` because the filter's public throwing initializer surfaces this type;
/// callers outside the module need to be able to name it in order to match on the error.
public enum BloomFilterError: Error {
    /// The initializer was given an empty set of hashes.
    case noHashesProvided
}
public struct BloomFilter<T: DeterministicallyHashable>: Codable { public struct BloomFilter<T: DeterministicallyHashable>: Codable {
public let hashers: [DeterministicHasher] public let hashes: [Hash]
private var data: BitArray private var data: BitArray
public init(hashers: Set<DeterministicHasher>, byteCount: Int) { public init(hashes: Set<Hash>, byteCount: Int) throws {
// Sort the hashers for consistent decoding output guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided }
self.hashers = Array(hashers.sorted { $0.rawValue < $1.rawValue }) // Sort the hashes for consistent decoding output
self.hashes = Array(hashes.sorted { $0.rawValue < $1.rawValue })
data = BitArray(byteCount: byteCount) data = BitArray(byteCount: byteCount)
} }
} }
@ -32,6 +37,6 @@ public extension BloomFilter {
private extension BloomFilter { private extension BloomFilter {
func indices(_ member: T) -> [Int] { func indices(_ member: T) -> [Int] {
hashers.map { abs($0.apply(member)) % data.bitCount } hashes.map { abs($0.apply(member)) % data.bitCount }
} }
} }

View file

@ -3,5 +3,5 @@
import Foundation import Foundation
public protocol DeterministicallyHashable { public protocol DeterministicallyHashable {
var hashableData: Data { get } var dataForHashingDeterministically: Data { get }
} }

View file

@ -3,5 +3,5 @@
import Foundation import Foundation
extension Data: DeterministicallyHashable { extension Data: DeterministicallyHashable {
public var hashableData: Data { self } public var dataForHashingDeterministically: Data { self }
} }

View file

@ -3,5 +3,5 @@
import Foundation import Foundation
extension String: DeterministicallyHashable { extension String: DeterministicallyHashable {
public var hashableData: Data { Data(utf8) } public var dataForHashingDeterministically: Data { Data(utf8) }
} }

View file

@ -2,17 +2,17 @@
import Foundation import Foundation
public enum DeterministicHasher: String, Codable { public enum Hash: String, Codable {
case djb2 case djb232
case djb2a case djb2a32
case sdbm case sdbm32
case fnv1 case fnv132
case fnv1a case fnv1a32
} }
extension DeterministicHasher { extension Hash {
func apply(_ hashable: DeterministicallyHashable) -> Int { func apply(_ hashable: DeterministicallyHashable) -> Int {
Int(Array(hashable.hashableData) Int(Array(hashable.dataForHashingDeterministically)
.map(UInt32.init) .map(UInt32.init)
.reduce(offsetBasis, hash)) .reduce(offsetBasis, hash))
} }
@ -21,28 +21,28 @@ extension DeterministicHasher {
// http://www.cse.yorku.ca/~oz/hash.html // http://www.cse.yorku.ca/~oz/hash.html
// http://www.isthe.com/chongo/tech/comp/fnv/ // http://www.isthe.com/chongo/tech/comp/fnv/
private extension DeterministicHasher { private extension Hash {
static let fnvPrime: UInt32 = 16777619 static let fnvPrime: UInt32 = 16777619
var offsetBasis: UInt32 { var offsetBasis: UInt32 {
switch self { switch self {
case .djb2, .djb2a: return 5381 case .djb232, .djb2a32: return 5381
case .sdbm: return 0 case .sdbm32: return 0
case .fnv1, .fnv1a: return 2166136261 case .fnv132, .fnv1a32: return 2166136261
} }
} }
func hash(result: UInt32, next: UInt32) -> UInt32 { func hash(result: UInt32, next: UInt32) -> UInt32 {
switch self { switch self {
case .djb2: case .djb232:
return (result << 5) &+ result &+ next return (result << 5) &+ result &+ next
case .djb2a: case .djb2a32:
return (result << 5) &+ result ^ next return (result << 5) &+ result ^ next
case .sdbm: case .sdbm32:
return next &+ (result << 6) &+ (result << 16) &- result return next &+ (result << 6) &+ (result << 16) &- result
case .fnv1: case .fnv132:
return (result &* Self.fnvPrime) ^ next return (result &* Self.fnvPrime) ^ next
case .fnv1a: case .fnv1a32:
return (result ^ next) &* Self.fnvPrime return (result ^ next) &* Self.fnvPrime
} }
} }

View file

@ -4,17 +4,26 @@
import XCTest import XCTest
final class CodableBloomFilterTests: XCTestCase { final class CodableBloomFilterTests: XCTestCase {
func testHashes() {
func testHashers() { XCTAssertEqual(Hash.djb232.apply("hash"), 2090320585)
XCTAssertEqual(DeterministicHasher.djb2.apply("hash"), 2090320585) XCTAssertEqual(Hash.djb2a32.apply("hash"), 2087809207)
XCTAssertEqual(DeterministicHasher.djb2a.apply("hash"), 2087809207) XCTAssertEqual(Hash.sdbm32.apply("hash"), 385600046)
XCTAssertEqual(DeterministicHasher.sdbm.apply("hash"), 385600046) XCTAssertEqual(Hash.fnv132.apply("hash"), 3616638997)
XCTAssertEqual(DeterministicHasher.fnv1.apply("hash"), 3616638997) XCTAssertEqual(Hash.fnv1a32.apply("hash"), 3469047761)
XCTAssertEqual(DeterministicHasher.fnv1a.apply("hash"), 3469047761)
} }
// XCTest only discovers methods whose names begin with `test`; the added method is named accordingly so it actually runs.
func testContains() { func testNoHashesProvided() throws {
var sut = BloomFilter<String>(hashers: [.djb2, .sdbm, .fnv1, .fnv1a], byteCount: 128) XCTAssertThrowsError(try BloomFilter<String>(hashes: [], byteCount: 8)) {
// The handler runs only when the initializer throws; any error other than `noHashesProvided` fails the test.
guard case BloomFilterError.noHashesProvided = $0 else {
XCTFail("Expected no hashers provided error")
return
}
}
}
func testContains() throws {
var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
sut.insert("lol") sut.insert("lol")
sut.insert("ok") sut.insert("ok")
@ -26,8 +35,8 @@ final class CodableBloomFilterTests: XCTestCase {
} }
func testCoding() throws { func testCoding() throws {
var sut = BloomFilter<String>(hashers: [.sdbm, .djb2], byteCount: 8) var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
let expectedSerialization = Data(#"{"data":"ABAAAAACAJA=","hashers":["djb2","sdbm"]}"#.utf8) let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)
sut.insert("lol") sut.insert("lol")
sut.insert("ok") sut.insert("ok")
@ -36,15 +45,80 @@ final class CodableBloomFilterTests: XCTestCase {
encoder.outputFormatting = .sortedKeys encoder.outputFormatting = .sortedKeys
let serialization = try encoder.encode(sut) let data = try encoder.encode(sut)
XCTAssertEqual(serialization, expectedSerialization) XCTAssertEqual(data, expectedData)
let decoded = try JSONDecoder().decode(BloomFilter<String>.self, from: serialization) let decoded = try JSONDecoder().decode(BloomFilter<String>.self, from: data)
XCTAssert(decoded.contains("lol")) XCTAssert(decoded.contains("lol"))
XCTAssert(decoded.contains("ok")) XCTAssert(decoded.contains("ok"))
XCTAssertFalse(decoded.contains("wtf")) XCTAssertFalse(decoded.contains("wtf"))
XCTAssertFalse(decoded.contains("no")) XCTAssertFalse(decoded.contains("no"))
} }
/// Decoding a payload whose `hashes` array contains an unrecognised raw value
/// is expected to fail with `DecodingError.dataCorrupted`.
func testInvalidHash() throws {
    let payload = #"{"data":"ABAAAAACAJA=","hashes":["djb232","invalid"]}"#
    let decoder = JSONDecoder()

    XCTAssertThrowsError(try decoder.decode(BloomFilter<String>.self, from: Data(payload.utf8))) { error in
        if case DecodingError.dataCorrupted = error { return }

        XCTFail("Expected data corrupted error")
    }
}
/// Encodes a populated filter through a custom `dataEncodingStrategy` that writes
/// each byte as two lowercase hex digits, then compares the JSON with the expected bytes.
func testDataEncodingStrategy() throws {
    let hexEncoder = JSONEncoder()

    hexEncoder.outputFormatting = .sortedKeys
    hexEncoder.dataEncodingStrategy = .custom { bytes, encoder in
        let hex = bytes.map { String(format: "%02.2hhx", $0) }.joined()
        var container = encoder.singleValueContainer()

        try container.encode(hex)
    }

    var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)

    for member in ["lol", "ok"] {
        sut.insert(member)
    }

    let expectedData = Data(#"{"data":"0010000000020090","hashes":["djb232","sdbm32"]}"#.utf8)
    let encoded = try hexEncoder.encode(sut)

    XCTAssertEqual(encoded, expectedData)
}
/// Decodes a filter whose `data` field is a hex string, using a custom
/// `dataDecodingStrategy`, and checks membership on the decoded filter.
func testDataDecodingStrategy() throws {
    let data = Data(#"{"data":"0010000000020090","hashes":["djb232","sdbm32"]}"#.utf8)
    let decoder = JSONDecoder()

    decoder.dataDecodingStrategy = .custom { decoder in
        let container = try decoder.singleValueContainer()
        let string = try container.decode(String.self)
        var bytes = [UInt8]()
        var i = string.startIndex

        while i != string.endIndex {
            // `limitedBy:` returns nil instead of trapping when fewer than two
            // characters remain, so an odd-length string is reported as corrupted data.
            guard let j = string.index(i, offsetBy: 2, limitedBy: string.endIndex),
                  let byte = UInt8(string[i..<j], radix: 16) else {
                throw DecodingError.dataCorruptedError(in: container, debugDescription: "Invalid byte")
            }

            bytes.append(byte)
            i = j
        }

        return Data(bytes)
    }

    let sut = try decoder.decode(BloomFilter<String>.self, from: data)

    XCTAssert(sut.contains("lol"))
    XCTAssert(sut.contains("ok"))
    XCTAssertFalse(sut.contains("wtf"))
    XCTAssertFalse(sut.contains("no"))
}
} }

View file

@ -62,7 +62,7 @@ private extension InstanceFilterService {
static let updatedFilterUserDefaultsKey = "updatedFilter" static let updatedFilterUserDefaultsKey = "updatedFilter"
// Ugly, but baking this into the compiled app instead of loading the data from the bundle is more secure // Ugly, but baking this into the compiled app instead of loading the data from the bundle is more secure
// swiftlint:disable line_length // swiftlint:disable line_length
static let defaultFilterData = #"{"hashers":["djb2","djb2a","fnv1","fnv1a","sdbm"],"data":"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAIAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAgAAAAAQAAAAAABAAACAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAABAAAEAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAIAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAIAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAIAAAQAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAQAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAADAAAAAAAAAAAAA=="}"# static let defaultFilterData = 
#"{"hashes":["djb232","djb2a32","fnv132","fnv1a32","sdbm32"],"data":"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAIAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAgAAAAAQAAAAAABAAACAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAABAAAEAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAIAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAIAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAIAAAQAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAQAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAADAAAAAAAAAAAAA=="}"#
.data(using: .utf8)! .data(using: .utf8)!
// swiftlint:enable line_length // swiftlint:enable line_length
// swiftlint:disable force_try // swiftlint:disable force_try

View file

@ -29,7 +29,7 @@ class InstanceFilterServiceTests: XCTestCase {
XCTAssertTrue(sut.isFiltered(url: previouslyFilteredInstanceURL)) XCTAssertTrue(sut.isFiltered(url: previouslyFilteredInstanceURL))
XCTAssertFalse(sut.isFiltered(url: newlyFilteredInstanceURL)) XCTAssertFalse(sut.isFiltered(url: newlyFilteredInstanceURL))
var updatedFilter = BloomFilter<String>(hashers: [.djb2, .sdbm], byteCount: 16) var updatedFilter = try BloomFilter<String>(hashes: [.djb232, .sdbm32], byteCount: 16)
updatedFilter.insert("instance.filtered") updatedFilter.insert("instance.filtered")