This commit is contained in:
Justin Mazzocchi 2020-09-05 15:52:22 -07:00
parent 321cea3ccd
commit d0e9b2a1e3
No known key found for this signature in database
GPG key ID: E223E6937AAFB01C
3 changed files with 27 additions and 28 deletions

View file

@@ -4,10 +4,10 @@
import Foundation
struct Bits {
struct BitArray {
let count: Int
private var bytes: [UInt8]
private var items: [UInt8]
init(count: Int) {
self.count = count
@@ -16,38 +16,38 @@ struct Bits {
byteCount += bitRemainder > 0 ? 1 : 0
bytes = [UInt8](repeating: 0, count: byteCount)
items = [UInt8](repeating: 0, count: byteCount)
}
init(bytes: [UInt8], count: Int) {
self.bytes = bytes
init(data: Data, count: Int) {
self.items = Array(data)
self.count = count
}
}
extension Bits {
var data: Data { Data(bytes) }
extension BitArray {
var data: Data { Data(items) }
subscript(index: Int) -> Bool {
get {
let (byteCount, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte)
return bytes[byteCount] & mask(index: bitPosition) > 0
return items[byteCount] & mask(index: bitPosition) > 0
}
set {
let (byteCount, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte)
if newValue {
bytes[byteCount] |= mask(index: bitPosition)
items[byteCount] |= mask(index: bitPosition)
} else {
bytes[byteCount] &= ~mask(index: bitPosition)
items[byteCount] &= ~mask(index: bitPosition)
}
}
}
}
private extension Bits {
private extension BitArray {
static let bitsInByte = 8
func mask(index: Int) -> UInt8 {

View file

@@ -8,14 +8,14 @@ import Foundation
struct BloomFilter {
let hashes: [Hash]
let bitCount: Int
let bits: Int
private var bits: Bits
private var data: BitArray
init(hashes: [Hash], bitCount: Int) {
init(hashes: [Hash], bits: Int) {
self.hashes = hashes
self.bitCount = bitCount
bits = Bits(count: bitCount)
self.bits = bits
data = BitArray(count: bits)
}
}
@@ -27,43 +27,42 @@ extension BloomFilter {
mutating func insert(_ newMember: String) {
for index in indices(newMember) {
bits[index] = true
data[index] = true
}
}
func contains(_ member: String) -> Bool {
indices(member).map { bits[$0] }.allSatisfy { $0 }
indices(member).map { data[$0] }.allSatisfy { $0 }
}
}
extension BloomFilter: Codable {
enum CodingKeys: String, CodingKey {
case hashes
case bitCount
case bits
case data
}
init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
let data = try container.decode(Data.self, forKey: .data)
hashes = try container.decode([Hash].self, forKey: .hashes)
bitCount = try container.decode(Int.self, forKey: .bitCount)
bits = Bits(bytes: Array(data), count: bitCount)
bits = try container.decode(Int.self, forKey: .bits)
data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
}
func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(hashes, forKey: .hashes)
try container.encode(bitCount, forKey: .bitCount)
try container.encode(bits.data, forKey: .data)
try container.encode(bits, forKey: .bits)
try container.encode(data.data, forKey: .data)
}
}
private extension BloomFilter {
func indices(_ string: String) -> [Int] {
hashes.map { abs($0.apply(string)) % bitCount }
hashes.map { abs($0.apply(string)) % bits }
}
}

View file

@@ -3,7 +3,7 @@ import XCTest
final class CodableBloomFilterTests: XCTestCase {
func testContains() {
var sut = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 1024)
var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 1024)
sut.insert("lol")
sut.insert("ok")
@@ -15,8 +15,8 @@ final class CodableBloomFilterTests: XCTestCase {
}
func testCoding() throws {
var sut = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 64)
let expectedSerialization = Data(#"{"bitCount":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64)
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
sut.insert("lol")
sut.insert("ok")