This commit is contained in:
Justin Mazzocchi 2020-09-05 15:52:22 -07:00
parent 321cea3ccd
commit d0e9b2a1e3
No known key found for this signature in database
GPG key ID: E223E6937AAFB01C
3 changed files with 27 additions and 28 deletions

View file

@ -4,10 +4,10 @@
import Foundation import Foundation
struct Bits { struct BitArray {
let count: Int let count: Int
private var bytes: [UInt8] private var items: [UInt8]
init(count: Int) { init(count: Int) {
self.count = count self.count = count
@ -16,38 +16,38 @@ struct Bits {
byteCount += bitRemainder > 0 ? 1 : 0 byteCount += bitRemainder > 0 ? 1 : 0
bytes = [UInt8](repeating: 0, count: byteCount) items = [UInt8](repeating: 0, count: byteCount)
} }
init(bytes: [UInt8], count: Int) { init(data: Data, count: Int) {
self.bytes = bytes self.items = Array(data)
self.count = count self.count = count
} }
} }
extension Bits { extension BitArray {
var data: Data { Data(bytes) } var data: Data { Data(items) }
subscript(index: Int) -> Bool { subscript(index: Int) -> Bool {
get { get {
let (byteCount, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte) let (byteCount, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte)
return bytes[byteCount] & mask(index: bitPosition) > 0 return items[byteCount] & mask(index: bitPosition) > 0
} }
set { set {
let (byteCount, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte) let (byteCount, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte)
if newValue { if newValue {
bytes[byteCount] |= mask(index: bitPosition) items[byteCount] |= mask(index: bitPosition)
} else { } else {
bytes[byteCount] &= ~mask(index: bitPosition) items[byteCount] &= ~mask(index: bitPosition)
} }
} }
} }
} }
private extension Bits { private extension BitArray {
static let bitsInByte = 8 static let bitsInByte = 8
func mask(index: Int) -> UInt8 { func mask(index: Int) -> UInt8 {

View file

@ -8,14 +8,14 @@ import Foundation
struct BloomFilter { struct BloomFilter {
let hashes: [Hash] let hashes: [Hash]
let bitCount: Int let bits: Int
private var bits: Bits private var data: BitArray
init(hashes: [Hash], bitCount: Int) { init(hashes: [Hash], bits: Int) {
self.hashes = hashes self.hashes = hashes
self.bitCount = bitCount self.bits = bits
bits = Bits(count: bitCount) data = BitArray(count: bits)
} }
} }
@ -27,43 +27,42 @@ extension BloomFilter {
mutating func insert(_ newMember: String) { mutating func insert(_ newMember: String) {
for index in indices(newMember) { for index in indices(newMember) {
bits[index] = true data[index] = true
} }
} }
func contains(_ member: String) -> Bool { func contains(_ member: String) -> Bool {
indices(member).map { bits[$0] }.allSatisfy { $0 } indices(member).map { data[$0] }.allSatisfy { $0 }
} }
} }
extension BloomFilter: Codable { extension BloomFilter: Codable {
enum CodingKeys: String, CodingKey { enum CodingKeys: String, CodingKey {
case hashes case hashes
case bitCount case bits
case data case data
} }
init(from decoder: Decoder) throws { init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self) let container = try decoder.container(keyedBy: CodingKeys.self)
let data = try container.decode(Data.self, forKey: .data)
hashes = try container.decode([Hash].self, forKey: .hashes) hashes = try container.decode([Hash].self, forKey: .hashes)
bitCount = try container.decode(Int.self, forKey: .bitCount) bits = try container.decode(Int.self, forKey: .bits)
bits = Bits(bytes: Array(data), count: bitCount) data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
} }
func encode(to encoder: Encoder) throws { func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self) var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(hashes, forKey: .hashes) try container.encode(hashes, forKey: .hashes)
try container.encode(bitCount, forKey: .bitCount) try container.encode(bits, forKey: .bits)
try container.encode(bits.data, forKey: .data) try container.encode(data.data, forKey: .data)
} }
} }
private extension BloomFilter { private extension BloomFilter {
func indices(_ string: String) -> [Int] { func indices(_ string: String) -> [Int] {
hashes.map { abs($0.apply(string)) % bitCount } hashes.map { abs($0.apply(string)) % bits }
} }
} }

View file

@ -3,7 +3,7 @@ import XCTest
final class CodableBloomFilterTests: XCTestCase { final class CodableBloomFilterTests: XCTestCase {
func testContains() { func testContains() {
var sut = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 1024) var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 1024)
sut.insert("lol") sut.insert("lol")
sut.insert("ok") sut.insert("ok")
@ -15,8 +15,8 @@ final class CodableBloomFilterTests: XCTestCase {
} }
func testCoding() throws { func testCoding() throws {
var sut = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 64) var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64)
let expectedSerialization = Data(#"{"bitCount":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8) let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
sut.insert("lol") sut.insert("lol")
sut.insert("ok") sut.insert("ok")