From f02b1e033a0f1a0049148b5e4e6e4d4125d57311 Mon Sep 17 00:00:00 2001 From: Justin Mazzocchi <2831158+jzzocc@users.noreply.github.com> Date: Tue, 8 Sep 2020 02:07:15 -0700 Subject: [PATCH] Bloom filter data property and initialization --- .../Sources/CodableBloomFilter/BitArray.swift | 6 ++++-- .../CodableBloomFilter/BloomFilter.swift | 21 ++++++++++++++----- .../CodableBloomFilterTests.swift | 18 ++++++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift index 1c1cdf9..f0cc651 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift @@ -7,14 +7,16 @@ import Foundation struct BitArray { private var bytes: [UInt8] - init(byteCount: Int) { - bytes = [UInt8](repeating: 0, count: byteCount) + init(data: Data) { + bytes = Array(data) } } extension BitArray { var bitCount: Int { bytes.count * Self.bitsInByte } + var data: Data { Data(bytes) } + subscript(index: Int) -> Bool { get { let (byteIndex, bitIndex) = Self.byteAndBitIndices(index: index) diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift index 3b1d3b7..1497c4d 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift @@ -11,32 +11,43 @@ enum BloomFilterError: Error { } public struct BloomFilter: Codable { + enum CodingKeys: String, CodingKey { + case hashes + case bits = "data" + } + public let hashes: [Hash] - private var data: BitArray + private var bits: BitArray public init(hashes: Set, byteCount: Int) throws { + try self.init(hashes: hashes, data: Data(repeating: 0, count: byteCount)) + } + + public init(hashes: Set, data: Data) throws { guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided } // Sort the hashes for consistent decoding output self.hashes = Array(hashes.sorted { $0.rawValue < $1.rawValue }) - data = BitArray(byteCount: byteCount) + bits = BitArray(data: data) } } public extension BloomFilter { + var data: Data { bits.data } + mutating func insert(_ newMember: T) { for index in indices(newMember) { - data[index] = true + bits[index] = true } } func contains(_ member: T) -> Bool { - indices(member).allSatisfy { data[$0] } + indices(member).allSatisfy { bits[$0] } } } private extension BloomFilter { func indices(_ member: T) -> [Int] { - hashes.map { abs($0.apply(member)) % data.bitCount } + hashes.map { abs($0.apply(member)) % bits.bitCount } } } diff --git a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift index 5e68479..e7af542 100644 --- a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift +++ b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift @@ -34,6 +34,24 @@ final class CodableBloomFilterTests: XCTestCase { XCTAssertFalse(sut.contains("no")) } + func testData() throws { + var sut = try BloomFilter(hashes: [.sdbm32, .djb232], byteCount: 8) + + sut.insert("lol") + sut.insert("ok") + + XCTAssertEqual(sut.data, Data([0, 16, 0, 0, 0, 2, 0, 144])) + } + + func testFromData() throws { + let sut = try BloomFilter(hashes: [.sdbm32, .djb232], data: Data([0, 16, 0, 0, 0, 2, 0, 144])) + + XCTAssert(sut.contains("lol")) + XCTAssert(sut.contains("ok")) + XCTAssertFalse(sut.contains("wtf")) + XCTAssertFalse(sut.contains("no")) + } + func testCoding() throws { var sut = try BloomFilter(hashes: [.sdbm32, .djb232], byteCount: 8) let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)