mirror of
https://github.com/metabolist/metatext.git
synced 2025-01-13 07:15:24 +00:00
Bloom filter data property and initialization
This commit is contained in:
parent
6e0dcd6398
commit
f02b1e033a
3 changed files with 38 additions and 7 deletions
|
@ -7,14 +7,16 @@ import Foundation
|
|||
/// A sequence of bits stored as an array of bytes.
struct BitArray {
    /// Backing storage for the bits.
    private var bytes: [UInt8]

    /// Creates a bit array backed by `byteCount` zeroed bytes.
    ///
    /// - Parameter byteCount: The number of bytes of storage to allocate.
    init(byteCount: Int) {
        bytes = [UInt8](repeating: 0, count: byteCount)
    }

    /// Creates a bit array whose backing bytes are copied from `data`.
    ///
    /// - Parameter data: The bytes to use as the initial contents.
    init(data: Data) {
        bytes = Array(data)
    }
}
|
||||
|
||||
extension BitArray {
|
||||
var bitCount: Int { bytes.count * Self.bitsInByte }
|
||||
|
||||
var data: Data { Data(bytes) }
|
||||
|
||||
subscript(index: Int) -> Bool {
|
||||
get {
|
||||
let (byteIndex, bitIndex) = Self.byteAndBitIndices(index: index)
|
||||
|
|
|
@ -11,32 +11,43 @@ enum BloomFilterError: Error {
|
|||
}
|
||||
|
||||
/// A Bloom filter over `T` whose bit storage can be supplied as, and encoded to, raw bytes.
public struct BloomFilter<T: DeterministicallyHashable>: Codable {
    /// Coding keys; the bit storage is encoded under the `"data"` key.
    enum CodingKeys: String, CodingKey {
        case hashes
        case bits = "data"
    }

    /// The hashes applied to each member, sorted by raw value.
    public let hashes: [Hash]

    /// The bit storage backing the filter.
    private var bits: BitArray

    /// Creates a filter with `byteCount` zeroed bytes of bit storage.
    ///
    /// - Parameters:
    ///   - hashes: The hashes to apply to members; must not be empty.
    ///   - byteCount: The number of bytes of bit storage.
    /// - Throws: `BloomFilterError.noHashesProvided` when `hashes` is empty.
    public init(hashes: Set<Hash>, byteCount: Int) throws {
        try self.init(hashes: hashes, data: Data(repeating: 0, count: byteCount))
    }

    /// Creates a filter whose bit storage is initialized from `data`.
    ///
    /// - Parameters:
    ///   - hashes: The hashes to apply to members; must not be empty.
    ///   - data: The bytes to use as the filter's bit storage.
    /// - Throws: `BloomFilterError.noHashesProvided` when `hashes` is empty.
    public init(hashes: Set<Hash>, data: Data) throws {
        guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided }

        // Sort the hashes for consistent decoding output
        self.hashes = hashes.sorted { $0.rawValue < $1.rawValue }
        bits = BitArray(data: data)
    }
}
|
||||
|
||||
public extension BloomFilter {
    /// The filter's bit storage as raw bytes.
    var data: Data { bits.data }

    /// Sets the bit at every index computed for `newMember`.
    ///
    /// - Parameter newMember: The member to record in the filter.
    mutating func insert(_ newMember: T) {
        for index in indices(newMember) {
            bits[index] = true
        }
    }

    /// Reports whether the bit at every index computed for `member` is set.
    ///
    /// - Parameter member: The member to look up.
    /// - Returns: `true` when all of the member's bits are set, otherwise `false`.
    func contains(_ member: T) -> Bool {
        indices(member).allSatisfy { bits[$0] }
    }
}
|
||||
|
||||
private extension BloomFilter {
    /// Computes one bit index per hash for `member`.
    ///
    /// Each index is the absolute value of the hash result reduced modulo the
    /// number of bits in the filter's storage.
    ///
    /// - Parameter member: The member to hash.
    /// - Returns: The bit indices, in the same order as `hashes`.
    func indices(_ member: T) -> [Int] {
        hashes.map { abs($0.apply(member)) % bits.bitCount }
    }
}
|
||||
|
|
|
@ -34,6 +34,24 @@ final class CodableBloomFilterTests: XCTestCase {
|
|||
XCTAssertFalse(sut.contains("no"))
|
||||
}
|
||||
|
||||
/// Checks that inserting two members yields the expected bytes from `data`.
func testData() throws {
    let expectedBytes: [UInt8] = [0, 16, 0, 0, 0, 2, 0, 144]
    var filter = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)

    for member in ["lol", "ok"] {
        filter.insert(member)
    }

    XCTAssertEqual(filter.data, Data(expectedBytes))
}
|
||||
|
||||
/// Checks that a filter built from existing bytes reports the expected membership.
func testFromData() throws {
    let storedBytes = Data([0, 16, 0, 0, 0, 2, 0, 144])
    let filter = try BloomFilter<String>(hashes: [.sdbm32, .djb232], data: storedBytes)

    // Members whose bits are present in the stored bytes.
    XCTAssert(filter.contains("lol"))
    XCTAssert(filter.contains("ok"))

    // Members whose bits are not all present.
    XCTAssertFalse(filter.contains("wtf"))
    XCTAssertFalse(filter.contains("no"))
}
|
||||
|
||||
func testCoding() throws {
|
||||
var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
|
||||
let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)
|
||||
|
|
Loading…
Reference in a new issue