mirror of
https://github.com/metabolist/metatext.git
synced 2024-11-25 09:41:00 +00:00
Bloom filter data property and initialization
This commit is contained in:
parent
6e0dcd6398
commit
f02b1e033a
3 changed files with 38 additions and 7 deletions
|
@ -7,14 +7,16 @@ import Foundation
|
||||||
struct BitArray {
|
struct BitArray {
|
||||||
private var bytes: [UInt8]
|
private var bytes: [UInt8]
|
||||||
|
|
||||||
init(byteCount: Int) {
|
init(data: Data) {
|
||||||
bytes = [UInt8](repeating: 0, count: byteCount)
|
bytes = Array(data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extension BitArray {
|
extension BitArray {
|
||||||
var bitCount: Int { bytes.count * Self.bitsInByte }
|
var bitCount: Int { bytes.count * Self.bitsInByte }
|
||||||
|
|
||||||
|
var data: Data { Data(bytes) }
|
||||||
|
|
||||||
subscript(index: Int) -> Bool {
|
subscript(index: Int) -> Bool {
|
||||||
get {
|
get {
|
||||||
let (byteIndex, bitIndex) = Self.byteAndBitIndices(index: index)
|
let (byteIndex, bitIndex) = Self.byteAndBitIndices(index: index)
|
||||||
|
|
|
@ -11,32 +11,43 @@ enum BloomFilterError: Error {
|
||||||
}
|
}
|
||||||
|
|
||||||
public struct BloomFilter<T: DeterministicallyHashable>: Codable {
|
public struct BloomFilter<T: DeterministicallyHashable>: Codable {
|
||||||
|
enum CodingKeys: String, CodingKey {
|
||||||
|
case hashes
|
||||||
|
case bits = "data"
|
||||||
|
}
|
||||||
|
|
||||||
public let hashes: [Hash]
|
public let hashes: [Hash]
|
||||||
|
|
||||||
private var data: BitArray
|
private var bits: BitArray
|
||||||
|
|
||||||
public init(hashes: Set<Hash>, byteCount: Int) throws {
|
public init(hashes: Set<Hash>, byteCount: Int) throws {
|
||||||
|
try self.init(hashes: hashes, data: Data(repeating: 0, count: byteCount))
|
||||||
|
}
|
||||||
|
|
||||||
|
public init(hashes: Set<Hash>, data: Data) throws {
|
||||||
guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided }
|
guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided }
|
||||||
// Sort the hashes for consistent decoding output
|
// Sort the hashes for consistent decoding output
|
||||||
self.hashes = Array(hashes.sorted { $0.rawValue < $1.rawValue })
|
self.hashes = Array(hashes.sorted { $0.rawValue < $1.rawValue })
|
||||||
data = BitArray(byteCount: byteCount)
|
bits = BitArray(data: data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public extension BloomFilter {
|
public extension BloomFilter {
|
||||||
|
var data: Data { bits.data }
|
||||||
|
|
||||||
mutating func insert(_ newMember: T) {
|
mutating func insert(_ newMember: T) {
|
||||||
for index in indices(newMember) {
|
for index in indices(newMember) {
|
||||||
data[index] = true
|
bits[index] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func contains(_ member: T) -> Bool {
|
func contains(_ member: T) -> Bool {
|
||||||
indices(member).allSatisfy { data[$0] }
|
indices(member).allSatisfy { bits[$0] }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private extension BloomFilter {
|
private extension BloomFilter {
|
||||||
func indices(_ member: T) -> [Int] {
|
func indices(_ member: T) -> [Int] {
|
||||||
hashes.map { abs($0.apply(member)) % data.bitCount }
|
hashes.map { abs($0.apply(member)) % bits.bitCount }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,24 @@ final class CodableBloomFilterTests: XCTestCase {
|
||||||
XCTAssertFalse(sut.contains("no"))
|
XCTAssertFalse(sut.contains("no"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func testData() throws {
|
||||||
|
var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
|
||||||
|
|
||||||
|
sut.insert("lol")
|
||||||
|
sut.insert("ok")
|
||||||
|
|
||||||
|
XCTAssertEqual(sut.data, Data([0, 16, 0, 0, 0, 2, 0, 144]))
|
||||||
|
}
|
||||||
|
|
||||||
|
func testFromData() throws {
|
||||||
|
let sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], data: Data([0, 16, 0, 0, 0, 2, 0, 144]))
|
||||||
|
|
||||||
|
XCTAssert(sut.contains("lol"))
|
||||||
|
XCTAssert(sut.contains("ok"))
|
||||||
|
XCTAssertFalse(sut.contains("wtf"))
|
||||||
|
XCTAssertFalse(sut.contains("no"))
|
||||||
|
}
|
||||||
|
|
||||||
func testCoding() throws {
|
func testCoding() throws {
|
||||||
var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
|
var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
|
||||||
let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)
|
let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)
|
||||||
|
|
Loading…
Reference in a new issue