mirror of
https://github.com/metabolist/metatext.git
synced 2024-12-22 21:46:28 +00:00
Implement serializable Bloom filter
This commit is contained in:
parent
4e029b40ac
commit
781be478ba
6 changed files with 186 additions and 0 deletions
|
@ -86,6 +86,7 @@
|
|||
D047FA8C24C3E21200AF17C5 /* Metatext.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Metatext.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
D0666A2124C677B400F3F04B /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
D0666A2524C677B400F3F04B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||
D07E164425037264008B10D0 /* SerializableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = SerializableBloomFilter; sourceTree = "<group>"; };
|
||||
D085C3BB25008DEC008A6C5E /* DB */ = {isa = PBXFileReference; lastKnownFileType = folder; path = DB; sourceTree = "<group>"; };
|
||||
D0BDF66524FD7A6400C7FA1C /* ServiceLayer */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ServiceLayer; sourceTree = "<group>"; };
|
||||
D0BEB1F224F8EE8C001B0F04 /* AttachmentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AttachmentView.swift; sourceTree = "<group>"; };
|
||||
|
@ -181,6 +182,7 @@
|
|||
D047FA7F24C3E21000AF17C5 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
D07E164425037264008B10D0 /* SerializableBloomFilter */,
|
||||
D0C7D45224F76169001EBDBB /* Assets.xcassets */,
|
||||
D085C3BB25008DEC008A6C5E /* DB */,
|
||||
D0C7D46824F76169001EBDBB /* Extensions */,
|
||||
|
|
5
SerializableBloomFilter/.gitignore
vendored
Normal file
5
SerializableBloomFilter/.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
.DS_Store
|
||||
/.build
|
||||
/Packages
|
||||
/*.xcodeproj
|
||||
xcuserdata/
|
25
SerializableBloomFilter/Package.swift
Normal file
25
SerializableBloomFilter/Package.swift
Normal file
|
@ -0,0 +1,25 @@
|
|||
// swift-tools-version:5.3
|
||||
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "SerializableBloomFilter",
|
||||
platforms: [
|
||||
.iOS(.v14),
|
||||
.macOS(.v11)
|
||||
],
|
||||
products: [
|
||||
.library(
|
||||
name: "SerializableBloomFilter",
|
||||
targets: ["SerializableBloomFilter"])
|
||||
],
|
||||
dependencies: [],
|
||||
targets: [
|
||||
.target(
|
||||
name: "SerializableBloomFilter",
|
||||
dependencies: []),
|
||||
.testTarget(
|
||||
name: "SerializableBloomFilterTests",
|
||||
dependencies: ["SerializableBloomFilter"])
|
||||
]
|
||||
)
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright © 2020 Metabolist. All rights reserved.
|
||||
|
||||
// Adapted from https://github.com/dduan/BitArray
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A fixed-length run of bits packed eight to a byte.
struct Bits {
    /// Number of bits requested at initialization.
    let count: Int

    /// Packed storage for the bits.
    private var bytes: [UInt8]

    /// Creates `count` cleared bits, rounding the storage up to whole bytes.
    init(count: Int) {
        let wholeBytes = count / Self.bitsInByte
        let hasPartialByte = count % Self.bitsInByte > 0

        self.count = count
        self.bytes = [UInt8](repeating: 0, count: hasPartialByte ? wholeBytes + 1 : wholeBytes)
    }

    /// Wraps previously exported storage.
    ///
    /// Neither argument is validated here; `bytes` is stored as given.
    init(bytes: [UInt8], count: Int) {
        self.count = count
        self.bytes = bytes
    }
}
|
||||
|
||||
extension Bits {
    /// The packed storage as `Data`.
    var data: Data {
        Data(bytes)
    }

    /// Reads or writes the bit at `index`.
    ///
    /// Bit `index` lives in byte `index / 8`, at position `index % 8` within that byte.
    subscript(index: Int) -> Bool {
        get {
            let byteIndex = index / Self.bitsInByte
            let bitOffset = index % Self.bitsInByte

            return bytes[byteIndex] & mask(index: bitOffset) != 0
        }

        set {
            let byteIndex = index / Self.bitsInByte
            let bitOffset = index % Self.bitsInByte

            bytes[byteIndex] = newValue
                ? bytes[byteIndex] | mask(index: bitOffset)
                : bytes[byteIndex] & ~mask(index: bitOffset)
        }
    }
}
|
||||
|
||||
private extension Bits {
    /// Width of one storage element, in bits.
    static let bitsInByte = 8

    /// Returns a byte with only the bit at position `index` set.
    ///
    /// Positions outside `0...7` are a programmer error and terminate the process.
    func mask(index: Int) -> UInt8 {
        guard (0..<Self.bitsInByte).contains(index) else {
            fatalError("Invalid index: \(index)")
        }

        return UInt8(1) << index
    }
}
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright © 2020 Metabolist. All rights reserved.
|
||||
|
||||
import Foundation
|
||||
|
||||
// https://en.wikipedia.org/wiki/Bloom_filter
|
||||
// https://khanlou.com/2018/09/bloom-filters/
|
||||
// This implementation uses deterministic hashing functions so it can be serialized / deserialized
|
||||
|
||||
/// A fixed-size Bloom filter over strings whose bit storage can be exported and restored.
struct SerializableBloomFilter {
    /// Reasons a serialized filter is rejected by `init(serialization:)`.
    enum SerializationError: Error, Equatable {
        /// The payload does not contain the number of bytes the filter's bit storage needs.
        case invalidByteCount(expected: Int, actual: Int)
    }

    private var items: Bits

    /// Creates an empty filter.
    init() {
        items = Bits(count: Self.itemCount)
    }

    /// Restores a filter from data previously produced by `serialization`.
    ///
    /// - Parameter serialization: The exported bit storage.
    /// - Throws: `SerializationError.invalidByteCount` when the payload size does not match
    ///   the filter's storage size.
    init(serialization: Data) throws {
        // Ask `Bits` how many bytes it uses for this bit count rather than duplicating its
        // packing rule here.
        let expectedByteCount = Bits(count: Self.itemCount).data.count

        // Without this check a short payload yields a filter that crashes with an
        // out-of-bounds access on the first lookup or insertion.
        guard serialization.count == expectedByteCount else {
            throw SerializationError.invalidByteCount(
                expected: expectedByteCount,
                actual: serialization.count)
        }

        items = Bits(bytes: Array(serialization), count: Self.itemCount)
    }
}
|
||||
|
||||
extension SerializableBloomFilter {
    /// The filter's bit storage, suitable for passing to `init(serialization:)`.
    var serialization: Data {
        items.data
    }

    /// Records `newMember` by setting every bit its hashes select.
    mutating func insert(_ newMember: String) {
        Self.indices(newMember).forEach { items[$0] = true }
    }

    /// Returns `true` when every bit selected by `member`'s hashes is set.
    ///
    /// As with any Bloom filter, `true` may be a false positive; `false` is definitive.
    func contains(_ member: String) -> Bool {
        for index in Self.indices(member) where !items[index] {
            return false
        }

        return true
    }
}
|
||||
|
||||
private extension SerializableBloomFilter {
    /// Number of bits in the filter.
    static let itemCount = 1024

    /// Deterministic hash functions; each contributes one bit position per string.
    static let hashFunctions = [djb2, sdbm]

    /// Returns the bit positions selected for `string`, one per hash function.
    ///
    /// Each position is the hash's absolute value modulo `itemCount`.
    static func indices(_ string: String) -> [Int] {
        // `magnitude` is used instead of `abs(_:)` because `abs(Int.min)` traps, and the
        // wrapping hashes can produce any `Int`. For every other value the result is
        // identical, so previously serialized filters remain valid.
        hashFunctions.map { Int($0(string).magnitude % UInt(itemCount)) }
    }
}
|
||||
|
||||
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
||||
|
||||
/// djb2 string hash over the Unicode scalar values of `string`.
///
/// Starts at 5381 and, for each scalar, computes `hash * 33 + scalar` with wrapping
/// arithmetic, so the result is deterministic across runs.
private func djb2(_ string: String) -> Int {
    var hash = 5381

    for scalar in string.unicodeScalars {
        hash = (hash << 5) &+ hash &+ Int(scalar.value)
    }

    return hash
}
|
||||
|
||||
/// sdbm string hash over the Unicode scalar values of `string`.
///
/// Starts at 0 and, for each scalar, computes
/// `scalar + (hash << 6) + (hash << 16) - hash` with wrapping arithmetic, so the result
/// is deterministic across runs.
private func sdbm(_ string: String) -> Int {
    string.unicodeScalars.map(\.value).reduce(0) {
        // The subtraction must wrap like the additions do: a plain `-` traps on overflow,
        // which becomes likely once the accumulator spans the full `Int` range.
        Int($1) &+ ($0 << 6) &+ ($0 << 16) &- $0
    }
}
|
|
@ -0,0 +1,31 @@
|
|||
@testable import SerializableBloomFilter
|
||||
import XCTest
|
||||
|
||||
/// Exercises membership queries and the serialization round trip of `SerializableBloomFilter`.
final class SerializableBloomFilterTests: XCTestCase {
    /// Inserted members are reported as present; strings never inserted are not.
    func testContains() {
        var filter = SerializableBloomFilter()

        filter.insert("lol")
        filter.insert("ok")

        XCTAssert(filter.contains("lol"))
        XCTAssert(filter.contains("ok"))
        XCTAssertFalse(filter.contains("wtf"))
        XCTAssertFalse(filter.contains("no"))
    }

    /// A filter rebuilt from its serialization answers membership queries like the original.
    func testSerialization() throws {
        var filter = SerializableBloomFilter()

        filter.insert("lol")
        filter.insert("ok")

        let serialization = filter.serialization
        let deserializedFilter = try SerializableBloomFilter(serialization: serialization)

        // Every assertion targets the deserialized filter; checking the original here
        // would not exercise the round trip.
        XCTAssert(deserializedFilter.contains("lol"))
        XCTAssert(deserializedFilter.contains("ok"))
        XCTAssertFalse(deserializedFilter.contains("wtf"))
        XCTAssertFalse(deserializedFilter.contains("no"))
    }
}
|
Loading…
Reference in a new issue