mirror of
https://github.com/metabolist/metatext.git
synced 2024-11-22 16:21:00 +00:00
Make bloom filter codable
This commit is contained in:
parent
fd71f48a59
commit
321cea3ccd
9 changed files with 154 additions and 114 deletions
21
CodableBloomFilter/Package.swift
Normal file
21
CodableBloomFilter/Package.swift
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
// swift-tools-version:5.3
|
||||||
|
|
||||||
|
import PackageDescription
|
||||||
|
|
||||||
|
// Manifest for the CodableBloomFilter package: a single library product backed by
// one source target, plus the unit-test target that depends on it.
// The package declares no external dependencies.
let package = Package(
    name: "CodableBloomFilter",
    products: [
        .library(name: "CodableBloomFilter", targets: ["CodableBloomFilter"])
    ],
    dependencies: [],
    targets: [
        .target(name: "CodableBloomFilter", dependencies: []),
        .testTarget(name: "CodableBloomFilterTests", dependencies: ["CodableBloomFilter"])
    ]
)
|
|
@ -0,0 +1,92 @@
|
||||||
|
// Copyright © 2020 Metabolist. All rights reserved.
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
// https://en.wikipedia.org/wiki/Bloom_filter
|
||||||
|
// https://khanlou.com/2018/09/bloom-filters/
|
||||||
|
// This implementation uses deterministic hashing functions so it can be serialized / deserialized
|
||||||
|
|
||||||
|
/// A Bloom filter over strings, configured by a list of deterministic hash
/// functions and a bit count.
struct BloomFilter {
    /// Hash functions applied to each member; every one contributes one bit index.
    let hashes: [Hash]
    /// Number of bits requested for the backing storage; also the modulus used
    /// when mapping hash values to bit indices.
    let bitCount: Int

    /// Backing bit storage.
    /// NOTE(review): `Bits` is declared elsewhere; presumably starts all-clear — confirm.
    private var bits: Bits

    /// Creates a filter with fresh bit storage of `bitCount` entries.
    ///
    /// - Parameters:
    ///   - hashes: The hash functions to apply to every inserted or queried string.
    ///   - bitCount: The size passed to `Bits(count:)`.
    init(hashes: [Hash], bitCount: Int) {
        bits = Bits(count: bitCount)
        self.bitCount = bitCount
        self.hashes = hashes
    }
}
|
||||||
|
|
||||||
|
extension BloomFilter {
    /// Identifiers of the supported hash functions.
    /// The raw string value of each case is what gets encoded and decoded.
    enum Hash: String, Codable {
        case djb2, sdbm
    }

    /// Records `newMember` by setting the bit at every index its hashes map to.
    mutating func insert(_ newMember: String) {
        indices(newMember).forEach { bits[$0] = true }
    }

    /// Returns `true` when every bit index derived from `member` is set.
    func contains(_ member: String) -> Bool {
        indices(member).allSatisfy { bits[$0] }
    }
}
|
||||||
|
|
||||||
|
extension BloomFilter: Codable {
    /// Keys of the encoded representation. `data` carries the raw bytes of the
    /// bit storage (`bits.data`).
    enum CodingKeys: String, CodingKey {
        case hashes
        case bitCount
        case data
    }

    /// Decodes a filter from its `hashes`, `bitCount` and `data` entries.
    ///
    /// - Throws: Any error raised while decoding the three keys, or
    ///   `DecodingError.dataCorrupted` when `bitCount` is not positive.
    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        let data = try container.decode(Data.self, forKey: .data)
        let decodedHashes = try container.decode([Hash].self, forKey: .hashes)
        let decodedBitCount = try container.decode(Int.self, forKey: .bitCount)

        // A zero bit count would trap later on the `% bitCount` used to derive bit
        // indices, so reject malformed input here instead of crashing on first use.
        guard decodedBitCount > 0 else {
            throw DecodingError.dataCorruptedError(
                forKey: .bitCount,
                in: container,
                debugDescription: "bitCount must be positive, got \(decodedBitCount)")
        }

        hashes = decodedHashes
        bitCount = decodedBitCount
        // NOTE(review): the byte length of `data` is not checked against `bitCount`
        // because the layout `Bits(bytes:count:)` expects is not visible here — confirm.
        bits = Bits(bytes: Array(data), count: decodedBitCount)
    }

    /// Encodes `hashes`, `bitCount` and the raw bit-storage bytes under `CodingKeys`.
    func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)

        try container.encode(hashes, forKey: .hashes)
        try container.encode(bitCount, forKey: .bitCount)
        try container.encode(bits.data, forKey: .data)
    }
}
|
||||||
|
|
||||||
|
private extension BloomFilter {
    /// Maps `string` to one bit index per configured hash function.
    ///
    /// Each index is the absolute value of the hash reduced modulo `bitCount`.
    /// `magnitude` is used instead of `abs(_:)` because `abs(Int.min)` traps,
    /// while `Int.min.magnitude` is representable as `UInt`. For every other
    /// hash value the resulting index is unchanged.
    ///
    /// - Note: A `bitCount` of zero still traps on the remainder operation.
    func indices(_ string: String) -> [Int] {
        let modulus = bitCount.magnitude
        return hashes.map { Int($0.apply(string).magnitude % modulus) }
    }
}
|
||||||
|
|
||||||
|
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
||||||
|
|
||||||
|
private extension BloomFilter.Hash {
    /// Hashes `string` by folding its Unicode scalar values, starting from
    /// `initial` and combining with `then(result:next:)`.
    func apply(_ string: String) -> Int {
        string.unicodeScalars.map(\.value).reduce(initial, then)
    }

    /// Seed value of the fold for this hash function.
    var initial: Int {
        switch self {
        case .djb2: return 5381
        case .sdbm: return 0
        }
    }

    /// Combines the running hash `result` with the next scalar value.
    ///
    /// All arithmetic wraps on overflow. The sdbm step previously ended with a
    /// trapping `-`, which crashed once the running hash had wrapped (as happens
    /// for longer strings); `&-` keeps every non-overflowing result identical
    /// while removing the trap.
    func then(result: Int, next: UInt32) -> Int {
        switch self {
        case .djb2:
            // result * 33 + next
            return (result << 5) &+ result &+ Int(next)
        case .sdbm:
            // next + result * 64 + result * 65536 - result
            return Int(next) &+ (result << 6) &+ (result << 16) &- result
        }
    }
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
@testable import CodableBloomFilter
|
||||||
|
import XCTest
|
||||||
|
|
||||||
|
/// Unit tests for `BloomFilter` membership and its JSON coding round trip.
final class CodableBloomFilterTests: XCTestCase {
    /// Inserted strings are reported as members; two strings that were never
    /// inserted are not.
    func testContains() {
        var filter = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 1024)

        for member in ["lol", "ok"] {
            filter.insert(member)
        }

        for member in ["lol", "ok"] {
            XCTAssert(filter.contains(member))
        }

        for absent in ["wtf", "no"] {
            XCTAssertFalse(filter.contains(absent))
        }
    }

    /// Encoding yields the expected sorted-key JSON, and the decoded filter
    /// answers membership queries the same way.
    func testCoding() throws {
        let expectedSerialization = Data(#"{"bitCount":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
        var filter = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 64)

        for member in ["lol", "ok"] {
            filter.insert(member)
        }

        // Sorted keys make the JSON output comparable byte for byte.
        let encoder = JSONEncoder()
        encoder.outputFormatting = .sortedKeys

        let serialization = try encoder.encode(filter)

        XCTAssertEqual(serialization, expectedSerialization)

        let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization)

        for member in ["lol", "ok"] {
            XCTAssert(decoded.contains(member))
        }

        for absent in ["wtf", "no"] {
            XCTAssertFalse(decoded.contains(absent))
        }
    }
}
|
|
@ -86,7 +86,6 @@
|
||||||
D047FA8C24C3E21200AF17C5 /* Metatext.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Metatext.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
D047FA8C24C3E21200AF17C5 /* Metatext.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Metatext.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
D0666A2124C677B400F3F04B /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
D0666A2124C677B400F3F04B /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
D0666A2524C677B400F3F04B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
D0666A2524C677B400F3F04B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||||
D07E164425037264008B10D0 /* SerializableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = SerializableBloomFilter; sourceTree = "<group>"; };
|
|
||||||
D085C3BB25008DEC008A6C5E /* DB */ = {isa = PBXFileReference; lastKnownFileType = folder; path = DB; sourceTree = "<group>"; };
|
D085C3BB25008DEC008A6C5E /* DB */ = {isa = PBXFileReference; lastKnownFileType = folder; path = DB; sourceTree = "<group>"; };
|
||||||
D0BDF66524FD7A6400C7FA1C /* ServiceLayer */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ServiceLayer; sourceTree = "<group>"; };
|
D0BDF66524FD7A6400C7FA1C /* ServiceLayer */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ServiceLayer; sourceTree = "<group>"; };
|
||||||
D0BEB1F224F8EE8C001B0F04 /* AttachmentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AttachmentView.swift; sourceTree = "<group>"; };
|
D0BEB1F224F8EE8C001B0F04 /* AttachmentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AttachmentView.swift; sourceTree = "<group>"; };
|
||||||
|
@ -122,6 +121,7 @@
|
||||||
D0C7D46E24F76169001EBDBB /* KingfisherOptionsInfo+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "KingfisherOptionsInfo+Extensions.swift"; sourceTree = "<group>"; };
|
D0C7D46E24F76169001EBDBB /* KingfisherOptionsInfo+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "KingfisherOptionsInfo+Extensions.swift"; sourceTree = "<group>"; };
|
||||||
D0C7D46F24F76169001EBDBB /* View+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "View+Extensions.swift"; sourceTree = "<group>"; };
|
D0C7D46F24F76169001EBDBB /* View+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "View+Extensions.swift"; sourceTree = "<group>"; };
|
||||||
D0C7D47124F76169001EBDBB /* Data+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Data+Extensions.swift"; sourceTree = "<group>"; };
|
D0C7D47124F76169001EBDBB /* Data+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Data+Extensions.swift"; sourceTree = "<group>"; };
|
||||||
|
D0D7C013250440610039AD6F /* CodableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CodableBloomFilter; sourceTree = "<group>"; };
|
||||||
D0E0F1E424FC49FC002C04BF /* Mastodon */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Mastodon; sourceTree = "<group>"; };
|
D0E0F1E424FC49FC002C04BF /* Mastodon */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Mastodon; sourceTree = "<group>"; };
|
||||||
D0E2C1CF24FD8BA400854680 /* ViewModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ViewModels; sourceTree = "<group>"; };
|
D0E2C1CF24FD8BA400854680 /* ViewModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ViewModels; sourceTree = "<group>"; };
|
||||||
D0E5361924E3EB4D00FB1CE1 /* Notification Service Extension.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = "Notification Service Extension.appex"; sourceTree = BUILT_PRODUCTS_DIR; };
|
D0E5361924E3EB4D00FB1CE1 /* Notification Service Extension.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = "Notification Service Extension.appex"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
|
@ -182,8 +182,8 @@
|
||||||
D047FA7F24C3E21000AF17C5 = {
|
D047FA7F24C3E21000AF17C5 = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
D07E164425037264008B10D0 /* SerializableBloomFilter */,
|
|
||||||
D0C7D45224F76169001EBDBB /* Assets.xcassets */,
|
D0C7D45224F76169001EBDBB /* Assets.xcassets */,
|
||||||
|
D0D7C013250440610039AD6F /* CodableBloomFilter */,
|
||||||
D085C3BB25008DEC008A6C5E /* DB */,
|
D085C3BB25008DEC008A6C5E /* DB */,
|
||||||
D0C7D46824F76169001EBDBB /* Extensions */,
|
D0C7D46824F76169001EBDBB /* Extensions */,
|
||||||
D0666A7924C7745A00F3F04B /* Frameworks */,
|
D0666A7924C7745A00F3F04B /* Frameworks */,
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
// swift-tools-version:5.3
|
|
||||||
|
|
||||||
import PackageDescription
|
|
||||||
|
|
||||||
// Manifest for the SerializableBloomFilter package: one library product, its
// source target and its unit-test target, for iOS 14 / macOS 11 and later.
// The package declares no external dependencies.
let package = Package(
    name: "SerializableBloomFilter",
    platforms: [.iOS(.v14), .macOS(.v11)],
    products: [
        .library(name: "SerializableBloomFilter", targets: ["SerializableBloomFilter"])
    ],
    dependencies: [],
    targets: [
        .target(name: "SerializableBloomFilter", dependencies: []),
        .testTarget(name: "SerializableBloomFilterTests", dependencies: ["SerializableBloomFilter"])
    ]
)
|
|
|
@ -1,56 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
// https://en.wikipedia.org/wiki/Bloom_filter
|
|
||||||
// https://khanlou.com/2018/09/bloom-filters/
|
|
||||||
// This implementation uses deterministic hashing functions so it can be serialized / deserialized
|
|
||||||
|
|
||||||
/// A Bloom filter over strings with a fixed bit count and fixed hash functions,
/// convertible to and from raw `Data`.
struct SerializableBloomFilter {
    /// Backing bit storage, sized by `Self.itemCount`.
    private var items: Bits

    /// Creates a filter with fresh bit storage.
    init() {
        self.items = Bits(count: Self.itemCount)
    }

    /// Creates a filter whose bit storage is built from previously serialized bytes.
    ///
    /// - Parameter serialization: Bytes passed on to `Bits(bytes:count:)`.
    /// - Note: Declared `throws`, but nothing in this initializer throws.
    init(serialization: Data) throws {
        let bytes = Array(serialization)
        self.items = Bits(bytes: bytes, count: Self.itemCount)
    }
}
|
|
||||||
|
|
||||||
extension SerializableBloomFilter {
    /// The raw bytes of the bit storage, suitable for `init(serialization:)`.
    var serialization: Data {
        return items.data
    }

    /// Records `newMember` by setting the bit at every index its hashes map to.
    mutating func insert(_ newMember: String) {
        Self.indices(newMember).forEach { items[$0] = true }
    }

    /// Returns `true` when every bit index derived from `member` is set.
    func contains(_ member: String) -> Bool {
        Self.indices(member).allSatisfy { items[$0] }
    }
}
|
|
||||||
|
|
||||||
private extension SerializableBloomFilter {
    /// Number of bits in the filter; also the modulus for bit indices.
    static let itemCount = 1024
    /// Hash functions applied to every string, one bit index each.
    static let hashFunctions = [djb2, sdbm]

    /// Maps `string` to one bit index per hash function.
    ///
    /// Each index is the absolute value of the hash reduced modulo `itemCount`.
    /// `magnitude` replaces `abs(_:)` because `abs(Int.min)` traps, while
    /// `Int.min.magnitude` is representable as `UInt`; every other hash value
    /// produces the same index as before.
    static func indices(_ string: String) -> [Int] {
        let modulus = UInt(itemCount)
        return hashFunctions.map { Int($0(string).magnitude % modulus) }
    }
}
|
|
||||||
|
|
||||||
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
|
||||||
|
|
||||||
/// djb2 string hash: starts at 5381 and, for each Unicode scalar, computes
/// `hash * 33 + scalar` using wrapping addition.
private func djb2(_ string: String) -> Int {
    var hash = 5381
    for scalar in string.unicodeScalars {
        hash = (hash << 5) &+ hash &+ Int(scalar.value)
    }
    return hash
}
|
|
||||||
|
|
||||||
/// sdbm string hash: starts at 0 and, for each Unicode scalar, computes
/// `scalar + hash * 64 + hash * 65536 - hash`.
///
/// Every step wraps on overflow. The final subtraction used to be a trapping
/// `-`, which crashed once the running hash had wrapped (as happens for longer
/// strings); `&-` returns the same value whenever no overflow occurs and no
/// longer traps otherwise.
private func sdbm(_ string: String) -> Int {
    string.unicodeScalars.reduce(0) { hash, scalar in
        Int(scalar.value) &+ (hash << 6) &+ (hash << 16) &- hash
    }
}
|
|
|
@ -1,31 +0,0 @@
|
||||||
@testable import SerializableBloomFilter
|
|
||||||
import XCTest
|
|
||||||
|
|
||||||
/// Unit tests for `SerializableBloomFilter` membership and its serialization round trip.
final class SerializableBloomFilterTests: XCTestCase {
    /// Inserted strings are reported as members; two strings that were never
    /// inserted are not.
    func testContains() {
        var filter = SerializableBloomFilter()

        filter.insert("lol")
        filter.insert("ok")

        XCTAssert(filter.contains("lol"))
        XCTAssert(filter.contains("ok"))
        XCTAssertFalse(filter.contains("wtf"))
        XCTAssertFalse(filter.contains("no"))
    }

    /// A filter rebuilt from `serialization` answers membership queries like the
    /// filter it was created from.
    func testSerialization() throws {
        var filter = SerializableBloomFilter()

        filter.insert("lol")
        filter.insert("ok")

        let serialization = filter.serialization
        let deserializedFilter = try SerializableBloomFilter(serialization: serialization)

        // Every assertion targets the deserialized filter; two of them previously
        // queried the original `filter`, leaving the round trip only half checked.
        XCTAssert(deserializedFilter.contains("lol"))
        XCTAssert(deserializedFilter.contains("ok"))
        XCTAssertFalse(deserializedFilter.contains("wtf"))
        XCTAssertFalse(deserializedFilter.contains("no"))
    }
}
|
|
Loading…
Reference in a new issue