From 321cea3ccd32b9c3b19f444792f0bfa83868d143 Mon Sep 17 00:00:00 2001 From: Justin Mazzocchi <2831158+jzzocc@users.noreply.github.com> Date: Sat, 5 Sep 2020 15:21:22 -0700 Subject: [PATCH] Make bloom filter codable --- .../.gitignore | 0 CodableBloomFilter/Package.swift | 21 +++++ .../Sources/CodableBloomFilter}/Bits.swift | 0 .../CodableBloomFilter/BloomFilter.swift | 92 +++++++++++++++++++ .../CodableBloomFilterTests.swift | 39 ++++++++ Metatext.xcodeproj/project.pbxproj | 4 +- SerializableBloomFilter/Package.swift | 25 ----- .../SerializableBloomFilter.swift | 56 ----------- .../SerializableBloomFilterTests.swift | 31 ------- 9 files changed, 154 insertions(+), 114 deletions(-) rename {SerializableBloomFilter => CodableBloomFilter}/.gitignore (100%) create mode 100644 CodableBloomFilter/Package.swift rename {SerializableBloomFilter/Sources/SerializableBloomFilter => CodableBloomFilter/Sources/CodableBloomFilter}/Bits.swift (100%) create mode 100644 CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift create mode 100644 CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift delete mode 100644 SerializableBloomFilter/Package.swift delete mode 100644 SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift delete mode 100644 SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift diff --git a/SerializableBloomFilter/.gitignore b/CodableBloomFilter/.gitignore similarity index 100% rename from SerializableBloomFilter/.gitignore rename to CodableBloomFilter/.gitignore diff --git a/CodableBloomFilter/Package.swift b/CodableBloomFilter/Package.swift new file mode 100644 index 0000000..50c8023 --- /dev/null +++ b/CodableBloomFilter/Package.swift @@ -0,0 +1,21 @@ +// swift-tools-version:5.3 + +import PackageDescription + +let package = Package( + name: "CodableBloomFilter", + products: [ + .library( + name: "CodableBloomFilter", + targets: ["CodableBloomFilter"]) + ], + 
dependencies: [], + targets: [ + .target( + name: "CodableBloomFilter", + dependencies: []), + .testTarget( + name: "CodableBloomFilterTests", + dependencies: ["CodableBloomFilter"]) + ] +) diff --git a/SerializableBloomFilter/Sources/SerializableBloomFilter/Bits.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Bits.swift similarity index 100% rename from SerializableBloomFilter/Sources/SerializableBloomFilter/Bits.swift rename to CodableBloomFilter/Sources/CodableBloomFilter/Bits.swift diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift new file mode 100644 index 0000000..05852e3 --- /dev/null +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift @@ -0,0 +1,92 @@ +// Copyright © 2020 Metabolist. All rights reserved. + +import Foundation + +// https://en.wikipedia.org/wiki/Bloom_filter +// https://khanlou.com/2018/09/bloom-filters/ +// This implementation uses deterministic hashing functions so it can be serialized / deserialized + +struct BloomFilter { + let hashes: [Hash] + let bitCount: Int + + private var bits: Bits + + init(hashes: [Hash], bitCount: Int) { + self.hashes = hashes + self.bitCount = bitCount + bits = Bits(count: bitCount) + } +} + +extension BloomFilter { + enum Hash: String, Codable { + case djb2 + case sdbm + } + + mutating func insert(_ newMember: String) { + for index in indices(newMember) { + bits[index] = true + } + } + + func contains(_ member: String) -> Bool { + indices(member).map { bits[$0] }.allSatisfy { $0 } + } +} + +extension BloomFilter: Codable { + enum CodingKeys: String, CodingKey { + case hashes + case bitCount + case data + } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let data = try container.decode(Data.self, forKey: .data) + + hashes = try container.decode([Hash].self, forKey: .hashes) + bitCount = try container.decode(Int.self, forKey: .bitCount) + 
bits = Bits(bytes: Array(data), count: bitCount) + } + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + + try container.encode(hashes, forKey: .hashes) + try container.encode(bitCount, forKey: .bitCount) + try container.encode(bits.data, forKey: .data) + } +} + +private extension BloomFilter { + func indices(_ string: String) -> [Int] { + hashes.map { Int($0.apply(string).magnitude % bitCount.magnitude) } // one bit index per hash; magnitude equals abs for every value except Int.min, where abs would trap + } +} + +// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73 + +private extension BloomFilter.Hash { + func apply(_ string: String) -> Int { + string.unicodeScalars.map(\.value).reduce(initial, then) + } + + var initial: Int { + switch self { + case .djb2: return 5381 + case .sdbm: return 0 + } + } + + func then(result: Int, next: UInt32) -> Int { + switch self { + case .djb2: + return (result << 5) &+ result &+ Int(next) + case .sdbm: + return Int(next) &+ (result << 6) &+ (result << 16) &- result // &- wraps like the &+ terms; a plain - traps on overflow once the hash has grown past a few scalars + } + } +} diff --git a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift new file mode 100644 index 0000000..956eb5a --- /dev/null +++ b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift @@ -0,0 +1,39 @@ +@testable import CodableBloomFilter +import XCTest + +final class CodableBloomFilterTests: XCTestCase { + func testContains() { + var sut = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 1024) + + sut.insert("lol") + sut.insert("ok") + + XCTAssert(sut.contains("lol")) + XCTAssert(sut.contains("ok")) + XCTAssertFalse(sut.contains("wtf")) + XCTAssertFalse(sut.contains("no")) + } + + func testCoding() throws { + var sut = BloomFilter(hashes: [.djb2, .sdbm], bitCount: 64) + let expectedSerialization = Data(#"{"bitCount":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8) + + sut.insert("lol") + sut.insert("ok") + + let encoder = JSONEncoder() + + encoder.outputFormatting = 
.sortedKeys + + let serialization = try encoder.encode(sut) + + XCTAssertEqual(serialization, expectedSerialization) + + let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization) + + XCTAssert(decoded.contains("lol")) + XCTAssert(decoded.contains("ok")) + XCTAssertFalse(decoded.contains("wtf")) + XCTAssertFalse(decoded.contains("no")) + } +} diff --git a/Metatext.xcodeproj/project.pbxproj b/Metatext.xcodeproj/project.pbxproj index de057ea..4cc0dc6 100644 --- a/Metatext.xcodeproj/project.pbxproj +++ b/Metatext.xcodeproj/project.pbxproj @@ -86,7 +86,6 @@ D047FA8C24C3E21200AF17C5 /* Metatext.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Metatext.app; sourceTree = BUILT_PRODUCTS_DIR; }; D0666A2124C677B400F3F04B /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; D0666A2524C677B400F3F04B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; - D07E164425037264008B10D0 /* SerializableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = SerializableBloomFilter; sourceTree = ""; }; D085C3BB25008DEC008A6C5E /* DB */ = {isa = PBXFileReference; lastKnownFileType = folder; path = DB; sourceTree = ""; }; D0BDF66524FD7A6400C7FA1C /* ServiceLayer */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ServiceLayer; sourceTree = ""; }; D0BEB1F224F8EE8C001B0F04 /* AttachmentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AttachmentView.swift; sourceTree = ""; }; @@ -122,6 +121,7 @@ D0C7D46E24F76169001EBDBB /* KingfisherOptionsInfo+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "KingfisherOptionsInfo+Extensions.swift"; sourceTree = ""; }; D0C7D46F24F76169001EBDBB /* View+Extensions.swift */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "View+Extensions.swift"; sourceTree = ""; }; D0C7D47124F76169001EBDBB /* Data+Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Data+Extensions.swift"; sourceTree = ""; }; + D0D7C013250440610039AD6F /* CodableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CodableBloomFilter; sourceTree = ""; }; D0E0F1E424FC49FC002C04BF /* Mastodon */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Mastodon; sourceTree = ""; }; D0E2C1CF24FD8BA400854680 /* ViewModels */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ViewModels; sourceTree = ""; }; D0E5361924E3EB4D00FB1CE1 /* Notification Service Extension.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = "Notification Service Extension.appex"; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -182,8 +182,8 @@ D047FA7F24C3E21000AF17C5 = { isa = PBXGroup; children = ( - D07E164425037264008B10D0 /* SerializableBloomFilter */, D0C7D45224F76169001EBDBB /* Assets.xcassets */, + D0D7C013250440610039AD6F /* CodableBloomFilter */, D085C3BB25008DEC008A6C5E /* DB */, D0C7D46824F76169001EBDBB /* Extensions */, D0666A7924C7745A00F3F04B /* Frameworks */, diff --git a/SerializableBloomFilter/Package.swift b/SerializableBloomFilter/Package.swift deleted file mode 100644 index 4625cf3..0000000 --- a/SerializableBloomFilter/Package.swift +++ /dev/null @@ -1,25 +0,0 @@ -// swift-tools-version:5.3 - -import PackageDescription - -let package = Package( - name: "SerializableBloomFilter", - platforms: [ - .iOS(.v14), - .macOS(.v11) - ], - products: [ - .library( - name: "SerializableBloomFilter", - targets: ["SerializableBloomFilter"]) - ], - dependencies: [], - targets: [ - .target( - name: "SerializableBloomFilter", - dependencies: []), - .testTarget( - name: "SerializableBloomFilterTests", - dependencies: 
["SerializableBloomFilter"]) - ] -) diff --git a/SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift b/SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift deleted file mode 100644 index dcc681b..0000000 --- a/SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright © 2020 Metabolist. All rights reserved. - -import Foundation - -// https://en.wikipedia.org/wiki/Bloom_filter -// https://khanlou.com/2018/09/bloom-filters/ -// This implementation uses deterministic hashing functions so it can be serialized / deserialized - -struct SerializableBloomFilter { - private var items: Bits - - init() { - items = Bits(count: Self.itemCount) - } - - init(serialization: Data) throws { - items = Bits(bytes: Array(serialization), count: Self.itemCount) - } -} - -extension SerializableBloomFilter { - var serialization: Data { items.data } - - mutating func insert(_ newMember: String) { - for index in Self.indices(newMember) { - items[index] = true - } - } - - func contains(_ member: String) -> Bool { - Self.indices(member).map { items[$0] }.allSatisfy { $0 } - } -} - -private extension SerializableBloomFilter { - static let itemCount = 1024 - static let hashFunctions = [djb2, sdbm] - - static func indices(_ string: String) -> [Int] { - hashFunctions.map { abs($0(string)) % itemCount } - } -} - -// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73 - -private func djb2(_ string: String) -> Int { - string.unicodeScalars.map(\.value).reduce(5381) { - ($0 << 5) &+ $0 &+ Int($1) - } -} - -private func sdbm(_ string: String) -> Int { - string.unicodeScalars.map(\.value).reduce(0) { - Int($1) &+ ($0 << 6) &+ ($0 << 16) - $0 - } -} diff --git a/SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift b/SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift 
deleted file mode 100644 index 976c7f9..0000000 --- a/SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift +++ /dev/null @@ -1,31 +0,0 @@ -@testable import SerializableBloomFilter -import XCTest - -final class SerializableBloomFilterTests: XCTestCase { - func testContains() { - var filter = SerializableBloomFilter() - - filter.insert("lol") - filter.insert("ok") - - XCTAssert(filter.contains("lol")) - XCTAssert(filter.contains("ok")) - XCTAssertFalse(filter.contains("wtf")) - XCTAssertFalse(filter.contains("no")) - } - - func testSerialization() throws { - var filter = SerializableBloomFilter() - - filter.insert("lol") - filter.insert("ok") - - let serialization = filter.serialization - let deserializedFilter = try SerializableBloomFilter(serialization: serialization) - - XCTAssert(deserializedFilter.contains("lol")) - XCTAssert(filter.contains("ok")) - XCTAssertFalse(deserializedFilter.contains("wtf")) - XCTAssertFalse(filter.contains("no")) - } -}