mirror of
https://github.com/metabolist/metatext.git
synced 2024-12-01 04:20:59 +00:00
Extract CodableBloomFilter package
This commit is contained in:
parent
7327d167c3
commit
a12d2eb17d
12 changed files with 12 additions and 331 deletions
5
CodableBloomFilter/.gitignore
vendored
5
CodableBloomFilter/.gitignore
vendored
|
@ -1,5 +0,0 @@
|
||||||
.DS_Store
|
|
||||||
/.build
|
|
||||||
/Packages
|
|
||||||
/*.xcodeproj
|
|
||||||
xcuserdata/
|
|
|
@ -1,21 +0,0 @@
|
||||||
// swift-tools-version:5.3
|
|
||||||
|
|
||||||
import PackageDescription
|
|
||||||
|
|
||||||
// Manifest for the CodableBloomFilter package: one library product backed by a
// single source target, plus the unit-test target that depends on it.
let package = Package(
    name: "CodableBloomFilter",
    products: [
        .library(name: "CodableBloomFilter", targets: ["CodableBloomFilter"])
    ],
    // The package declares no external package dependencies.
    dependencies: [],
    targets: [
        .target(name: "CodableBloomFilter", dependencies: []),
        .testTarget(name: "CodableBloomFilterTests", dependencies: ["CodableBloomFilter"])
    ]
)
|
|
|
@ -1,56 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
/// A fixed-size sequence of bits stored in a byte array.
///
/// Bit `n` lives in byte `n / 8`, at bit position `n % 8` counted from the
/// least significant bit of that byte.
struct BitArray {
    private var bytes: [UInt8]

    /// Creates a bit array backed by a copy of the bytes in `data`.
    init(data: Data) {
        bytes = Array(data)
    }
}

extension BitArray {
    /// Total number of addressable bits (eight per stored byte).
    var bitCount: Int { bytes.count * UInt8.bitWidth }

    /// The backing bytes as `Data`.
    var data: Data { Data(bytes) }

    /// Reads or writes the bit at `index`.
    ///
    /// - Precondition: `index` is in `0..<bitCount`.
    subscript(index: Int) -> Bool {
        get {
            let (byteIndex, mask) = checkedByteIndexAndMask(index: index)

            return bytes[byteIndex] & mask != 0
        }

        set {
            let (byteIndex, mask) = checkedByteIndexAndMask(index: index)

            if newValue {
                bytes[byteIndex] |= mask
            } else {
                bytes[byteIndex] &= ~mask
            }
        }
    }
}

extension BitArray: Codable {
    /// Decodes the bit array from a single `Data` value.
    init(from decoder: Decoder) throws {
        bytes = Array(try decoder.singleValueContainer().decode(Data.self))
    }

    /// Encodes the bit array as a single `Data` value.
    func encode(to encoder: Encoder) throws {
        var container = encoder.singleValueContainer()

        try container.encode(Data(bytes))
    }
}

private extension BitArray {
    /// Validates `index` against `bitCount` before locating its byte and mask.
    ///
    /// Without this check, indices in `-7...-1` would resolve to byte 0 with an
    /// all-zero mask, so reads would report `false` and writes would be dropped
    /// silently instead of failing.
    func checkedByteIndexAndMask(index: Int) -> (Int, UInt8) {
        precondition(index >= 0 && index < bitCount,
                     "BitArray index \(index) is out of range 0..<\(bitCount)")

        return Self.byteIndexAndMask(index: index)
    }

    /// Splits a bit index into the index of its byte and a mask selecting the bit
    /// within that byte.
    static func byteIndexAndMask(index: Int) -> (Int, UInt8) {
        let (byteIndex, bitIndex) = index.quotientAndRemainder(dividingBy: UInt8.bitWidth)
        // `1 << bitIndex` yields the same masks as the former `2 << (bitIndex - 1)`
        // without leaning on a negative shift amount for bit 0.
        let mask: UInt8 = 1 << bitIndex

        return (byteIndex, mask)
    }
}
|
|
|
@ -1,44 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
/// A Bloom filter over `T` that can be encoded and decoded.
///
/// It is coded with two keys: `hashes` (the hash functions in use) and `data`
/// (the underlying bits).
///
/// - Note: A filter created with zero bytes has a bit count of 0, so the
///   remainder operation used to pick bit positions traps on `insert` and
///   `contains`.
public struct BloomFilter<T: DeterministicallyHashable>: Codable {
    enum CodingKeys: String, CodingKey {
        case hashes
        case bits = "data"
    }

    /// The hash functions applied to each member, ordered by raw value.
    public let hashes: [Hash]

    private var bits: BitArray

    /// Creates an empty filter whose bit storage is `byteCount` zeroed bytes.
    public init(hashes: Set<Hash>, byteCount: Int) {
        let zeroed = Data(repeating: 0, count: byteCount)

        self.init(hashes: hashes, data: zeroed)
    }

    /// Creates a filter whose bit storage is initialised from `data`.
    public init(hashes: Set<Hash>, data: Data) {
        // A `Set` has no defined order; sorting keeps the coded output consistent.
        self.hashes = hashes.sorted { lhs, rhs in lhs.rawValue < rhs.rawValue }
        self.bits = BitArray(data: data)
    }
}

public extension BloomFilter {
    /// The filter's bit storage as raw bytes.
    var data: Data { bits.data }

    /// Sets the bit selected by every hash function for `newMember`.
    mutating func insert(_ newMember: T) {
        let positions = indices(newMember)

        for position in positions {
            bits[position] = true
        }
    }

    /// Returns `true` when every bit selected for `member` is set.
    func contains(_ member: T) -> Bool {
        for position in indices(member) where !bits[position] {
            return false
        }

        return true
    }
}

private extension BloomFilter {
    /// Maps `member` to one bit position per hash function.
    func indices(_ member: T) -> [Int] {
        let bitCount = bits.bitCount

        return hashes.map { hash in
            abs(hash.apply(member)) % bitCount
        }
    }
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
/// A type that exposes a `Data` representation of itself for hashing.
public protocol DeterministicallyHashable {
    /// The bytes to use when hashing this value.
    var dataForHashingDeterministically: Data { get }
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
extension Data: DeterministicallyHashable {
    /// `Data` is hashed over its own bytes, unchanged.
    public var dataForHashingDeterministically: Data {
        return self
    }
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
extension String: DeterministicallyHashable {
    /// A string is hashed over the bytes of its UTF-8 view.
    public var dataForHashingDeterministically: Data {
        return Data(utf8)
    }
}
|
|
|
@ -1,49 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
/// The 32-bit hash functions a filter can use. Each case is coded as its
/// string raw value.
public enum Hash: String, Codable {
    case djb232
    case djb2a32
    case sdbm32
    case fnv132
    case fnv1a32
}

extension Hash {
    /// Hashes the bytes of `hashable.dataForHashingDeterministically`.
    ///
    /// Starting from `offsetBasis`, each byte is folded in with
    /// `hash(result:next:)`; the final `UInt32` is converted to `Int`.
    func apply(_ hashable: DeterministicallyHashable) -> Int {
        let input = hashable.dataForHashingDeterministically
        let digest = input.reduce(offsetBasis) { running, byte in
            hash(result: running, next: UInt32(byte))
        }

        return Int(digest)
    }
}

// http://www.cse.yorku.ca/~oz/hash.html
// http://www.isthe.com/chongo/tech/comp/fnv/

private extension Hash {
    static let fnvPrime: UInt32 = 16777619

    /// The initial accumulator value for this hash function.
    var offsetBasis: UInt32 {
        switch self {
        case .djb232:
            return 5381
        case .djb2a32:
            return 5381
        case .sdbm32:
            return 0
        case .fnv132:
            return 2166136261
        case .fnv1a32:
            return 2166136261
        }
    }

    /// Folds one input value into the running hash. All arithmetic wraps at 32 bits.
    func hash(result: UInt32, next: UInt32) -> UInt32 {
        switch self {
        case .djb232:
            // (result << 5) + result is result * 33 in wrapping arithmetic.
            let scaled = result &* 33
            return scaled &+ next
        case .djb2a32:
            let scaled = result &* 33
            return scaled ^ next
        case .sdbm32:
            // (result << 6) + (result << 16) - result is result * 65599 in wrapping arithmetic.
            let scaled = result &* 65599
            return next &+ scaled
        case .fnv132:
            let scaled = result &* Self.fnvPrime
            return scaled ^ next
        case .fnv1a32:
            let mixed = result ^ next
            return mixed &* Self.fnvPrime
        }
    }
}
|
|
|
@ -1,132 +0,0 @@
|
||||||
// Copyright © 2020 Metabolist. All rights reserved.
|
|
||||||
|
|
||||||
@testable import CodableBloomFilter
|
|
||||||
import XCTest
|
|
||||||
|
|
||||||
/// Unit tests for the hash functions and for `BloomFilter` membership, raw data
/// access, and coding.
final class CodableBloomFilterTests: XCTestCase {
    /// Pins the output of every hash function for the input "hash".
    func testHashes() {
        XCTAssertEqual(Hash.djb232.apply("hash"), 2090320585)
        XCTAssertEqual(Hash.djb2a32.apply("hash"), 2087809207)
        XCTAssertEqual(Hash.sdbm32.apply("hash"), 385600046)
        XCTAssertEqual(Hash.fnv132.apply("hash"), 3616638997)
        XCTAssertEqual(Hash.fnv1a32.apply("hash"), 3469047761)
    }

    /// Inserted members are reported as contained; the other probes are not.
    func testContains() throws {
        var sut = BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)

        sut.insert("lol")
        sut.insert("ok")

        XCTAssert(sut.contains("lol"))
        XCTAssert(sut.contains("ok"))
        XCTAssertFalse(sut.contains("wtf"))
        XCTAssertFalse(sut.contains("no"))
    }

    /// Pins the raw bytes produced by two insertions.
    func testData() throws {
        var sut = BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)

        sut.insert("lol")
        sut.insert("ok")

        XCTAssertEqual(sut.data, Data([0, 16, 0, 0, 0, 2, 0, 144]))
    }

    /// A filter built from existing bytes answers membership queries from them.
    func testFromData() throws {
        let sut = BloomFilter<String>(hashes: [.sdbm32, .djb232], data: Data([0, 16, 0, 0, 0, 2, 0, 144]))

        XCTAssert(sut.contains("lol"))
        XCTAssert(sut.contains("ok"))
        XCTAssertFalse(sut.contains("wtf"))
        XCTAssertFalse(sut.contains("no"))
    }

    /// Encoding produces the expected JSON, and decoding it restores membership.
    func testCoding() throws {
        var sut = BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
        let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)

        sut.insert("lol")
        sut.insert("ok")

        let encoder = JSONEncoder()

        // Sorted keys make the encoded bytes comparable with the fixture.
        encoder.outputFormatting = .sortedKeys

        let data = try encoder.encode(sut)

        XCTAssertEqual(data, expectedData)

        let decoded = try JSONDecoder().decode(BloomFilter<String>.self, from: data)

        XCTAssert(decoded.contains("lol"))
        XCTAssert(decoded.contains("ok"))
        XCTAssertFalse(decoded.contains("wtf"))
        XCTAssertFalse(decoded.contains("no"))
    }

    /// Decoding an unknown hash name fails with `DecodingError.dataCorrupted`.
    func testInvalidHash() throws {
        let invalidData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","invalid"]}"#.utf8)

        XCTAssertThrowsError(try JSONDecoder().decode(BloomFilter<String>.self, from: invalidData)) {
            guard case DecodingError.dataCorrupted = $0 else {
                XCTFail("Expected data corrupted error")

                return
            }
        }
    }

    /// The encoder's custom data strategy (here: lowercase hex) is used for the bits.
    func testDataEncodingStrategy() throws {
        var sut = BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
        let expectedData = Data(#"{"data":"0010000000020090","hashes":["djb232","sdbm32"]}"#.utf8)

        sut.insert("lol")
        sut.insert("ok")

        let encoder = JSONEncoder()

        encoder.outputFormatting = .sortedKeys
        encoder.dataEncodingStrategy = .custom { data, encoder in
            var container = encoder.singleValueContainer()

            try container.encode(data.map { String(format: "%02.2hhx", $0) }.joined())
        }

        let data = try encoder.encode(sut)

        XCTAssertEqual(data, expectedData)
    }

    /// The decoder's custom data strategy (here: hex) is used to read the bits.
    func testDataDecodingStrategy() throws {
        let data = Data(#"{"data":"0010000000020090","hashes":["djb232","sdbm32"]}"#.utf8)
        let decoder = JSONDecoder()

        decoder.dataDecodingStrategy = .custom { decoder in
            let container = try decoder.singleValueContainer()
            let string = try container.decode(String.self)
            var bytes = [UInt8]()
            var i = string.startIndex

            while i != string.endIndex {
                // `limitedBy:` returns nil when fewer than two characters remain, so an
                // odd-length string throws instead of trapping on an out-of-bounds index.
                guard let j = string.index(i, offsetBy: 2, limitedBy: string.endIndex),
                      let byte = UInt8(string[i..<j], radix: 16) else {
                    throw DecodingError.dataCorruptedError(in: container, debugDescription: "Invalid byte")
                }

                bytes.append(byte)
                i = j
            }

            return Data(bytes)
        }

        let sut = try decoder.decode(BloomFilter<String>.self, from: data)

        XCTAssert(sut.contains("lol"))
        XCTAssert(sut.contains("ok"))
        XCTAssertFalse(sut.contains("wtf"))
        XCTAssertFalse(sut.contains("no"))
    }
}
|
|
|
@ -396,7 +396,6 @@
|
||||||
D0D2AC4C25BCD2A9003D5DF2 /* TagTableViewCell.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TagTableViewCell.swift; sourceTree = "<group>"; };
|
D0D2AC4C25BCD2A9003D5DF2 /* TagTableViewCell.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TagTableViewCell.swift; sourceTree = "<group>"; };
|
||||||
D0D2AC5225BCD2BA003D5DF2 /* TagContentConfiguration.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TagContentConfiguration.swift; sourceTree = "<group>"; };
|
D0D2AC5225BCD2BA003D5DF2 /* TagContentConfiguration.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TagContentConfiguration.swift; sourceTree = "<group>"; };
|
||||||
D0D2AC6625BD0484003D5DF2 /* LineChartView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LineChartView.swift; sourceTree = "<group>"; };
|
D0D2AC6625BD0484003D5DF2 /* LineChartView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LineChartView.swift; sourceTree = "<group>"; };
|
||||||
D0D7C013250440610039AD6F /* CodableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = CodableBloomFilter; sourceTree = "<group>"; };
|
|
||||||
D0D93EB925D9C70400C622ED /* AutocompleteItemView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AutocompleteItemView.swift; sourceTree = "<group>"; };
|
D0D93EB925D9C70400C622ED /* AutocompleteItemView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AutocompleteItemView.swift; sourceTree = "<group>"; };
|
||||||
D0D93EBF25D9C71D00C622ED /* AutocompleteItemContentConfiguration.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AutocompleteItemContentConfiguration.swift; sourceTree = "<group>"; };
|
D0D93EBF25D9C71D00C622ED /* AutocompleteItemContentConfiguration.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AutocompleteItemContentConfiguration.swift; sourceTree = "<group>"; };
|
||||||
D0D93ECF25D9C9ED00C622ED /* AutocompleteItemCollectionViewCell.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AutocompleteItemCollectionViewCell.swift; sourceTree = "<group>"; };
|
D0D93ECF25D9C9ED00C622ED /* AutocompleteItemCollectionViewCell.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AutocompleteItemCollectionViewCell.swift; sourceTree = "<group>"; };
|
||||||
|
@ -635,7 +634,6 @@
|
||||||
D0477F2A25C6EB90005C5368 /* Activities */,
|
D0477F2A25C6EB90005C5368 /* Activities */,
|
||||||
D0C7D45224F76169001EBDBB /* Assets.xcassets */,
|
D0C7D45224F76169001EBDBB /* Assets.xcassets */,
|
||||||
D0FE1C9625368A15003EF1EB /* Caches */,
|
D0FE1C9625368A15003EF1EB /* Caches */,
|
||||||
D0D7C013250440610039AD6F /* CodableBloomFilter */,
|
|
||||||
D0A1F4F5252E7D2A004435BF /* Data Sources */,
|
D0A1F4F5252E7D2A004435BF /* Data Sources */,
|
||||||
D085C3BB25008DEC008A6C5E /* DB */,
|
D085C3BB25008DEC008A6C5E /* DB */,
|
||||||
D0C7D46824F76169001EBDBB /* Extensions */,
|
D0C7D46824F76169001EBDBB /* Extensions */,
|
||||||
|
|
|
@ -10,6 +10,15 @@
|
||||||
"version": "1.0.0"
|
"version": "1.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"package": "CodableBloomFilter",
|
||||||
|
"repositoryURL": "https://github.com/metabolist/codable-bloom-filter.git",
|
||||||
|
"state": {
|
||||||
|
"branch": null,
|
||||||
|
"revision": "53f80465056a95253df797f1d8a9199f8b1c0543",
|
||||||
|
"version": "1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"package": "CombineExpectations",
|
"package": "CombineExpectations",
|
||||||
"repositoryURL": "https://github.com/groue/CombineExpectations.git",
|
"repositoryURL": "https://github.com/groue/CombineExpectations.git",
|
||||||
|
|
|
@ -18,7 +18,9 @@ let package = Package(
|
||||||
],
|
],
|
||||||
dependencies: [
|
dependencies: [
|
||||||
.package(url: "https://github.com/groue/CombineExpectations.git", .upToNextMajor(from: "0.7.0")),
|
.package(url: "https://github.com/groue/CombineExpectations.git", .upToNextMajor(from: "0.7.0")),
|
||||||
.package(path: "CodableBloomFilter"),
|
.package(name: "CodableBloomFilter",
|
||||||
|
url: "https://github.com/metabolist/codable-bloom-filter.git",
|
||||||
|
.upToNextMajor(from: "1.0.0")),
|
||||||
.package(path: "DB"),
|
.package(path: "DB"),
|
||||||
.package(path: "Keychain"),
|
.package(path: "Keychain"),
|
||||||
.package(path: "MastodonAPI"),
|
.package(path: "MastodonAPI"),
|
||||||
|
|
Loading…
Reference in a new issue