mirror of
https://github.com/metabolist/metatext.git
synced 2025-01-07 04:25:24 +00:00
Access control and genericization
This commit is contained in:
parent
d0e9b2a1e3
commit
414f979f94
5 changed files with 43 additions and 20 deletions
|
@ -6,44 +6,44 @@ import Foundation
|
||||||
// https://khanlou.com/2018/09/bloom-filters/
|
// https://khanlou.com/2018/09/bloom-filters/
|
||||||
// This implementation uses deterministic hashing functions so it can be serialized / deserialized
|
// This implementation uses deterministic hashing functions so it can be serialized / deserialized
|
||||||
|
|
||||||
/// A Bloom filter over values that expose a deterministic byte representation.
///
/// Members are hashed with each configured `Hash` function and the resulting
/// bit positions are set in a bit array of `bits` entries. The hash functions
/// are deterministic so the filter can be serialized and deserialized.
public struct BloomFilter<T: DeterministicallyHashable> {
    /// The hash functions applied to every member, in order.
    public let hashes: [Hash]
    /// The number of bits in the filter; hash values are reduced modulo this count.
    public let bits: Int

    private var data: BitArray

    /// Creates an empty filter.
    ///
    /// - Parameters:
    ///   - hashes: The hash functions to apply to each member.
    ///   - bits: The size of the filter in bits. Must be greater than zero.
    public init(hashes: [Hash], bits: Int) {
        // Bit positions are computed as `hash % bits`, so a non-positive size
        // would otherwise surface later as a division-by-zero crash on the
        // first `insert` / `contains` call. Fail at the point of the mistake.
        precondition(bits > 0, "BloomFilter requires a positive number of bits")

        self.hashes = hashes
        self.bits = bits
        data = BitArray(count: bits)
    }
}
|
||||||
|
|
||||||
public extension BloomFilter {
    /// The deterministic hash functions a filter can be configured with.
    ///
    /// The raw value of each case is its name, which is what is written out
    /// when the filter is encoded.
    enum Hash: String, Codable {
        case djb2, sdbm
    }

    /// Records `newMember` by setting every bit position its hashes map to.
    mutating func insert(_ newMember: T) {
        for position in indices(newMember) {
            data[position] = true
        }
    }

    /// Reports whether `member` may have been inserted.
    ///
    /// Returns `true` only when every bit position for `member` is set. Other
    /// members can set the same bits, so `true` can be a false positive;
    /// `false` means the member was never inserted.
    func contains(_ member: T) -> Bool {
        indices(member).allSatisfy { data[$0] }
    }
}
|
||||||
|
|
||||||
extension BloomFilter: Codable {
|
extension BloomFilter: Codable {
|
||||||
enum CodingKeys: String, CodingKey {
|
private enum CodingKeys: String, CodingKey {
|
||||||
case hashes
|
case hashes
|
||||||
case bits
|
case bits
|
||||||
case data
|
case data
|
||||||
}
|
}
|
||||||
|
|
||||||
init(from decoder: Decoder) throws {
|
public init(from decoder: Decoder) throws {
|
||||||
let container = try decoder.container(keyedBy: CodingKeys.self)
|
let container = try decoder.container(keyedBy: CodingKeys.self)
|
||||||
|
|
||||||
hashes = try container.decode([Hash].self, forKey: .hashes)
|
hashes = try container.decode([Hash].self, forKey: .hashes)
|
||||||
|
@ -51,7 +51,7 @@ extension BloomFilter: Codable {
|
||||||
data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
|
data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
func encode(to encoder: Encoder) throws {
|
public func encode(to encoder: Encoder) throws {
|
||||||
var container = encoder.container(keyedBy: CodingKeys.self)
|
var container = encoder.container(keyedBy: CodingKeys.self)
|
||||||
|
|
||||||
try container.encode(hashes, forKey: .hashes)
|
try container.encode(hashes, forKey: .hashes)
|
||||||
|
@ -61,16 +61,18 @@ extension BloomFilter: Codable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private extension BloomFilter {
    /// Maps `member` to one bit position per configured hash function.
    ///
    /// - Parameter member: The value to locate in the filter.
    /// - Returns: One index in `0..<bits` for each entry in `hashes`, in the same order.
    func indices(_ member: T) -> [Int] {
        // The hash functions use wrapping arithmetic, so any `Int` can come
        // back, including `Int.min`, for which `abs(_:)` traps. `magnitude`
        // is defined for every value and equals `abs` everywhere else, so the
        // positions (and therefore previously serialized filters) are unchanged.
        let width = bits.magnitude

        return hashes.map { Int($0.apply(member).magnitude % width) }
    }
}
|
||||||
|
|
||||||
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
|
||||||
|
|
||||||
private extension BloomFilter.Hash {
|
private extension BloomFilter.Hash {
|
||||||
/// Hashes `member` by folding each byte of its deterministic data into the
/// running state, starting from `initial` and combining with `then`.
func apply(_ member: T) -> Int {
    var state = initial

    for byte in member.deterministicallyHashableData {
        state = then(result: state, next: Int(byte))
    }

    return state
}
|
||||||
|
|
||||||
var initial: Int {
|
var initial: Int {
|
||||||
|
@ -80,12 +82,12 @@ private extension BloomFilter.Hash {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Folds one input value into the running hash state.
///
/// - Parameters:
///   - result: The hash state accumulated so far.
///   - next: The next input value (one byte of the member's data).
/// - Returns: The updated hash state, wrapping on overflow.
func then(result: Int, next: Int) -> Int {
    switch self {
    case .djb2:
        // result * 33 + next
        return (result << 5) &+ result &+ next
    case .sdbm:
        // result * 65599 + next. Every step must wrap: a plain `-` traps on
        // overflow, which crashes once the state has grown past a few bytes
        // of input. `&-` yields the same value whenever `-` would not trap.
        return next &+ (result << 6) &+ (result << 16) &- result
    }
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
// Copyright © 2020 Metabolist. All rights reserved.
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// A type that can supply bytes for `BloomFilter` to hash.
///
/// NOTE(review): the name implies the bytes must be identical across runs for
/// equal values so that serialized filters stay valid — confirm with conformers.
public protocol DeterministicallyHashable {
|
||||||
|
/// The bytes that the filter's hash functions consume for this value.
var deterministicallyHashableData: Data { get }
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
// Copyright © 2020 Metabolist. All rights reserved.
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
// `Data` is hashed as-is: its own bytes are the hashable representation.
extension Data: DeterministicallyHashable {
|
||||||
|
/// Returns the receiver unchanged.
public var deterministicallyHashableData: Data { self }
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
// Copyright © 2020 Metabolist. All rights reserved.
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
// Strings are hashed by their UTF-8 encoding.
// NOTE(review): the earlier String-only filter hashed Unicode scalar values
// instead of UTF-8 bytes; the two agree for ASCII but differ for other text,
// so filters serialized before this change may not match non-ASCII members.
extension String: DeterministicallyHashable {
|
||||||
|
/// The UTF-8 bytes of the string.
public var deterministicallyHashableData: Data { Data(utf8) }
|
||||||
|
}
|
|
@ -3,7 +3,7 @@ import XCTest
|
||||||
|
|
||||||
final class CodableBloomFilterTests: XCTestCase {
|
final class CodableBloomFilterTests: XCTestCase {
|
||||||
func testContains() {
|
func testContains() {
|
||||||
var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 1024)
|
var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 1024)
|
||||||
|
|
||||||
sut.insert("lol")
|
sut.insert("lol")
|
||||||
sut.insert("ok")
|
sut.insert("ok")
|
||||||
|
@ -15,7 +15,7 @@ final class CodableBloomFilterTests: XCTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
func testCoding() throws {
|
func testCoding() throws {
|
||||||
var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64)
|
var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 64)
|
||||||
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
|
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
|
||||||
|
|
||||||
sut.insert("lol")
|
sut.insert("lol")
|
||||||
|
@ -29,7 +29,7 @@ final class CodableBloomFilterTests: XCTestCase {
|
||||||
|
|
||||||
XCTAssertEqual(serialization, expectedSerialization)
|
XCTAssertEqual(serialization, expectedSerialization)
|
||||||
|
|
||||||
let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization)
|
let decoded = try JSONDecoder().decode(BloomFilter<String>.self, from: serialization)
|
||||||
|
|
||||||
XCTAssert(decoded.contains("lol"))
|
XCTAssert(decoded.contains("lol"))
|
||||||
XCTAssert(decoded.contains("ok"))
|
XCTAssert(decoded.contains("ok"))
|
||||||
|
|
Loading…
Reference in a new issue