Skip to content

Latest commit

 

History

History
118 lines (93 loc) · 3.95 KB

README.md

File metadata and controls

118 lines (93 loc) · 3.95 KB

Circle CI

Cuckoo filter

A densely-packed configurable cuckoo filter implemented in Scala.

SBT settings

resolvers ++= Seq(
  "dl-john-ky-releases"   at "http://dl.john-ky.io/maven/releases",
  "dl-john-ky-snapshots"  at "http://dl.john-ky.io/maven/snapshots")

libraryDependencies += "io.john-ky" %% "pico-cuckoo-filter" % "0.0.1-af1b672"

Usage

These are the imports required to use the cuckoo filter:

import org.pico.cuckoo.filter._
import org.pico.hash.syntax._
import org.pico.hash.{Hash64, Hashable}
import org.pico.twiddle.syntax.anyVal._

The cuckoo filter requires instances of the Hashable type-trait to defined for the filtered element type, for Long and for Fingerprint.

For example, the following is an implementation of these instances for the filtered element type String:

import scala.util.hashing.MurmurHash3

implicit val hashableString = new Hashable[String] {
  override def hash(a: String): Hash64 = Hash64(MurmurHash3.stringHash(a))
}

implicit val hashableLong = new Hashable[Long] {
  override def hash(a: Long): Hash64 = Hash64(MurmurHash3.arrayHash(Array(a)))
}

implicit val hashableFingerprint = new Hashable[Fingerprint] {
  override def hash(a: Fingerprint): Hash64 = a.value.hashed
}
val filter = new CuckooFilter(
    16, // The number fingerprints per bucket
    24.bits, // The number of bits in a finger print
    5, // The maximum number of kicks to attempt before failing an insert
    128) // The total number of buckets

The filter can then be used like this:

filter.insert("Element") // Returns true if the insertion was successful
filter.lookup("Element") // Returns true since the element was just inserted
filter.delete("Element") // Returns true since the element was still in the filter
filter.lookup("Element") // Returns false since the element has just been deleted
filter.delete("Element") // Returns false since the element has already been deleted

Complete sample

import org.pico.cuckoo.filter._
import org.pico.hash.syntax._
import org.pico.hash.{Hash64, Hashable, Hashable2}
import org.pico.twiddle.syntax.anyVal._
import scala.util.hashing.MurmurHash3

object Main {
  implicit val hashableString = new Hashable[String] {
    override def hash(a: String): Hash64 = Hash64(MurmurHash3.stringHash(a))
  }

  implicit val hashable2String = new Hashable2[String] {
    override def hash2(a: String): Hash64 = Hash64(JLong.reverse(MurmurHash3.stringHash(a)))
  }

  implicit val hashableLong = new Hashable[Long] {
    override def hash(a: Long): Hash64 = Hash64(MurmurHash3.arrayHash(Array(a)))
  }

  implicit val hashable2Long = new Hashable2[Long] {
    override def hash2(a: Long): Hash64 = Hash64(JLong.reverse(MurmurHash3.arrayHash(Array(a))))
  }

  implicit val hashableFingerprint = new Hashable[Fingerprint] {
    override def hash(a: Fingerprint): Hash64 = a.value.hashed
  }

  def main(args: Array[String]): Unit = {
    val filter = new CuckooFilter(
        16, // The number fingerprints per bucket
        24.bits, // The number of bits in a finger print
        5, // The maximum number of kicks to attempt before failing an insert
        128) // The total number of buckets

    val a = filter.insert("Element") // Returns true if the insertion was successful
    val b = filter.lookup("Element") // Returns true since the element was just inserted
    val c = filter.delete("Element") // Returns true since the element was still in the filter
    val d = filter.lookup("Element") // Returns false since the element has just been deleted
    val e = filter.delete("Element") // Returns false since the element has already been deleted

    println(s"$a $b $c $d $e")
  }
}

References