263 lines
7.9 KiB
Go
263 lines
7.9 KiB
Go
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
// Package maphash provides hash functions on byte sequences.
// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution on unsigned 64-bit integers.
// Each different instance of a hash table or data structure should use its own Seed.
//
// The hash functions are not cryptographically secure.
// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
//
|
|
package maphash
|
|
|
|
import (
|
|
"internal/unsafeheader"
|
|
"unsafe"
|
|
)
|
|
|
|
// A Seed is a random value that selects the specific hash function
// computed by a Hash. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling MakeSeed.
// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	s uint64 // raw seed value; 0 is reserved to mean "uninitialized"
}
|
|
|
|
// A Hash computes a seeded hash of a byte sequence.
|
|
//
|
|
// The zero Hash is a valid Hash ready to use.
|
|
// A zero Hash chooses a random seed for itself during
|
|
// the first call to a Reset, Write, Seed, or Sum64 method.
|
|
// For control over the seed, use SetSeed.
|
|
//
|
|
// The computed hash values depend only on the initial seed and
|
|
// the sequence of bytes provided to the Hash object, not on the way
|
|
// in which the bytes are provided. For example, the three sequences
|
|
//
|
|
// h.Write([]byte{'f','o','o'})
|
|
// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
|
|
// h.WriteString("foo")
|
|
//
|
|
// all have the same effect.
|
|
//
|
|
// Hashes are intended to be collision-resistant, even for situations
|
|
// where an adversary controls the byte sequences being hashed.
|
|
//
|
|
// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
|
|
// If multiple goroutines must compute the same seeded hash,
|
|
// each can declare its own Hash and call SetSeed with a common Seed.
|
|
type Hash struct {
|
|
_ [0]func() // not comparable
|
|
seed Seed // initial seed used for this hash
|
|
state Seed // current hash of all flushed bytes
|
|
buf [bufSize]byte // unflushed byte buffer
|
|
n int // number of unflushed bytes
|
|
}
|
|
|
|
// bufSize is the size, in bytes, of the Hash write buffer.
// The buffer ensures that writes depend only on the sequence of bytes,
// not the sequence of WriteByte/Write/WriteString calls,
// by always calling rthash with a full buffer (except for the tail).
const bufSize = 128
|
|
|
|
// initSeed seeds the hash if necessary.
|
|
// initSeed is called lazily before any operation that actually uses h.seed/h.state.
|
|
// Note that this does not include Write/WriteByte/WriteString in the case
|
|
// where they only add to h.buf. (If they write too much, they call h.flush,
|
|
// which does call h.initSeed.)
|
|
func (h *Hash) initSeed() {
|
|
if h.seed.s == 0 {
|
|
seed := MakeSeed()
|
|
h.seed = seed
|
|
h.state = seed
|
|
}
|
|
}
|
|
|
|
// WriteByte adds b to the sequence of bytes hashed by h.
|
|
// It never fails; the error result is for implementing io.ByteWriter.
|
|
func (h *Hash) WriteByte(b byte) error {
|
|
if h.n == len(h.buf) {
|
|
h.flush()
|
|
}
|
|
h.buf[h.n] = b
|
|
h.n++
|
|
return nil
|
|
}
|
|
|
|
// Write adds b to the sequence of bytes hashed by h.
|
|
// It always writes all of b and never fails; the count and error result are for implementing io.Writer.
|
|
func (h *Hash) Write(b []byte) (int, error) {
|
|
size := len(b)
|
|
// Deal with bytes left over in h.buf.
|
|
// h.n <= bufSize is always true.
|
|
// Checking it is ~free and it lets the compiler eliminate a bounds check.
|
|
if h.n > 0 && h.n <= bufSize {
|
|
k := copy(h.buf[h.n:], b)
|
|
h.n += k
|
|
if h.n < bufSize {
|
|
// Copied the entirety of b to h.buf.
|
|
return size, nil
|
|
}
|
|
b = b[k:]
|
|
h.flush()
|
|
// No need to set h.n = 0 here; it happens just before exit.
|
|
}
|
|
// Process as many full buffers as possible, without copying, and calling initSeed only once.
|
|
if len(b) > bufSize {
|
|
h.initSeed()
|
|
for len(b) > bufSize {
|
|
h.state.s = rthash(&b[0], bufSize, h.state.s)
|
|
b = b[bufSize:]
|
|
}
|
|
}
|
|
// Copy the tail.
|
|
copy(h.buf[:], b)
|
|
h.n = len(b)
|
|
return size, nil
|
|
}
|
|
|
|
// WriteString adds the bytes of s to the sequence of bytes hashed by h.
|
|
// It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
|
|
func (h *Hash) WriteString(s string) (int, error) {
|
|
// WriteString mirrors Write. See Write for comments.
|
|
size := len(s)
|
|
if h.n > 0 && h.n <= bufSize {
|
|
k := copy(h.buf[h.n:], s)
|
|
h.n += k
|
|
if h.n < bufSize {
|
|
return size, nil
|
|
}
|
|
s = s[k:]
|
|
h.flush()
|
|
}
|
|
if len(s) > bufSize {
|
|
h.initSeed()
|
|
for len(s) > bufSize {
|
|
ptr := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
|
|
h.state.s = rthash(ptr, bufSize, h.state.s)
|
|
s = s[bufSize:]
|
|
}
|
|
}
|
|
copy(h.buf[:], s)
|
|
h.n = len(s)
|
|
return size, nil
|
|
}
|
|
|
|
// Seed returns h's seed value.
|
|
func (h *Hash) Seed() Seed {
|
|
h.initSeed()
|
|
return h.seed
|
|
}
|
|
|
|
// SetSeed sets h to use seed, which must have been returned by MakeSeed
|
|
// or by another Hash's Seed method.
|
|
// Two Hash objects with the same seed behave identically.
|
|
// Two Hash objects with different seeds will very likely behave differently.
|
|
// Any bytes added to h before this call will be discarded.
|
|
func (h *Hash) SetSeed(seed Seed) {
|
|
if seed.s == 0 {
|
|
panic("maphash: use of uninitialized Seed")
|
|
}
|
|
h.seed = seed
|
|
h.state = seed
|
|
h.n = 0
|
|
}
|
|
|
|
// Reset discards all bytes added to h.
|
|
// (The seed remains the same.)
|
|
func (h *Hash) Reset() {
|
|
h.initSeed()
|
|
h.state = h.seed
|
|
h.n = 0
|
|
}
|
|
|
|
// precondition: buffer is full.
|
|
func (h *Hash) flush() {
|
|
if h.n != len(h.buf) {
|
|
panic("maphash: flush of partially full buffer")
|
|
}
|
|
h.initSeed()
|
|
h.state.s = rthash(&h.buf[0], h.n, h.state.s)
|
|
h.n = 0
|
|
}
|
|
|
|
// Sum64 returns h's current 64-bit value, which depends on
|
|
// h's seed and the sequence of bytes added to h since the
|
|
// last call to Reset or SetSeed.
|
|
//
|
|
// All bits of the Sum64 result are close to uniformly and
|
|
// independently distributed, so it can be safely reduced
|
|
// by using bit masking, shifting, or modular arithmetic.
|
|
func (h *Hash) Sum64() uint64 {
|
|
h.initSeed()
|
|
return rthash(&h.buf[0], h.n, h.state.s)
|
|
}
|
|
|
|
// MakeSeed returns a new random seed.
|
|
func MakeSeed() Seed {
|
|
var s1, s2 uint64
|
|
for {
|
|
s1 = uint64(runtime_fastrand())
|
|
s2 = uint64(runtime_fastrand())
|
|
// We use seed 0 to indicate an uninitialized seed/hash,
|
|
// so keep trying until we get a non-zero seed.
|
|
if s1|s2 != 0 {
|
|
break
|
|
}
|
|
}
|
|
return Seed{s: s1<<32 + s2}
|
|
}
|
|
|
|
// runtime_fastrand is declared here without a body; the linkname directive
// below binds it to runtime.fastrand. MakeSeed calls it to obtain the
// 32-bit random values it assembles into a Seed.
//
//go:linkname runtime_fastrand runtime.fastrand
func runtime_fastrand() uint32
|
|
|
|
func rthash(ptr *byte, len int, seed uint64) uint64 {
|
|
if len == 0 {
|
|
return seed
|
|
}
|
|
// The runtime hasher only works on uintptr. For 64-bit
|
|
// architectures, we use the hasher directly. Otherwise,
|
|
// we use two parallel hashers on the lower and upper 32 bits.
|
|
if unsafe.Sizeof(uintptr(0)) == 8 {
|
|
return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)))
|
|
}
|
|
lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))
|
|
hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len))
|
|
return uint64(hi)<<32 | uint64(lo)
|
|
}
|
|
|
|
// runtime_memhash is declared here without a body; the linkname directive
// below binds it to runtime.memhash. rthash calls it with a pointer to the
// data (p), a seed, and the number of bytes to hash (s).
// The noescape directive declares that p does not escape through this call.
//
//go:linkname runtime_memhash runtime.memhash
//go:noescape
func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
|
|
|
|
// Sum appends the hash's current 64-bit value to b.
|
|
// It exists for implementing hash.Hash.
|
|
// For direct calls, it is more efficient to use Sum64.
|
|
func (h *Hash) Sum(b []byte) []byte {
|
|
x := h.Sum64()
|
|
return append(b,
|
|
byte(x>>0),
|
|
byte(x>>8),
|
|
byte(x>>16),
|
|
byte(x>>24),
|
|
byte(x>>32),
|
|
byte(x>>40),
|
|
byte(x>>48),
|
|
byte(x>>56))
|
|
}
|
|
|
|
// Size returns h's hash value size, 8 bytes.
|
|
func (h *Hash) Size() int { return 8 }
|
|
|
|
// BlockSize returns h's block size.
|
|
func (h *Hash) BlockSize() int { return len(h.buf) }
|