150 lines
3.7 KiB
Go
150 lines
3.7 KiB
Go
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package bytealg
|
|
|
|
import (
|
|
"internal/cpu"
|
|
"unsafe"
|
|
)
|
|
|
|
// Offsets into internal/cpu records for use in assembly.
|
|
const (
|
|
offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
|
|
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
|
offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
|
|
|
|
offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
|
|
|
|
offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9)
|
|
)
|
|
|
|
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
|
|
// If MaxLen is not 0, make sure MaxLen >= 4.
|
|
var MaxLen int = 32
|
|
|
|
// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
|
|
// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
|
|
// three of them without causing allocation?
|
|
|
|
// PrimeRK is the prime base used in Rabin-Karp algorithm.
|
|
const PrimeRK = 16777619
|
|
|
|
// HashStrBytes returns the hash and the appropriate multiplicative
|
|
// factor for use in Rabin-Karp algorithm.
|
|
func HashStrBytes(sep []byte) (uint32, uint32) {
|
|
hash := uint32(0)
|
|
for i := 0; i < len(sep); i++ {
|
|
hash = hash*PrimeRK + uint32(sep[i])
|
|
}
|
|
var pow, sq uint32 = 1, PrimeRK
|
|
for i := len(sep); i > 0; i >>= 1 {
|
|
if i&1 != 0 {
|
|
pow *= sq
|
|
}
|
|
sq *= sq
|
|
}
|
|
return hash, pow
|
|
}
|
|
|
|
// HashStr returns the hash and the appropriate multiplicative
|
|
// factor for use in Rabin-Karp algorithm.
|
|
func HashStr(sep string) (uint32, uint32) {
|
|
hash := uint32(0)
|
|
for i := 0; i < len(sep); i++ {
|
|
hash = hash*PrimeRK + uint32(sep[i])
|
|
}
|
|
var pow, sq uint32 = 1, PrimeRK
|
|
for i := len(sep); i > 0; i >>= 1 {
|
|
if i&1 != 0 {
|
|
pow *= sq
|
|
}
|
|
sq *= sq
|
|
}
|
|
return hash, pow
|
|
}
|
|
|
|
// HashStrRevBytes returns the hash of the reverse of sep and the
|
|
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
|
func HashStrRevBytes(sep []byte) (uint32, uint32) {
|
|
hash := uint32(0)
|
|
for i := len(sep) - 1; i >= 0; i-- {
|
|
hash = hash*PrimeRK + uint32(sep[i])
|
|
}
|
|
var pow, sq uint32 = 1, PrimeRK
|
|
for i := len(sep); i > 0; i >>= 1 {
|
|
if i&1 != 0 {
|
|
pow *= sq
|
|
}
|
|
sq *= sq
|
|
}
|
|
return hash, pow
|
|
}
|
|
|
|
// HashStrRev returns the hash of the reverse of sep and the
|
|
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
|
|
func HashStrRev(sep string) (uint32, uint32) {
|
|
hash := uint32(0)
|
|
for i := len(sep) - 1; i >= 0; i-- {
|
|
hash = hash*PrimeRK + uint32(sep[i])
|
|
}
|
|
var pow, sq uint32 = 1, PrimeRK
|
|
for i := len(sep); i > 0; i >>= 1 {
|
|
if i&1 != 0 {
|
|
pow *= sq
|
|
}
|
|
sq *= sq
|
|
}
|
|
return hash, pow
|
|
}
|
|
|
|
// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
|
|
// first occurrence of substr in s, or -1 if not present.
|
|
func IndexRabinKarpBytes(s, sep []byte) int {
|
|
// Rabin-Karp search
|
|
hashsep, pow := HashStrBytes(sep)
|
|
n := len(sep)
|
|
var h uint32
|
|
for i := 0; i < n; i++ {
|
|
h = h*PrimeRK + uint32(s[i])
|
|
}
|
|
if h == hashsep && Equal(s[:n], sep) {
|
|
return 0
|
|
}
|
|
for i := n; i < len(s); {
|
|
h *= PrimeRK
|
|
h += uint32(s[i])
|
|
h -= pow * uint32(s[i-n])
|
|
i++
|
|
if h == hashsep && Equal(s[i-n:i], sep) {
|
|
return i - n
|
|
}
|
|
}
|
|
return -1
|
|
}
|
|
|
|
// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
|
|
// first occurrence of substr in s, or -1 if not present.
|
|
func IndexRabinKarp(s, substr string) int {
|
|
// Rabin-Karp search
|
|
hashss, pow := HashStr(substr)
|
|
n := len(substr)
|
|
var h uint32
|
|
for i := 0; i < n; i++ {
|
|
h = h*PrimeRK + uint32(s[i])
|
|
}
|
|
if h == hashss && s[:n] == substr {
|
|
return 0
|
|
}
|
|
for i := n; i < len(s); {
|
|
h *= PrimeRK
|
|
h += uint32(s[i])
|
|
h -= pow * uint32(s[i-n])
|
|
i++
|
|
if h == hashss && s[i-n:i] == substr {
|
|
return i - n
|
|
}
|
|
}
|
|
return -1
|
|
}
|