360 lines
11 KiB
Go
360 lines
11 KiB
Go
|
// Copyright 2021 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package fuzz
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"fmt"
|
||
|
"go/ast"
|
||
|
"go/parser"
|
||
|
"go/token"
|
||
|
"math"
|
||
|
"strconv"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// encVersion1 is the header that identifies the version 1 corpus encoding.
// It appears, on a line by itself, as the first line of every such file.
var encVersion1 = "go test fuzz v1"
|
||
|
|
||
|
// marshalCorpusFile encodes an arbitrary number of arguments into the file format for the
|
||
|
// corpus.
|
||
|
func marshalCorpusFile(vals ...any) []byte {
|
||
|
if len(vals) == 0 {
|
||
|
panic("must have at least one value to marshal")
|
||
|
}
|
||
|
b := bytes.NewBuffer([]byte(encVersion1 + "\n"))
|
||
|
// TODO(katiehockman): keep uint8 and int32 encoding where applicable,
|
||
|
// instead of changing to byte and rune respectively.
|
||
|
for _, val := range vals {
|
||
|
switch t := val.(type) {
|
||
|
case int, int8, int16, int64, uint, uint16, uint32, uint64, bool:
|
||
|
fmt.Fprintf(b, "%T(%v)\n", t, t)
|
||
|
case float32:
|
||
|
if math.IsNaN(float64(t)) && math.Float32bits(t) != math.Float32bits(float32(math.NaN())) {
|
||
|
// We encode unusual NaNs as hex values, because that is how users are
|
||
|
// likely to encounter them in literature about floating-point encoding.
|
||
|
// This allows us to reproduce fuzz failures that depend on the specific
|
||
|
// NaN representation (for float32 there are about 2^24 possibilities!),
|
||
|
// not just the fact that the value is *a* NaN.
|
||
|
//
|
||
|
// Note that the specific value of float32(math.NaN()) can vary based on
|
||
|
// whether the architecture represents signaling NaNs using a low bit
|
||
|
// (as is common) or a high bit (as commonly implemented on MIPS
|
||
|
// hardware before around 2012). We believe that the increase in clarity
|
||
|
// from identifying "NaN" with math.NaN() is worth the slight ambiguity
|
||
|
// from a platform-dependent value.
|
||
|
fmt.Fprintf(b, "math.Float32frombits(0x%x)\n", math.Float32bits(t))
|
||
|
} else {
|
||
|
// We encode all other values — including the NaN value that is
|
||
|
// bitwise-identical to float32(math.Nan()) — using the default
|
||
|
// formatting, which is equivalent to strconv.FormatFloat with format
|
||
|
// 'g' and can be parsed by strconv.ParseFloat.
|
||
|
//
|
||
|
// For an ordinary floating-point number this format includes
|
||
|
// sufficiently many digits to reconstruct the exact value. For positive
|
||
|
// or negative infinity it is the string "+Inf" or "-Inf". For positive
|
||
|
// or negative zero it is "0" or "-0". For NaN, it is the string "NaN".
|
||
|
fmt.Fprintf(b, "%T(%v)\n", t, t)
|
||
|
}
|
||
|
case float64:
|
||
|
if math.IsNaN(t) && math.Float64bits(t) != math.Float64bits(math.NaN()) {
|
||
|
fmt.Fprintf(b, "math.Float64frombits(0x%x)\n", math.Float64bits(t))
|
||
|
} else {
|
||
|
fmt.Fprintf(b, "%T(%v)\n", t, t)
|
||
|
}
|
||
|
case string:
|
||
|
fmt.Fprintf(b, "string(%q)\n", t)
|
||
|
case rune: // int32
|
||
|
// Although rune and int32 are represented by the same type, only a subset
|
||
|
// of valid int32 values can be expressed as rune literals. Notably,
|
||
|
// negative numbers, surrogate halves, and values above unicode.MaxRune
|
||
|
// have no quoted representation.
|
||
|
//
|
||
|
// fmt with "%q" (and the corresponding functions in the strconv package)
|
||
|
// would quote out-of-range values to the Unicode replacement character
|
||
|
// instead of the original value (see https://go.dev/issue/51526), so
|
||
|
// they must be treated as int32 instead.
|
||
|
//
|
||
|
// We arbitrarily draw the line at UTF-8 validity, which biases toward the
|
||
|
// "rune" interpretation. (However, we accept either format as input.)
|
||
|
if utf8.ValidRune(t) {
|
||
|
fmt.Fprintf(b, "rune(%q)\n", t)
|
||
|
} else {
|
||
|
fmt.Fprintf(b, "int32(%v)\n", t)
|
||
|
}
|
||
|
case byte: // uint8
|
||
|
// For bytes, we arbitrarily prefer the character interpretation.
|
||
|
// (Every byte has a valid character encoding.)
|
||
|
fmt.Fprintf(b, "byte(%q)\n", t)
|
||
|
case []byte: // []uint8
|
||
|
fmt.Fprintf(b, "[]byte(%q)\n", t)
|
||
|
default:
|
||
|
panic(fmt.Sprintf("unsupported type: %T", t))
|
||
|
}
|
||
|
}
|
||
|
return b.Bytes()
|
||
|
}
|
||
|
|
||
|
// unmarshalCorpusFile decodes corpus bytes into their respective values.
|
||
|
func unmarshalCorpusFile(b []byte) ([]any, error) {
|
||
|
if len(b) == 0 {
|
||
|
return nil, fmt.Errorf("cannot unmarshal empty string")
|
||
|
}
|
||
|
lines := bytes.Split(b, []byte("\n"))
|
||
|
if len(lines) < 2 {
|
||
|
return nil, fmt.Errorf("must include version and at least one value")
|
||
|
}
|
||
|
if string(lines[0]) != encVersion1 {
|
||
|
return nil, fmt.Errorf("unknown encoding version: %s", lines[0])
|
||
|
}
|
||
|
var vals []any
|
||
|
for _, line := range lines[1:] {
|
||
|
line = bytes.TrimSpace(line)
|
||
|
if len(line) == 0 {
|
||
|
continue
|
||
|
}
|
||
|
v, err := parseCorpusValue(line)
|
||
|
if err != nil {
|
||
|
return nil, fmt.Errorf("malformed line %q: %v", line, err)
|
||
|
}
|
||
|
vals = append(vals, v)
|
||
|
}
|
||
|
return vals, nil
|
||
|
}
|
||
|
|
||
|
func parseCorpusValue(line []byte) (any, error) {
|
||
|
fs := token.NewFileSet()
|
||
|
expr, err := parser.ParseExprFrom(fs, "(test)", line, 0)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
call, ok := expr.(*ast.CallExpr)
|
||
|
if !ok {
|
||
|
return nil, fmt.Errorf("expected call expression")
|
||
|
}
|
||
|
if len(call.Args) != 1 {
|
||
|
return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args))
|
||
|
}
|
||
|
arg := call.Args[0]
|
||
|
|
||
|
if arrayType, ok := call.Fun.(*ast.ArrayType); ok {
|
||
|
if arrayType.Len != nil {
|
||
|
return nil, fmt.Errorf("expected []byte or primitive type")
|
||
|
}
|
||
|
elt, ok := arrayType.Elt.(*ast.Ident)
|
||
|
if !ok || elt.Name != "byte" {
|
||
|
return nil, fmt.Errorf("expected []byte")
|
||
|
}
|
||
|
lit, ok := arg.(*ast.BasicLit)
|
||
|
if !ok || lit.Kind != token.STRING {
|
||
|
return nil, fmt.Errorf("string literal required for type []byte")
|
||
|
}
|
||
|
s, err := strconv.Unquote(lit.Value)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return []byte(s), nil
|
||
|
}
|
||
|
|
||
|
var idType *ast.Ident
|
||
|
if selector, ok := call.Fun.(*ast.SelectorExpr); ok {
|
||
|
xIdent, ok := selector.X.(*ast.Ident)
|
||
|
if !ok || xIdent.Name != "math" {
|
||
|
return nil, fmt.Errorf("invalid selector type")
|
||
|
}
|
||
|
switch selector.Sel.Name {
|
||
|
case "Float64frombits":
|
||
|
idType = &ast.Ident{Name: "float64-bits"}
|
||
|
case "Float32frombits":
|
||
|
idType = &ast.Ident{Name: "float32-bits"}
|
||
|
default:
|
||
|
return nil, fmt.Errorf("invalid selector type")
|
||
|
}
|
||
|
} else {
|
||
|
idType, ok = call.Fun.(*ast.Ident)
|
||
|
if !ok {
|
||
|
return nil, fmt.Errorf("expected []byte or primitive type")
|
||
|
}
|
||
|
if idType.Name == "bool" {
|
||
|
id, ok := arg.(*ast.Ident)
|
||
|
if !ok {
|
||
|
return nil, fmt.Errorf("malformed bool")
|
||
|
}
|
||
|
if id.Name == "true" {
|
||
|
return true, nil
|
||
|
} else if id.Name == "false" {
|
||
|
return false, nil
|
||
|
} else {
|
||
|
return nil, fmt.Errorf("true or false required for type bool")
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
var (
|
||
|
val string
|
||
|
kind token.Token
|
||
|
)
|
||
|
if op, ok := arg.(*ast.UnaryExpr); ok {
|
||
|
switch lit := op.X.(type) {
|
||
|
case *ast.BasicLit:
|
||
|
if op.Op != token.SUB {
|
||
|
return nil, fmt.Errorf("unsupported operation on int/float: %v", op.Op)
|
||
|
}
|
||
|
// Special case for negative numbers.
|
||
|
val = op.Op.String() + lit.Value // e.g. "-" + "124"
|
||
|
kind = lit.Kind
|
||
|
case *ast.Ident:
|
||
|
if lit.Name != "Inf" {
|
||
|
return nil, fmt.Errorf("expected operation on int or float type")
|
||
|
}
|
||
|
if op.Op == token.SUB {
|
||
|
val = "-Inf"
|
||
|
} else {
|
||
|
val = "+Inf"
|
||
|
}
|
||
|
kind = token.FLOAT
|
||
|
default:
|
||
|
return nil, fmt.Errorf("expected operation on int or float type")
|
||
|
}
|
||
|
} else {
|
||
|
switch lit := arg.(type) {
|
||
|
case *ast.BasicLit:
|
||
|
val, kind = lit.Value, lit.Kind
|
||
|
case *ast.Ident:
|
||
|
if lit.Name != "NaN" {
|
||
|
return nil, fmt.Errorf("literal value required for primitive type")
|
||
|
}
|
||
|
val, kind = "NaN", token.FLOAT
|
||
|
default:
|
||
|
return nil, fmt.Errorf("literal value required for primitive type")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
switch typ := idType.Name; typ {
|
||
|
case "string":
|
||
|
if kind != token.STRING {
|
||
|
return nil, fmt.Errorf("string literal value required for type string")
|
||
|
}
|
||
|
return strconv.Unquote(val)
|
||
|
case "byte", "rune":
|
||
|
if kind == token.INT {
|
||
|
switch typ {
|
||
|
case "rune":
|
||
|
return parseInt(val, typ)
|
||
|
case "byte":
|
||
|
return parseUint(val, typ)
|
||
|
}
|
||
|
}
|
||
|
if kind != token.CHAR {
|
||
|
return nil, fmt.Errorf("character literal required for byte/rune types")
|
||
|
}
|
||
|
n := len(val)
|
||
|
if n < 2 {
|
||
|
return nil, fmt.Errorf("malformed character literal, missing single quotes")
|
||
|
}
|
||
|
code, _, _, err := strconv.UnquoteChar(val[1:n-1], '\'')
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
if typ == "rune" {
|
||
|
return code, nil
|
||
|
}
|
||
|
if code >= 256 {
|
||
|
return nil, fmt.Errorf("can only encode single byte to a byte type")
|
||
|
}
|
||
|
return byte(code), nil
|
||
|
case "int", "int8", "int16", "int32", "int64":
|
||
|
if kind != token.INT {
|
||
|
return nil, fmt.Errorf("integer literal required for int types")
|
||
|
}
|
||
|
return parseInt(val, typ)
|
||
|
case "uint", "uint8", "uint16", "uint32", "uint64":
|
||
|
if kind != token.INT {
|
||
|
return nil, fmt.Errorf("integer literal required for uint types")
|
||
|
}
|
||
|
return parseUint(val, typ)
|
||
|
case "float32":
|
||
|
if kind != token.FLOAT && kind != token.INT {
|
||
|
return nil, fmt.Errorf("float or integer literal required for float32 type")
|
||
|
}
|
||
|
v, err := strconv.ParseFloat(val, 32)
|
||
|
return float32(v), err
|
||
|
case "float64":
|
||
|
if kind != token.FLOAT && kind != token.INT {
|
||
|
return nil, fmt.Errorf("float or integer literal required for float64 type")
|
||
|
}
|
||
|
return strconv.ParseFloat(val, 64)
|
||
|
case "float32-bits":
|
||
|
if kind != token.INT {
|
||
|
return nil, fmt.Errorf("integer literal required for math.Float32frombits type")
|
||
|
}
|
||
|
bits, err := parseUint(val, "uint32")
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return math.Float32frombits(bits.(uint32)), nil
|
||
|
case "float64-bits":
|
||
|
if kind != token.FLOAT && kind != token.INT {
|
||
|
return nil, fmt.Errorf("integer literal required for math.Float64frombits type")
|
||
|
}
|
||
|
bits, err := parseUint(val, "uint64")
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return math.Float64frombits(bits.(uint64)), nil
|
||
|
default:
|
||
|
return nil, fmt.Errorf("expected []byte or primitive type")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// parseInt parses val as a signed integer literal (the base is inferred from
// its prefix) and returns it with the dynamic type named by typ, which must be
// one of "int", "int8", "int16", "int32", "rune", or "int64". Any other typ
// causes a panic.
func parseInt(val, typ string) (any, error) {
	// Choose the width that strconv should enforce for the requested type.
	bitSize := 64
	switch typ {
	case "int", "int64":
		// The int type may be either 32 or 64 bits. On a 32-bit platform the
		// corpus may still contain 64-bit values written by fuzzing runs on
		// 64-bit architectures, so "int" is parsed at full width and
		// implicitly wrapped by the conversion below. (The test case is still
		// "interesting", even if the specific values of its inputs are
		// platform-dependent.)
	case "int8":
		bitSize = 8
	case "int16":
		bitSize = 16
	case "int32", "rune":
		bitSize = 32
	default:
		panic("unreachable")
	}

	n, err := strconv.ParseInt(val, 0, bitSize)

	// Narrow to the requested type; the parse error, if any, is passed along
	// together with the converted value.
	switch typ {
	case "int":
		return int(n), err
	case "int8":
		return int8(n), err
	case "int16":
		return int16(n), err
	case "int32", "rune":
		return int32(n), err
	}
	return n, err
}
|
||
|
|
||
|
// parseUint parses val as an unsigned integer literal (the base is inferred
// from its prefix) and returns it with the dynamic type named by typ, which
// must be one of "uint", "uint8", "byte", "uint16", "uint32", or "uint64". Any
// other typ causes a panic.
func parseUint(val, typ string) (any, error) {
	// Choose the width that strconv should enforce for the requested type.
	// "uint" is parsed at 64 bits and then converted to the platform's uint.
	bitSize := 64
	switch typ {
	case "uint", "uint64":
	case "uint8", "byte":
		bitSize = 8
	case "uint16":
		bitSize = 16
	case "uint32":
		bitSize = 32
	default:
		panic("unreachable")
	}

	u, err := strconv.ParseUint(val, 0, bitSize)

	// Narrow to the requested type; the parse error, if any, is passed along
	// together with the converted value.
	switch typ {
	case "uint":
		return uint(u), err
	case "uint8", "byte":
		return uint8(u), err
	case "uint16":
		return uint16(u), err
	case "uint32":
		return uint32(u), err
	}
	return u, err
}
|