293 lines
7.9 KiB
Go
293 lines
7.9 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package lzw
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
)
|
|
|
|
// A writer is a buffered, flushable writer.
|
|
type writer interface {
|
|
io.ByteWriter
|
|
Flush() error
|
|
}
|
|
|
|
const (
|
|
// A code is a 12 bit value, stored as a uint32 when encoding to avoid
|
|
// type conversions when shifting bits.
|
|
maxCode = 1<<12 - 1
|
|
invalidCode = 1<<32 - 1
|
|
// There are 1<<12 possible codes, which is an upper bound on the number of
|
|
// valid hash table entries at any given point in time. tableSize is 4x that.
|
|
tableSize = 4 * 1 << 12
|
|
tableMask = tableSize - 1
|
|
// A hash table entry is a uint32. Zero is an invalid entry since the
|
|
// lower 12 bits of a valid entry must be a non-literal code.
|
|
invalidEntry = 0
|
|
)
|
|
|
|
// Writer is an LZW compressor. It writes the compressed form of the data
|
|
// to an underlying writer (see NewWriter).
|
|
type Writer struct {
|
|
// w is the writer that compressed bytes are written to.
|
|
w writer
|
|
// order, write, bits, nBits and width are the state for
|
|
// converting a code stream into a byte stream.
|
|
order Order
|
|
write func(*Writer, uint32) error
|
|
bits uint32
|
|
nBits uint
|
|
width uint
|
|
// litWidth is the width in bits of literal codes.
|
|
litWidth uint
|
|
// hi is the code implied by the next code emission.
|
|
// overflow is the code at which hi overflows the code width.
|
|
hi, overflow uint32
|
|
// savedCode is the accumulated code at the end of the most recent Write
|
|
// call. It is equal to invalidCode if there was no such call.
|
|
savedCode uint32
|
|
// err is the first error encountered during writing. Closing the writer
|
|
// will make any future Write calls return errClosed
|
|
err error
|
|
// table is the hash table from 20-bit keys to 12-bit values. Each table
|
|
// entry contains key<<12|val and collisions resolve by linear probing.
|
|
// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
|
|
// The values are a 12-bit code.
|
|
table [tableSize]uint32
|
|
}
|
|
|
|
// writeLSB writes the code c for "Least Significant Bits first" data.
|
|
func (w *Writer) writeLSB(c uint32) error {
|
|
w.bits |= c << w.nBits
|
|
w.nBits += w.width
|
|
for w.nBits >= 8 {
|
|
if err := w.w.WriteByte(uint8(w.bits)); err != nil {
|
|
return err
|
|
}
|
|
w.bits >>= 8
|
|
w.nBits -= 8
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// writeMSB writes the code c for "Most Significant Bits first" data.
|
|
func (w *Writer) writeMSB(c uint32) error {
|
|
w.bits |= c << (32 - w.width - w.nBits)
|
|
w.nBits += w.width
|
|
for w.nBits >= 8 {
|
|
if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil {
|
|
return err
|
|
}
|
|
w.bits <<= 8
|
|
w.nBits -= 8
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// errOutOfCodes is an internal error that means that the writer has run out
|
|
// of unused codes and a clear code needs to be sent next.
|
|
var errOutOfCodes = errors.New("lzw: out of codes")
|
|
|
|
// incHi increments e.hi and checks for both overflow and running out of
|
|
// unused codes. In the latter case, incHi sends a clear code, resets the
|
|
// writer state and returns errOutOfCodes.
|
|
func (w *Writer) incHi() error {
|
|
w.hi++
|
|
if w.hi == w.overflow {
|
|
w.width++
|
|
w.overflow <<= 1
|
|
}
|
|
if w.hi == maxCode {
|
|
clear := uint32(1) << w.litWidth
|
|
if err := w.write(w, clear); err != nil {
|
|
return err
|
|
}
|
|
w.width = w.litWidth + 1
|
|
w.hi = clear + 1
|
|
w.overflow = clear << 1
|
|
for i := range w.table {
|
|
w.table[i] = invalidEntry
|
|
}
|
|
return errOutOfCodes
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Write writes a compressed representation of p to w's underlying writer.
|
|
func (w *Writer) Write(p []byte) (n int, err error) {
|
|
if w.err != nil {
|
|
return 0, w.err
|
|
}
|
|
if len(p) == 0 {
|
|
return 0, nil
|
|
}
|
|
if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff {
|
|
for _, x := range p {
|
|
if x > maxLit {
|
|
w.err = errors.New("lzw: input byte too large for the litWidth")
|
|
return 0, w.err
|
|
}
|
|
}
|
|
}
|
|
n = len(p)
|
|
code := w.savedCode
|
|
if code == invalidCode {
|
|
// This is the first write; send a clear code.
|
|
// https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
|
|
// "Variable-Length-Code LZW Compression" says that "Encoders should
|
|
// output a Clear code as the first code of each image data stream".
|
|
//
|
|
// LZW compression isn't only used by GIF, but it's cheap to follow
|
|
// that directive unconditionally.
|
|
clear := uint32(1) << w.litWidth
|
|
if err := w.write(w, clear); err != nil {
|
|
return 0, err
|
|
}
|
|
// After the starting clear code, the next code sent (for non-empty
|
|
// input) is always a literal code.
|
|
code, p = uint32(p[0]), p[1:]
|
|
}
|
|
loop:
|
|
for _, x := range p {
|
|
literal := uint32(x)
|
|
key := code<<8 | literal
|
|
// If there is a hash table hit for this key then we continue the loop
|
|
// and do not emit a code yet.
|
|
hash := (key>>12 ^ key) & tableMask
|
|
for h, t := hash, w.table[hash]; t != invalidEntry; {
|
|
if key == t>>12 {
|
|
code = t & maxCode
|
|
continue loop
|
|
}
|
|
h = (h + 1) & tableMask
|
|
t = w.table[h]
|
|
}
|
|
// Otherwise, write the current code, and literal becomes the start of
|
|
// the next emitted code.
|
|
if w.err = w.write(w, code); w.err != nil {
|
|
return 0, w.err
|
|
}
|
|
code = literal
|
|
// Increment e.hi, the next implied code. If we run out of codes, reset
|
|
// the writer state (including clearing the hash table) and continue.
|
|
if err1 := w.incHi(); err1 != nil {
|
|
if err1 == errOutOfCodes {
|
|
continue
|
|
}
|
|
w.err = err1
|
|
return 0, w.err
|
|
}
|
|
// Otherwise, insert key -> e.hi into the map that e.table represents.
|
|
for {
|
|
if w.table[hash] == invalidEntry {
|
|
w.table[hash] = (key << 12) | w.hi
|
|
break
|
|
}
|
|
hash = (hash + 1) & tableMask
|
|
}
|
|
}
|
|
w.savedCode = code
|
|
return n, nil
|
|
}
|
|
|
|
// Close closes the Writer, flushing any pending output. It does not close
|
|
// w's underlying writer.
|
|
func (w *Writer) Close() error {
|
|
if w.err != nil {
|
|
if w.err == errClosed {
|
|
return nil
|
|
}
|
|
return w.err
|
|
}
|
|
// Make any future calls to Write return errClosed.
|
|
w.err = errClosed
|
|
// Write the savedCode if valid.
|
|
if w.savedCode != invalidCode {
|
|
if err := w.write(w, w.savedCode); err != nil {
|
|
return err
|
|
}
|
|
if err := w.incHi(); err != nil && err != errOutOfCodes {
|
|
return err
|
|
}
|
|
} else {
|
|
// Write the starting clear code, as w.Write did not.
|
|
clear := uint32(1) << w.litWidth
|
|
if err := w.write(w, clear); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// Write the eof code.
|
|
eof := uint32(1)<<w.litWidth + 1
|
|
if err := w.write(w, eof); err != nil {
|
|
return err
|
|
}
|
|
// Write the final bits.
|
|
if w.nBits > 0 {
|
|
if w.order == MSB {
|
|
w.bits >>= 24
|
|
}
|
|
if err := w.w.WriteByte(uint8(w.bits)); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return w.w.Flush()
|
|
}
|
|
|
|
// Reset clears the Writer's state and allows it to be reused again
|
|
// as a new Writer.
|
|
func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) {
|
|
*w = Writer{}
|
|
w.init(dst, order, litWidth)
|
|
}
|
|
|
|
// NewWriter creates a new io.WriteCloser.
|
|
// Writes to the returned io.WriteCloser are compressed and written to w.
|
|
// It is the caller's responsibility to call Close on the WriteCloser when
|
|
// finished writing.
|
|
// The number of bits to use for literal codes, litWidth, must be in the
|
|
// range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
|
|
//
|
|
// It is guaranteed that the underlying type of the returned io.WriteCloser
|
|
// is a *Writer.
|
|
func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
|
|
return newWriter(w, order, litWidth)
|
|
}
|
|
|
|
func newWriter(dst io.Writer, order Order, litWidth int) *Writer {
|
|
w := new(Writer)
|
|
w.init(dst, order, litWidth)
|
|
return w
|
|
}
|
|
|
|
func (w *Writer) init(dst io.Writer, order Order, litWidth int) {
|
|
switch order {
|
|
case LSB:
|
|
w.write = (*Writer).writeLSB
|
|
case MSB:
|
|
w.write = (*Writer).writeMSB
|
|
default:
|
|
w.err = errors.New("lzw: unknown order")
|
|
return
|
|
}
|
|
if litWidth < 2 || 8 < litWidth {
|
|
w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
|
|
return
|
|
}
|
|
bw, ok := dst.(writer)
|
|
if !ok && dst != nil {
|
|
bw = bufio.NewWriter(dst)
|
|
}
|
|
w.w = bw
|
|
lw := uint(litWidth)
|
|
w.order = order
|
|
w.width = 1 + lw
|
|
w.litWidth = lw
|
|
w.hi = 1<<lw + 1
|
|
w.overflow = 1 << (lw + 1)
|
|
w.savedCode = invalidCode
|
|
}
|