// Package pijul is a library for working with Pijul repositories in Go.
package pijul

import (
	"bytes"
	"encoding/base32"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"time"

	"github.com/klauspost/compress/zstd"
)

// Change is the decoded form of a serialized Pijul change, as produced by
// DeserializeChange.
type Change struct {
	// Version is the change file format version. Only versions 4 and 6 are
	// accepted when parsing.
	Version      uint64
	// Message is the change message string.
	Message      string
	// Description is the optional description; it is left empty when the
	// serialized change carries none.
	Description  string
	// Timestamp is the change timestamp, parsed in UTC.
	Timestamp    time.Time
	// Authors holds one string-to-string map per author. For version 4
	// changes the keys are "name" plus, when present, "full_name" and
	// "email"; for version 6 the keys are whatever the change stores.
	Authors      []map[string]string
	// Dependencies is the first hash list in the hashed section
	// (presumably the changes this one depends on — confirm against libpijul).
	Dependencies []Hash
	// ExtraKnown is the second hash list in the hashed section.
	ExtraKnown   []Hash
	// Metadata is the raw metadata byte string; it is not interpreted here.
	Metadata     []byte
	// Changes is the list of hunks parsed from the hashed section.
	Changes      []Hunk
	// ContentsHash is the hash read at the end of the hashed section
	// (presumably the hash of Contents — confirm against libpijul).
	ContentsHash Hash
	// Unhashed is the JSON-decoded unhashed section, or the raw section as a
	// string when it is not valid JSON. It stays nil when the section is
	// absent or empty.
	Unhashed     any
	// Contents is the decompressed contents section, if the change has one.
	Contents     []byte
}

// Hash is a 32-byte change hash.
type Hash [32]byte

// base32Encoding is the standard base32 alphabet without '=' padding, used for
// the textual form of a Hash.
var base32Encoding = base32.StdEncoding.WithPadding(base32.NoPadding)

// String returns the textual form of h: the unpadded base32 encoding of the 32
// hash bytes followed by a single type byte with value 1 (Blake3). The zero
// Hash is rendered as the empty string.
func (h Hash) String() string {
	var zero Hash
	if h == zero {
		return ""
	}
	var tagged [len(h) + 1]byte
	copy(tagged[:], h[:])
	tagged[len(h)] = 1
	return base32Encoding.EncodeToString(tagged[:])
}

// HashFromBase32 parses the textual form produced by Hash.String. The input
// must decode to exactly 33 bytes whose last byte is the type byte 1;
// anything else is reported as an error.
func HashFromBase32(b32 string) (Hash, error) {
	raw, err := base32Encoding.DecodeString(b32)
	switch {
	case err != nil:
		return Hash{}, err
	case len(raw) != 33:
		return Hash{}, fmt.Errorf("expected 33 bytes, got %d", len(raw))
	case raw[32] != 1:
		return Hash{}, fmt.Errorf("expected Blake3 hash, got type %d", raw[32])
	}

	var parsed Hash
	copy(parsed[:], raw) // copies the 32 hash bytes; the trailing type byte is dropped
	return parsed, nil
}

// mustHashFromBase32 is like HashFromBase32 but panics if b32 cannot be parsed.
func mustHashFromBase32(b32 string) Hash {
	parsed, err := HashFromBase32(b32)
	if err == nil {
		return parsed
	}
	panic(err)
}

// offsets is the fixed-size header at the very start of a serialized change.
// DeserializeChange reads it as consecutive little-endian uint64 values and
// uses UnhashedOffset and ContentsOffset to locate the compressed sections;
// the remaining fields are decoded but not consulted by the code in this file.
type offsets struct {
	// Version is the first header field (presumably the format version —
	// confirm against libpijul).
	Version        uint64
	// HashedLen is presumably the length of the hashed section — TODO confirm
	// whether it is the compressed or decompressed length.
	HashedLen      uint64
	// UnhashedOffset is the byte offset in the serialized data where the
	// hashed section ends and the unhashed section begins.
	UnhashedOffset uint64
	// UnhashedLen is presumably the length of the unhashed section — TODO confirm.
	UnhashedLen    uint64
	// ContentsOffset is the byte offset in the serialized data where the
	// unhashed section ends and the contents section begins.
	ContentsOffset uint64
	// ContentsLen is presumably the length of the contents section — TODO confirm.
	ContentsLen    uint64
	// Total is presumably the total size of the change — TODO confirm.
	Total          uint64
}

// DeserializeChange decodes a serialized Pijul change.
//
// data starts with a little-endian offsets header, followed by up to three
// zstandard-compressed sections: the hashed data (always parsed), the unhashed
// data (present when ContentsOffset is greater than UnhashedOffset) and the
// contents (present when ContentsOffset lies before the end of data).
//
// The unhashed section is decoded as JSON into Change.Unhashed; if it is not
// valid JSON it is kept as a raw string instead.
//
// An error is returned when the header cannot be read, when the offsets in the
// header point outside data, when a section cannot be decompressed, or when
// the hashed section cannot be parsed.
func DeserializeChange(data []byte) (Change, error) {
	var off offsets
	if err := binary.Read(bytes.NewReader(data), binary.LittleEndian, &off); err != nil {
		return Change{}, fmt.Errorf("error reading 'offsets' header: %w", err)
	}

	// The offsets come straight from the (possibly truncated or corrupt)
	// input, so they must be validated before being used as slice bounds;
	// otherwise a bad file would panic instead of returning an error.
	// Comparisons are done in uint64 to avoid wrap-around when converting
	// huge offsets to int.
	headerLen := uint64(binary.Size(offsets{}))
	dataLen := uint64(len(data))
	if off.UnhashedOffset < headerLen || off.UnhashedOffset > dataLen {
		return Change{}, fmt.Errorf("error reading 'offsets' header: unhashed offset %d is outside the valid range [%d, %d]",
			off.UnhashedOffset, headerLen, dataLen)
	}
	hasUnhashed := off.ContentsOffset > off.UnhashedOffset
	if hasUnhashed && off.ContentsOffset > dataLen {
		return Change{}, fmt.Errorf("error reading 'offsets' header: contents offset %d is beyond the end of the data (%d bytes)",
			off.ContentsOffset, dataLen)
	}

	zr, err := zstd.NewReader(bytes.NewReader(data[headerLen:off.UnhashedOffset]))
	if err != nil {
		return Change{}, fmt.Errorf("error creating zstandard decompressor: %w", err)
	}
	defer zr.Close()

	hashed, err := io.ReadAll(zr)
	if err != nil {
		return Change{}, fmt.Errorf("error getting the 'hashed' data chunk from the change: %w", err)
	}

	var c Change
	if err := c.parseHashedData(hashed); err != nil {
		return Change{}, err
	}

	if hasUnhashed {
		if err := zr.Reset(bytes.NewReader(data[off.UnhashedOffset:off.ContentsOffset])); err != nil {
			return Change{}, fmt.Errorf("error getting the 'unhashed' data chunk from the change: %w", err)
		}
		unhashed, err := io.ReadAll(zr)
		if err != nil {
			return Change{}, fmt.Errorf("error getting the 'unhashed' data chunk from the change: %w", err)
		}
		if len(unhashed) > 0 {
			// The unhashed section is normally JSON; fall back to the raw
			// text rather than failing when it is not.
			if err := json.Unmarshal(unhashed, &c.Unhashed); err != nil {
				c.Unhashed = string(unhashed)
			}
		}
	}

	if off.ContentsOffset < dataLen {
		if err := zr.Reset(bytes.NewReader(data[off.ContentsOffset:])); err != nil {
			return Change{}, fmt.Errorf("error getting the 'contents' data chunk from the change: %w", err)
		}
		c.Contents, err = io.ReadAll(zr)
		if err != nil {
			return Change{}, fmt.Errorf("error getting the 'contents' data chunk from the change: %w", err)
		}
	}

	return c, nil
}

// parseHashedData fills c from the decompressed "hashed" section of a change
// (the data stored in the Hashed struct in libpijul).
//
// Fields are consumed from data strictly in serialization order: version,
// message, optional description, timestamp, authors, dependencies, extra-known
// hashes, metadata, hunks and finally the contents hash. Only format versions
// 4 and 6 are understood; they differ in how authors and hunks are encoded.
// The first field that fails to parse aborts the whole operation.
func (c *Change) parseHashedData(data []byte) error {
	var err error

	if data, c.Version, err = uint64LE(data); err != nil {
		return err
	}
	switch c.Version {
	case 4, 6:
		// Supported versions.
	default:
		return fmt.Errorf("unknown change file version: %d", c.Version)
	}

	if data, c.Message, err = toString(lengthData(uint64LE))(data); err != nil {
		return err
	}

	// The description is optional; an absent one leaves c.Description empty.
	data, description, err := option(toString(lengthData(uint64LE)))(data)
	if err != nil {
		return err
	}
	if description != nil {
		c.Description = *description
	}

	// The timestamp is stored as a length-prefixed string and read as UTC.
	parseTimestamp := func(raw []byte) (time.Time, error) {
		return time.ParseInLocation("2006-01-02T15:04:05.999999999Z", string(raw), time.UTC)
	}
	if data, c.Timestamp, err = mapWithError(lengthData(uint64LE), parseTimestamp)(data); err != nil {
		return err
	}

	// Version was validated above, so anything that is not 6 is 4.
	if c.Version == 6 {
		data, c.Authors, err = vec(hashMap(toString(lengthData(uint64LE)), toString(lengthData(uint64LE))))(data)
	} else {
		data, c.Authors, err = vec(authorV4)(data)
	}
	if err != nil {
		return err
	}

	if data, c.Dependencies, err = vec(hash)(data); err != nil {
		return err
	}

	if data, c.ExtraKnown, err = vec(hash)(data); err != nil {
		return err
	}

	if data, c.Metadata, err = lengthData(uint64LE)(data); err != nil {
		return err
	}

	if c.Version == 6 {
		data, c.Changes, err = vec(hunk)(data)
	} else {
		data, c.Changes, err = vec(hunkV4)(data)
	}
	if err != nil {
		return err
	}

	// The contents hash is the last field; whatever follows it is ignored.
	_, c.ContentsHash, err = hash(data)
	return err
}

// authorV4 parses one author record in the version 4 layout: a mandatory name
// followed by an optional full name and an optional email, in that order.
//
// The result maps "name" to the name and, only when the corresponding optional
// value is present, "full_name" and "email" to theirs. It returns the data
// remaining after the record; on failure the map is nil and the remainder is
// whatever the failing parser handed back.
func authorV4(data []byte) ([]byte, map[string]string, error) {
	rest, name, err := rustString(data)
	if err != nil {
		return rest, nil, err
	}

	author := make(map[string]string, 3)
	author["name"] = name

	// The two optional fields share the same encoding and differ only in the
	// key they are stored under.
	for _, key := range []string{"full_name", "email"} {
		next, value, err := option(rustString)(rest)
		rest = next
		if err != nil {
			return rest, nil, err
		}
		if value != nil {
			author[key] = *value
		}
	}

	return rest, author, nil
}

// hash parses a tagged hash: a 32-bit little-endian tag followed, for tag 1,
// by the 32 hash bytes. Tag 0 carries no payload and yields the zero Hash; any
// other tag is an error. It returns the data remaining after the parsed value.
func hash(data []byte) ([]byte, Hash, error) {
	rest, tag, err := uint32LE(data)
	if err != nil {
		return rest, Hash{}, err
	}

	switch tag {
	case 0:
		return rest, Hash{}, nil
	case 1:
		rest, raw, err := take(32)(rest)
		if err != nil {
			return rest, Hash{}, err
		}
		return rest, Hash(raw), nil
	default:
		return rest, Hash{}, fmt.Errorf("bad tag for Hash, expected 0 or 1, got %d", tag)
	}
}