change: introduce changestore, print deleted lines

laumann
Jul 13, 2023, 8:15 AM
5D2IYPL75HEP6JUEILEADSZRRSV72NECT6UQR3PORNDR35I2P5GQC

Dependencies

  • [2] WQSKMNHR change: reformat line
  • [3] L3HKOF4W wrap vendored zstd seekable in own file, #2
  • [4] LKIBESCN change: adjust function signatures
  • [5] VXQYIOBX change: avoid crash on different format versions
  • [6] LPGHALLK change: adjust printing of file adds
  • [7] 7WA2F6RS change: decode resurrectzombies, addroot, delroot
  • [8] RIWSVVAS change: decompress the 'contents' with zstd_seekable
  • [9] KDJUAAAL change: prefix function names with change_
  • [10] AEMTSEJX change: simplify readout of offsets
  • [11] 2U7P5SFQ Change struct names "struct foo -> typedef struct Foo"
  • [12] Q7TKZCJP Add initial support for reading the offsets from a (fixed) change
  • [13] YDQLW2ZO change: rework printing of patches - print Edit and Newvertex types
  • [14] TGT4VSME change: decode FILE_ADD
  • [15] VKLGQREY change: add base32 decode, initial deconstruction of hashed
  • [16] XTKRT6OQ format the codebase
  • [17] NZNIG2UL Fix lots of warnings, adjust build flags
  • [18] DDLQMNGX change: find change files from repo dir
  • [19] NEORNIZE change: decode and print file moves
  • [20] XRGUHSC4 change: fix some formatting issues
  • [21] 3FT3XTJM change: support -v/-h flags
  • [22] WFA5BBRF Fix warnings
  • [23] QEFCNNVC change: display offsets for given change file
  • [24] HKBM7HZG minor: formatting
  • [25] XTVLIC24 change: refactor to print_change()
  • [26] N3PUHKQN change: fix scoping issue
  • [27] JVU3TTT5 all: switch away from typedefing anonymous structs
  • [28] QYRJIOYP change: separate decoding and printing of hashed struct
  • [29] WMFNGOYT change: reduce printed noise, rework some code
  • [30] 33ZXTRXR change: decode and print file deletions
  • [31] OBKF6SII change: decompress the hashed section too
  • [32] UPYQ5FMN make fmt
  • [*] Y26WT3ZF change: decode message, description and timestamp
  • [*] ZPNA2D42 ani: add functions to locate .pijul
  • [*] JAGXXSR7 change: initial work on decoding hunks
  • [*] QA7HY2TH hunk + hash: replace abort() calls with die()
  • [*] YG4DZB3A add representation of hash, decode dependencies

Change contents

  • edit in zstdseek.c at line 36
    [3.1012][3.1012:1057]()
    printf("decompress result: %lu\n", result);
  • edit in scaffold.h at line 7
    [34.164]
    [35.0]
    void *xrealloc(void *ptr, size_t size);
  • edit in scaffold.c at line 50
    [34.459]
    [34.459]
    memset(ret, 0, size);
  • edit in scaffold.c at line 53
    [34.506]
    [34.506]
    return ret;
    }
    void *
    xrealloc(void *ptr, size_t size)
    {
    void *ret;
  • edit in scaffold.c at line 62
    [34.507]
    [34.507]
    ret = realloc(ptr, size);
    if (!ret)
    die("Unable to allocate memory");
  • edit in hunk.h at line 138
    [8.1276]
    [36.1332]
    void hunk_hunklistfree(struct hunklist *);
  • edit in hunk.c at line 51
    [8.1449]
    [37.44]
    }
    void
    hunk_hunklistfree(struct hunklist *hunklist)
    {
    size_t i;
    for (i = 0; i < hunklist->len; i++) {
    struct basehunk *b = &hunklist->entries[i];
    switch (b->hunktype) {
    case FILE_MOVE:
    /* TBD */
    break;
    case FILE_DEL:
    /* TBD */
    break;
    default:
    break;
    }
    }
  • edit in hash.h at line 11
    [38.195]
    [38.195]
    /* FIXME(laumann): this should be a union { uint8_t blake3[BLAKE3_BYTES] } */
  • edit in hash.h at line 22
    [8.1579]
    [8.40]
    /* Return 1 when hashes are equal, 0 otherwise */
    int hasheq(struct hash *, struct hash *);
  • edit in hash.c at line 8
    [37.69]
    [38.421]
    #include "blake3.h"
  • edit in hash.c at line 37
    [8.77]
    [8.77]
    int
    hasheq(struct hash *h1, struct hash *h2)
    {
    size_t i;
  • edit in hash.c at line 43
    [8.78]
    [8.78]
    if (h1->variant != h2->variant)
    return 0;
    switch (h1->variant) {
    case HASH_NONE:
    return 1;
    case HASH_BLAKE3:
    for (i = 0; i < BLAKE3_LEN; i++)
    if (h1->bytes[i] != h2->bytes[i])
    return 0;
    return 1;
    default:
    die("unknown hash variant: %u", h1->variant);
    }
    }
  • edit in change.h at line 77
    [8.2349]
    [8.664]
    void hashedfree(struct hashed *);
  • edit in change.h at line 87
    [8.2396]
    [8.1005]
    /**
    * A single change as read from a change file. loadchange() fills in
    * the offsets via change_decode_offsets(), the hashed struct, and
    * the contents buffer via change_read_contents().
    */
    struct change {
    struct offsets offsets;
    struct hashed hashed;
    /* released with free() by changestorefree() when non-NULL */
    uint8_t *contents;
    };
    /**
    * One slot of a changestore: a loaded change, the hash it is looked
    * up by, and a number assigned by the loader.
    */
    struct changeentry {
    /* number assigned in change(); the first loaded change gets 1 */
    size_t num;
    /* key that changestoreget() compares with hasheq() */
    struct hash hash;
    struct change change;
    };
    /**
    * structure for keeping track of multiple changes
    *
    * entries is allocated by changestoreinit() with room for cap
    * elements; len is the number of slots currently in use and is the
    * bound used by changestoreget() and changestorefree().
    */
    struct changestore {
    /* number of entries in use */
    size_t len;
    /* number of entries allocated */
    size_t cap;
    struct changeentry *entries;
    };
    void changestoreinit(struct changestore *, size_t cap);
    void changestorefree(struct changestore *);
    /**
    * either fetch a change that's already loaded, or load a change into
    * the changestore.
    */
    struct change *changestoreget(struct changestore *, struct hash *);
  • edit in change.c at line 550
    [8.2024]
    [8.0]
    }
    /**
    * Given an atom from a given change (by hash), extract the contents to return.
    *
    * Returns a malloc()'ed buffer that must be free()'d, if there were
    * any contents available. Returns NULL otherwise. The size of the
    * returned buffer is provided in the "n" parameter.
    */
    static uint8_t *
    changecontents(
    struct changestore *changes, struct atom *change, uint8_t *contents,
    size_t *n
    )
    {
    struct newvertex *v;
    struct edgemap *e;
    struct change *ch;
    uint8_t *buf = NULL;
    size_t sz = 0;
    switch (change->atomtype) {
    case NEW_VERTEX:
    v = &change->newvertex;
    sz = v->end - v->start;
    buf = xmalloc(sizeof(uint8_t) * sz);
    memcpy(buf, &contents[v->start], sz);
    break;
    case EDGE_MAP:
    e = &change->edgemap;
    if (e->edges.len == 0) {
    /* Technically an error */
    break;
    }
    if (!(e->edges.entries[0].flag & EDGE_FLAG_DELETED))
    break;
    for (size_t i = 0; i < e->edges.len; i++) {
    struct edge *edge = &e->edges.entries[i];
    // FIXME: need to keep track of current! see
    // /home/t/sources/pijul/pijul/libpijul/src/changestore/filesystem.rs (get_contents_ext)
    size_t z = edge->to.end - edge->to.start;
    /* TODO loop over the edges, extract relevant contents from the changestore */
    // keep track of current vertex
    ch = changestoreget(changes, &edge->to.change);
    /* printf("changestoreget("); */
    /* hashprint(&edge->to.change); */
    /* printf(")"); */
    /* printf(" - %s\n", ch ? "ok" : "fail :-("); */
    // We find a change with some contents, and we
    // need to extract contents[start; end] we
    // then need to either xmalloc() buf, or
    // extend it to include more bytes
    if (ch) {
    if (buf) {
    buf = xrealloc(
    buf, sz + sizeof(uint8_t) * z
    );
    } else {
    buf = xmalloc(sizeof(uint8_t) * z);
    }
    memcpy(&buf[sz], &ch->contents[edge->to.start],
    z);
    sz += z;
    }
    }
    break;
    }
    *n = sz;
    return buf;
  • edit in change.c at line 647
    [8.570]
    [8.570]
    }
    void
    printcontents(uint8_t *buf, size_t len, char prefix)
    {
    int waseol = 1;
    size_t i;
    uint8_t x;
    for (i = 0; i < len; i++) {
    if (waseol) {
    printf("%c ", prefix);
    waseol = 0;
    }
    x = buf[i];
    putchar(x);
    if (x == '\n')
    waseol = 1;
    }
    if (!waseol)
    printf("\n");
  • replacement in change.c at line 675
    [8.348][8.4060:4106]()
    print_atom(struct atom *a, uint8_t *contents)
    [8.348]
    [8.387]
    print_atom(struct changestore *changes, struct atom *a, uint8_t *contents)
  • replacement in change.c at line 679
    [8.4149][8.130:142](),[8.417][8.130:142](),[8.142][8.569:593](),[8.246][8.569:593]()
    uint8_t x;
    size_t i;
    int waseol;
    [8.4149]
    [8.446]
    size_t n;
    uint8_t *res;
  • replacement in change.c at line 685
    [8.510][8.510:581]()
    printf("up: ");
    print_positionlist(&v->upcontext);
    printf(", ");
    [8.510]
    [8.581]
    /* printf("up: "); */
    /* print_positionlist(&v->upcontext); */
    /* printf(", "); */
  • replacement in change.c at line 689
    [8.582][8.582:634]()
    printf("new: %lu:%lu, down: ", v->start, v->end);
    [8.582]
    [8.634]
    /* printf("new: %lu:%lu, down: ", v->start, v->end); */
  • replacement in change.c at line 691
    [8.635][8.635:690]()
    print_positionlist(&v->downcontext);
    printf("\n");
    [8.635]
    [8.690]
    /* print_positionlist(&v->downcontext); */
    /* printf("\n"); */
  • replacement in change.c at line 694
    [8.691][8.691:890]()
    waseol = 1;
    for (i = v->start; i < v->end; i++) {
    if (waseol) {
    printf("+ ");
    waseol = 0;
    }
    x = contents[i];
    putchar(x);
    if (x == '\n')
    waseol = 1;
    }
    printf("\n");
    [8.691]
    [8.890]
    printcontents(&contents[v->start], v->end - v->start, '+');
  • replacement in change.c at line 699
    [8.935][8.574:949]()
    printf("edgemap {\n edges: [\n");
    for (i = 0; i < m->edges.len; i++) {
    struct edge *e = &m->edges.entries[i];
    printf(" edge { previous = %u, flag = ",
    e->previous);
    print_edgeflags(e->flag);
    printf(", from = Position { .. }, to = vertex { .. }, introducedby = "
    );
    hashprint(&e->introducedby);
    printf(" }\n");
    }
    printf(" ]\n}\n");
    [8.935]
    [8.962]
    /* edgemap debugging */
    /* printf("edgemap {\n edges: [\n"); */
    /* for (i = 0; i < m->edges.len; i++) { */
    /* struct edge *e = &m->edges.entries[i]; */
    /* printf(" edge { previous = %u, flag = ", */
    /* e->previous); */
    /* print_edgeflags(e->flag); */
    /* printf(", from = position { change = "); */
    /* hashprint(&e->from.change); */
    /* printf(", pos = %lu }", e->from.pos); */
    /* printf(", to = vertex { change = "); */
    /* hashprint(&e->to.change); */
    /* printf(", start = %lu, end = %lu }, introducedby = ", */
    /* e->to.start, e->to.end); */
    /* hashprint(&e->introducedby); */
    /* printf(" }\n"); */
    /* } */
    /* printf(" ]\n}\n"); */
    res = changecontents(changes, a, contents, &n);
    if (res)
    printcontents(res, n, '-');
  • replacement in change.c at line 740
    [8.875][8.875:976]()
    if (bincode_getu8(&bc)) {
    len = bincode_getu64(&bc);
    m->encoding = bincode_getstr(&bc, len);
    }
    [8.875]
    [8.976]
    change_decode_encoding(&bc, &m->encoding);
  • replacement in change.c at line 744
    [8.258][8.4150:4205]()
    print_change(struct hashed *hashed, uint8_t *contents)
    [8.258]
    [8.306]
    print_change(
    struct changestore *changes, struct hashed *hashed, uint8_t *contents
    )
  • replacement in change.c at line 775
    [8.1092][8.619:645](),[8.645][8.1117:1205](),[8.1117][8.1117:1205]()
    printf("\n# Hunks\n\n");
    /* FIXME: Put all the hashes of dependencies in a big list, and number them from 2 */
    [8.1092]
    [8.1205]
    printf("\n# Hunks\n");
  • replacement in change.c at line 779
    [8.4259][8.215:242](),[8.1295][8.215:242]()
    printf("%lu. %s", i + 1,
    [8.4259]
    [8.242]
    printf("\n%lu. %s", i + 1,
  • replacement in change.c at line 789
    [8.1554][8.1554:1582]()
    print_atom(c, contents);
    [8.1554]
    [8.1582]
    print_atom(changes, c, contents);
  • replacement in change.c at line 801
    [8.1779][8.102:133]()
    print_atom(ratm, contents);
    [8.1779]
    [8.1779]
    print_atom(changes, c, contents);
    print_atom(changes, replacement, contents);
  • replacement in change.c at line 829
    [6.551][8.1140:1179](),[8.1140][8.1140:1179]()
    print_atom(&f->contents, contents);
    [6.551]
    [6.552]
    print_atom(changes, &f->contents, contents);
  • replacement in change.c at line 903
    [7.1206][7.1206:1241]()
    print_atom(&r->name, contents);
    [7.1206]
    [7.1241]
    print_atom(changes, &r->name, contents);
  • replacement in change.c at line 905
    [7.1263][7.1263:1299]()
    print_atom(&r->inode, contents);
    [7.1263]
    [7.1299]
    print_atom(changes, &r->inode, contents);
  • edit in change.c at line 912
    [8.1853]
    [8.992]
    }
    }
    struct change *
    changestoreget(struct changestore *store, struct hash *hash)
    {
    size_t i;
    for (i = 0; i < store->len; i++) {
    if (hasheq(hash, &store->entries[i].hash))
    return &store->entries[i].change;
    }
    return NULL;
    }
    /**
    * Prepare an empty changestore with room for "cap" entries.
    *
    * The entry array comes from xmalloc(); the store starts out with
    * len == 0.
    */
    void
    changestoreinit(struct changestore *s, size_t cap)
    {
    	s->len = 0;
    	s->cap = cap;
    	s->entries = xmalloc(sizeof(*s->entries) * cap);
    }
    void
    changestorefree(struct changestore *s)
    {
    size_t i;
    struct changeentry *e;
    struct change *c;
    /* Free all the entries first */
    for (i = 0; i < s->len; i++) {
    e = &s->entries[i];
    c = &e->change;
    hashedfree(&c->hashed);
    if (c->contents)
    free(c->contents);
  • edit in change.c at line 950
    [8.995]
    [8.995]
    }
    void
    hashedfree(struct hashed *h)
    {
    hashlist_free(&h->dependencies);
    hashlist_free(&h->extraknown);
    if (h->metadata)
    free(h->metadata);
  • replacement in change.c at line 962
    [8.1002][8.4:82](),[8.2599][8.4:82](),[8.4][8.4:82]()
    * Takes an input hash, and tries to find a change file to open and read out.
    [8.1002]
    [8.82]
    * Format a path to a change file, given the input repo dir and a
    * hash. The result is placed in dst.
  • replacement in change.c at line 965
    [8.86][8.2199:2203](),[8.2199][8.2199:2203](),[8.2203][4.119:178]()
    int
    change(const char *hash, int verbose, const char *repodir)
    [8.86]
    [8.2212]
    static void
    formatchangepath(char *dst, const char *repodir, const char *hashstr)
  • edit in change.c at line 968
    [8.2214][8.2214:2233](),[8.2233][8.4527:4548](),[8.3285][8.2600:2620](),[8.4548][8.2600:2620](),[8.2254][8.2600:2620](),[8.2620][8.1401:1429](),[8.1429][8.103:117](),[8.117][8.0:32](),[8.32][8.4549:4572](),[8.231][8.2254:2255](),[8.918][8.2254:2255](),[8.2126][8.2254:2255](),[8.3302][8.2254:2255](),[8.4572][8.2254:2255](),[8.2254][8.2254:2255](),[8.2255][8.148:188]()
    int fd;
    int err;
    struct offsets off;
    uint8_t *contents;
    uint8_t contents_hash[32];
    char *h, *p;
    char chfile[PATH_MAX] = { 0 };
    struct hashed hashed;
    /* FIXME - make this a lot nicer... */
  • replacement in change.c at line 969
    [8.282][8.282:323]()
    p = stpncpy(chfile, repodir, PATH_MAX);
    [8.282]
    [8.323]
    char *p;
    p = stpncpy(dst, repodir, PATH_MAX);
  • replacement in change.c at line 977
    [8.412][8.232:251](),[8.2255][8.232:251](),[8.251][8.413:441]()
    h = (char *)hash;
    *p++ = *h++;
    *p++ = *h++;
    [8.412]
    [8.441]
    *p++ = hashstr[0];
    *p++ = hashstr[1];
  • replacement in change.c at line 981
    [8.292][8.455:479]()
    p = stpncpy(p, h, 51);
    [8.292]
    [8.479]
    p = stpncpy(p, &hashstr[2], 51);
  • edit in change.c at line 983
    [8.506]
    [8.339]
    }
  • replacement in change.c at line 985
    [8.340][8.340:374]()
    printf("Opening: %s\n", chfile);
    [8.340]
    [2.0]
    static int
    loadchange(
    struct change *c, struct hash *hash, const char *repodir,
    const char *hashstr, int verbose
    )
    {
    int fd, err;
    char chfile[PATH_MAX] = { 0 };
    struct offsets *off;
    struct hashed *hashed;
    uint8_t contents_hash[32];
    off = &c->offsets;
    hashed = &c->hashed;
    err = 0;
    printf("loading change %s\n", hashstr);
    formatchangepath(chfile, repodir, hashstr);
  • replacement in change.c at line 1010
    [8.2474][8.3303:3344]()
    base32_hashdecode(contents_hash, hash);
    [8.2474]
    [8.2474]
    // FIXME: hash decoding should result in a "struct hash" not
    // just raw bytes
    base32_hashdecode(contents_hash, hashstr);
    memcpy(hash->bytes, contents_hash, BLAKE3_BYTES);
    hash->variant = HASH_BLAKE3;
  • replacement in change.c at line 1016
    [8.2475][8.103:143]()
    err = change_decode_offsets(fd, &off);
    [8.2475]
    [8.2508]
    err = change_decode_offsets(fd, off);
  • replacement in change.c at line 1022
    [8.1874][5.266:335]()
    if (off.version != VERSION) {
    if (off.version == VERSION_NOENC) {
    [8.1874]
    [5.335]
    if (off->version != VERSION) {
    if (off->version == VERSION_NOENC) {
  • replacement in change.c at line 1025
    [5.421][5.421:445]()
    off.version);
    [5.421]
    [5.445]
    off->version);
  • replacement in change.c at line 1028
    [5.512][5.512:536]()
    off.version);
    [5.512]
    [5.536]
    off->version);
  • replacement in change.c at line 1034
    [8.2156][8.2156:2211](),[8.2211][8.919:944]()
    fd, off.unhashed_off - OFFSETS_SIZE, off.hashed_len,
    contents_hash, &hashed
    [8.2156]
    [8.2227]
    fd, off->unhashed_off - OFFSETS_SIZE, off->hashed_len,
    contents_hash, hashed
  • replacement in change.c at line 1046
    [8.1116][8.263:322]()
    if (lseek(fd, (off_t)off.contents_off, SEEK_SET) == -1) {
    [8.1116]
    [8.1168]
    if (lseek(fd, (off_t)off->contents_off, SEEK_SET) == -1) {
  • replacement in change.c at line 1050
    [8.1202][8.1202:1246]()
    contents = change_read_contents(fd, &off);
    [8.1202]
    [8.1246]
    c->contents = change_read_contents(fd, off);
  • replacement in change.c at line 1052
    [8.1260][8.1260:1312]()
    dump_buf("contents", contents, off.contents_len);
    [8.1260]
    [8.1312]
    dump_buf("contents", c->contents, off->contents_len);
    out:
    return err;
    }
  • replacement in change.c at line 1057
    [8.1313][8.1856:1890]()
    print_change(&hashed, contents);
    [8.1313]
    [8.1674]
    /**
    * Same as loadchange() but takes a "struct hash" as input instead of a string
    */
    static int
    loadchangeh(
    struct change *c, struct hash *h, const char *repodir,
    struct hash *hash, int verbose
    )
    {
    char hashstr[54];
  • replacement in change.c at line 1068
    [8.184][8.239:256](),[8.904][8.239:256](),[8.239][8.239:256](),[8.2966][8.2966:2983]()
    free(contents);
    out:
    close(fd);
    [8.1675]
    [8.2983]
    /* FIXME consider that it might not only be blake3 bytes */
    base32_hashencode(hashstr, hash->bytes);
    return loadchange(c, h, repodir, hashstr, verbose);
    }
    /**
    * Takes an input hash, and tries to find a change file to open and read out.
    */
    int
    change(const char *hash, int verbose, const char *repodir)
    {
    int err;
    struct changestore changestore = { 0 };
    struct changeentry *ch;
    size_t i, x;
    changestoreinit(&changestore, 32);
    ch = &changestore.entries[0];
    err = loadchange(&ch->change, &ch->hash, repodir, hash, verbose);
    if (err)
    return err;
    x = 1;
    changestore.len = 1;
    changestore.entries[0].num = 1;
    for (i = 0; i < ch->change.hashed.dependencies.len; i++) {
    err = loadchangeh(
    &changestore.entries[x].change,
    &changestore.entries[x].hash, repodir,
    &ch->change.hashed.dependencies.entries[i], verbose
    );
    if (err)
    goto changeout;
    changestore.len++;
    changestore.entries[x].num = ++x;
    }
    for (i = 0; i < ch->change.hashed.extraknown.len; i++) {
    err = loadchangeh(
    &changestore.entries[x].change,
    &changestore.entries[x].hash, repodir,
    &ch->change.hashed.extraknown.entries[i], verbose
    );
    if (err)
    goto changeout;
    changestore.len++;
    changestore.entries[x].num = ++x;
    }
    printf("x = %lu\n", x);
    print_change(&changestore, &ch->change.hashed, ch->change.contents);
    changeout:
    changestorefree(&changestore);