notes.con commit Add selftests verifying that we can parse notes trees with various fanouts (0057c09)
   1#include "cache.h"
   2#include "commit.h"
   3#include "notes.h"
   4#include "refs.h"
   5#include "utf8.h"
   6#include "strbuf.h"
   7#include "tree-walk.h"
   8
   9/*
  10 * Use a non-balancing simple 16-tree structure with struct int_node as
  11 * internal nodes, and struct leaf_node as leaf nodes. Each int_node has a
  12 * 16-array of pointers to its children.
  13 * The bottom 2 bits of each pointer is used to identify the pointer type
  14 * - ptr & 3 == 0 - NULL pointer, assert(ptr == NULL)
  15 * - ptr & 3 == 1 - pointer to next internal node - cast to struct int_node *
  16 * - ptr & 3 == 2 - pointer to note entry - cast to struct leaf_node *
  17 * - ptr & 3 == 3 - pointer to subtree entry - cast to struct leaf_node *
  18 *
  19 * The root node is a statically allocated struct int_node.
  20 */
  21struct int_node {
  22        void *a[16];
  23};
  24
  25/*
  26 * Leaf nodes come in two variants, note entries and subtree entries,
  27 * distinguished by the LSb of the leaf node pointer (see above).
  28 * As a note entry, the key is the SHA1 of the referenced commit, and the
  29 * value is the SHA1 of the note object.
  30 * As a subtree entry, the key is the prefix SHA1 (w/trailing NULs) of the
  31 * referenced commit, using the last byte of the key to store the length of
  32 * the prefix. The value is the SHA1 of the tree object containing the notes
  33 * subtree.
  34 */
  35struct leaf_node {
  36        unsigned char key_sha1[20];
  37        unsigned char val_sha1[20];
  38};
  39
  40#define PTR_TYPE_NULL     0
  41#define PTR_TYPE_INTERNAL 1
  42#define PTR_TYPE_NOTE     2
  43#define PTR_TYPE_SUBTREE  3
  44
  45#define GET_PTR_TYPE(ptr)       ((uintptr_t) (ptr) & 3)
  46#define CLR_PTR_TYPE(ptr)       ((void *) ((uintptr_t) (ptr) & ~3))
  47#define SET_PTR_TYPE(ptr, type) ((void *) ((uintptr_t) (ptr) | (type)))
  48
  49#define GET_NIBBLE(n, sha1) (((sha1[n >> 1]) >> ((~n & 0x01) << 2)) & 0x0f)
  50
  51#define SUBTREE_SHA1_PREFIXCMP(key_sha1, subtree_sha1) \
  52        (memcmp(key_sha1, subtree_sha1, subtree_sha1[19]))
  53
  54static struct int_node root_node;
  55
  56static int initialized;
  57
  58static void load_subtree(struct leaf_node *subtree, struct int_node *node,
  59                unsigned int n);
  60
  61/*
  62 * To find a leaf_node:
  63 * 1. Start at the root node, with n = 0
  64 * 2. Use the nth nibble of the key as an index into a:
  65 *    - If a[n] is an int_node, recurse into that node and increment n
  66 *    - If a leaf_node with matching key, return leaf_node (assert note entry)
  67 *    - If a matching subtree entry, unpack that subtree entry (and remove it);
  68 *      restart search at the current level.
  69 *    - Otherwise, we end up at a NULL pointer, or a non-matching leaf_node.
  70 *      Backtrack out of the recursion, one level at a time and check a[0]:
  71 *      - If a[0] at the current level is a matching subtree entry, unpack that
  72 *        subtree entry (and remove it); restart search at the current level.
  73 */
  74static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n,
  75                const unsigned char *key_sha1)
  76{
  77        struct leaf_node *l;
  78        unsigned char i = GET_NIBBLE(n, key_sha1);
  79        void *p = tree->a[i];
  80
  81        switch(GET_PTR_TYPE(p)) {
  82        case PTR_TYPE_INTERNAL:
  83                l = note_tree_find(CLR_PTR_TYPE(p), n + 1, key_sha1);
  84                if (l)
  85                        return l;
  86                break;
  87        case PTR_TYPE_NOTE:
  88                l = (struct leaf_node *) CLR_PTR_TYPE(p);
  89                if (!hashcmp(key_sha1, l->key_sha1))
  90                        return l; /* return note object matching given key */
  91                break;
  92        case PTR_TYPE_SUBTREE:
  93                l = (struct leaf_node *) CLR_PTR_TYPE(p);
  94                if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) {
  95                        /* unpack tree and resume search */
  96                        tree->a[i] = NULL;
  97                        load_subtree(l, tree, n);
  98                        free(l);
  99                        return note_tree_find(tree, n, key_sha1);
 100                }
 101                break;
 102        case PTR_TYPE_NULL:
 103        default:
 104                assert(!p);
 105                break;
 106        }
 107
 108        /*
 109         * Did not find key at this (or any lower) level.
 110         * Check if there's a matching subtree entry in tree->a[0].
 111         * If so, unpack tree and resume search.
 112         */
 113        p = tree->a[0];
 114        if (GET_PTR_TYPE(p) != PTR_TYPE_SUBTREE)
 115                return NULL;
 116        l = (struct leaf_node *) CLR_PTR_TYPE(p);
 117        if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) {
 118                /* unpack tree and resume search */
 119                tree->a[0] = NULL;
 120                load_subtree(l, tree, n);
 121                free(l);
 122                return note_tree_find(tree, n, key_sha1);
 123        }
 124        return NULL;
 125}
 126
 127/*
 128 * To insert a leaf_node:
 129 * 1. Start at the root node, with n = 0
 130 * 2. Use the nth nibble of the key as an index into a:
 131 *    - If a[n] is NULL, store the tweaked pointer directly into a[n]
 132 *    - If a[n] is an int_node, recurse into that node and increment n
 133 *    - If a[n] is a leaf_node:
 134 *      1. Check if they're equal, and handle that (abort? overwrite?)
 135 *      2. Create a new int_node, and store both leaf_nodes there
 136 *      3. Store the new int_node into a[n].
 137 */
 138static int note_tree_insert(struct int_node *tree, unsigned char n,
 139                const struct leaf_node *entry, unsigned char type)
 140{
 141        struct int_node *new_node;
 142        const struct leaf_node *l;
 143        int ret;
 144        unsigned char i = GET_NIBBLE(n, entry->key_sha1);
 145        void *p = tree->a[i];
 146        assert(GET_PTR_TYPE(entry) == PTR_TYPE_NULL);
 147        switch(GET_PTR_TYPE(p)) {
 148        case PTR_TYPE_NULL:
 149                assert(!p);
 150                tree->a[i] = SET_PTR_TYPE(entry, type);
 151                return 0;
 152        case PTR_TYPE_INTERNAL:
 153                return note_tree_insert(CLR_PTR_TYPE(p), n + 1, entry, type);
 154        default:
 155                assert(GET_PTR_TYPE(p) == PTR_TYPE_NOTE ||
 156                        GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE);
 157                l = (const struct leaf_node *) CLR_PTR_TYPE(p);
 158                if (!hashcmp(entry->key_sha1, l->key_sha1))
 159                        return -1; /* abort insert on matching key */
 160                new_node = (struct int_node *)
 161                        xcalloc(sizeof(struct int_node), 1);
 162                ret = note_tree_insert(new_node, n + 1,
 163                        CLR_PTR_TYPE(p), GET_PTR_TYPE(p));
 164                if (ret) {
 165                        free(new_node);
 166                        return -1;
 167                }
 168                tree->a[i] = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL);
 169                return note_tree_insert(new_node, n + 1, entry, type);
 170        }
 171}
 172
 173/* Free the entire notes data contained in the given tree */
 174static void note_tree_free(struct int_node *tree)
 175{
 176        unsigned int i;
 177        for (i = 0; i < 16; i++) {
 178                void *p = tree->a[i];
 179                switch(GET_PTR_TYPE(p)) {
 180                case PTR_TYPE_INTERNAL:
 181                        note_tree_free(CLR_PTR_TYPE(p));
 182                        /* fall through */
 183                case PTR_TYPE_NOTE:
 184                case PTR_TYPE_SUBTREE:
 185                        free(CLR_PTR_TYPE(p));
 186                }
 187        }
 188}
 189
 190/*
 191 * Convert a partial SHA1 hex string to the corresponding partial SHA1 value.
 192 * - hex      - Partial SHA1 segment in ASCII hex format
 193 * - hex_len  - Length of above segment. Must be multiple of 2 between 0 and 40
 194 * - sha1     - Partial SHA1 value is written here
 195 * - sha1_len - Max #bytes to store in sha1, Must be >= hex_len / 2, and < 20
 196 * Returns -1 on error (invalid arguments or invalid SHA1 (not in hex format).
 197 * Otherwise, returns number of bytes written to sha1 (i.e. hex_len / 2).
 198 * Pads sha1 with NULs up to sha1_len (not included in returned length).
 199 */
 200static int get_sha1_hex_segment(const char *hex, unsigned int hex_len,
 201                unsigned char *sha1, unsigned int sha1_len)
 202{
 203        unsigned int i, len = hex_len >> 1;
 204        if (hex_len % 2 != 0 || len > sha1_len)
 205                return -1;
 206        for (i = 0; i < len; i++) {
 207                unsigned int val = (hexval(hex[0]) << 4) | hexval(hex[1]);
 208                if (val & ~0xff)
 209                        return -1;
 210                *sha1++ = val;
 211                hex += 2;
 212        }
 213        for (; i < sha1_len; i++)
 214                *sha1++ = 0;
 215        return len;
 216}
 217
 218static void load_subtree(struct leaf_node *subtree, struct int_node *node,
 219                unsigned int n)
 220{
 221        unsigned char commit_sha1[20];
 222        unsigned int prefix_len;
 223        int status;
 224        void *buf;
 225        struct tree_desc desc;
 226        struct name_entry entry;
 227
 228        buf = fill_tree_descriptor(&desc, subtree->val_sha1);
 229        if (!buf)
 230                die("Could not read %s for notes-index",
 231                     sha1_to_hex(subtree->val_sha1));
 232
 233        prefix_len = subtree->key_sha1[19];
 234        assert(prefix_len * 2 >= n);
 235        memcpy(commit_sha1, subtree->key_sha1, prefix_len);
 236        while (tree_entry(&desc, &entry)) {
 237                int len = get_sha1_hex_segment(entry.path, strlen(entry.path),
 238                                commit_sha1 + prefix_len, 20 - prefix_len);
 239                if (len < 0)
 240                        continue; /* entry.path is not a SHA1 sum. Skip */
 241                len += prefix_len;
 242
 243                /*
 244                 * If commit SHA1 is complete (len == 20), assume note object
 245                 * If commit SHA1 is incomplete (len < 20), assume note subtree
 246                 */
 247                if (len <= 20) {
 248                        unsigned char type = PTR_TYPE_NOTE;
 249                        struct leaf_node *l = (struct leaf_node *)
 250                                xcalloc(sizeof(struct leaf_node), 1);
 251                        hashcpy(l->key_sha1, commit_sha1);
 252                        hashcpy(l->val_sha1, entry.sha1);
 253                        if (len < 20) {
 254                                l->key_sha1[19] = (unsigned char) len;
 255                                type = PTR_TYPE_SUBTREE;
 256                        }
 257                        status = note_tree_insert(node, n, l, type);
 258                        assert(!status);
 259                }
 260        }
 261        free(buf);
 262}
 263
 264static void initialize_notes(const char *notes_ref_name)
 265{
 266        unsigned char sha1[20], commit_sha1[20];
 267        unsigned mode;
 268        struct leaf_node root_tree;
 269
 270        if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) ||
 271            get_tree_entry(commit_sha1, "", sha1, &mode))
 272                return;
 273
 274        hashclr(root_tree.key_sha1);
 275        hashcpy(root_tree.val_sha1, sha1);
 276        load_subtree(&root_tree, &root_node, 0);
 277}
 278
 279static unsigned char *lookup_notes(const unsigned char *commit_sha1)
 280{
 281        struct leaf_node *found = note_tree_find(&root_node, 0, commit_sha1);
 282        if (found)
 283                return found->val_sha1;
 284        return NULL;
 285}
 286
 287void free_notes(void)
 288{
 289        note_tree_free(&root_node);
 290        memset(&root_node, 0, sizeof(struct int_node));
 291        initialized = 0;
 292}
 293
 294void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 295                const char *output_encoding, int flags)
 296{
 297        static const char utf8[] = "utf-8";
 298        unsigned char *sha1;
 299        char *msg, *msg_p;
 300        unsigned long linelen, msglen;
 301        enum object_type type;
 302
 303        if (!initialized) {
 304                const char *env = getenv(GIT_NOTES_REF_ENVIRONMENT);
 305                if (env)
 306                        notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT);
 307                else if (!notes_ref_name)
 308                        notes_ref_name = GIT_NOTES_DEFAULT_REF;
 309                initialize_notes(notes_ref_name);
 310                initialized = 1;
 311        }
 312
 313        sha1 = lookup_notes(commit->object.sha1);
 314        if (!sha1)
 315                return;
 316
 317        if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen ||
 318                        type != OBJ_BLOB) {
 319                free(msg);
 320                return;
 321        }
 322
 323        if (output_encoding && *output_encoding &&
 324                        strcmp(utf8, output_encoding)) {
 325                char *reencoded = reencode_string(msg, output_encoding, utf8);
 326                if (reencoded) {
 327                        free(msg);
 328                        msg = reencoded;
 329                        msglen = strlen(msg);
 330                }
 331        }
 332
 333        /* we will end the annotation by a newline anyway */
 334        if (msglen && msg[msglen - 1] == '\n')
 335                msglen--;
 336
 337        if (flags & NOTES_SHOW_HEADER)
 338                strbuf_addstr(sb, "\nNotes:\n");
 339
 340        for (msg_p = msg; msg_p < msg + msglen; msg_p += linelen + 1) {
 341                linelen = strchrnul(msg_p, '\n') - msg_p;
 342
 343                if (flags & NOTES_INDENT)
 344                        strbuf_addstr(sb, "    ");
 345                strbuf_add(sb, msg_p, linelen);
 346                strbuf_addch(sb, '\n');
 347        }
 348
 349        free(msg);
 350}