77680a31e8fed797715bc7c6727082574951ad0c
   1/*
   2 * Parse and rearrange a svnadmin dump.
   3 * Create the dump with:
   4 * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
   5 *
   6 * Licensed under a two-clause BSD-style license.
   7 * See LICENSE for details.
   8 */
   9
  10#include "cache.h"
  11#include "repo_tree.h"
  12#include "fast_export.h"
  13#include "line_buffer.h"
  14#include "obj_pool.h"
  15#include "string_pool.h"
  16
  17/*
  18 * Compare start of string to literal of equal length;
  19 * must be guarded by length test.
  20 */
  21#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1)
  22
  23#define NODEACT_REPLACE 4
  24#define NODEACT_DELETE 3
  25#define NODEACT_ADD 2
  26#define NODEACT_CHANGE 1
  27#define NODEACT_UNKNOWN 0
  28
  29#define DUMP_CTX 0
  30#define REV_CTX  1
  31#define NODE_CTX 2
  32
  33#define LENGTH_UNKNOWN (~0)
  34#define DATE_RFC2822_LEN 31
  35
  36/* Create memory pool for log messages */
  37obj_pool_gen(log, char, 4096)
  38
  39static struct line_buffer input = LINE_BUFFER_INIT;
  40
  41static char *log_copy(uint32_t length, const char *log)
  42{
  43        char *buffer;
  44        log_free(log_pool.size);
  45        buffer = log_pointer(log_alloc(length));
  46        strncpy(buffer, log, length);
  47        return buffer;
  48}
  49
  50static struct {
  51        uint32_t action, propLength, textLength, srcRev, type;
  52        uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
  53        uint32_t text_delta, prop_delta;
  54} node_ctx;
  55
  56static struct {
  57        uint32_t revision, author;
  58        unsigned long timestamp;
  59        char *log;
  60} rev_ctx;
  61
  62static struct {
  63        uint32_t version, uuid, url;
  64} dump_ctx;
  65
  66static void reset_node_ctx(char *fname)
  67{
  68        node_ctx.type = 0;
  69        node_ctx.action = NODEACT_UNKNOWN;
  70        node_ctx.propLength = LENGTH_UNKNOWN;
  71        node_ctx.textLength = LENGTH_UNKNOWN;
  72        node_ctx.src[0] = ~0;
  73        node_ctx.srcRev = 0;
  74        pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
  75        node_ctx.text_delta = 0;
  76        node_ctx.prop_delta = 0;
  77}
  78
  79static void reset_rev_ctx(uint32_t revision)
  80{
  81        rev_ctx.revision = revision;
  82        rev_ctx.timestamp = 0;
  83        rev_ctx.log = NULL;
  84        rev_ctx.author = ~0;
  85}
  86
  87static void reset_dump_ctx(uint32_t url)
  88{
  89        dump_ctx.url = url;
  90        dump_ctx.version = 1;
  91        dump_ctx.uuid = ~0;
  92}
  93
  94static void handle_property(const struct strbuf *key_buf,
  95                                const char *val, uint32_t len,
  96                                uint32_t *type_set)
  97{
  98        const char *key = key_buf->buf;
  99        size_t keylen = key_buf->len;
 100
 101        switch (keylen + 1) {
 102        case sizeof("svn:log"):
 103                if (constcmp(key, "svn:log"))
 104                        break;
 105                if (!val)
 106                        die("invalid dump: unsets svn:log");
 107                /* Value length excludes terminating nul. */
 108                rev_ctx.log = log_copy(len + 1, val);
 109                break;
 110        case sizeof("svn:author"):
 111                if (constcmp(key, "svn:author"))
 112                        break;
 113                rev_ctx.author = pool_intern(val);
 114                break;
 115        case sizeof("svn:date"):
 116                if (constcmp(key, "svn:date"))
 117                        break;
 118                if (!val)
 119                        die("invalid dump: unsets svn:date");
 120                if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
 121                        warning("invalid timestamp: %s", val);
 122                break;
 123        case sizeof("svn:executable"):
 124        case sizeof("svn:special"):
 125                if (keylen == strlen("svn:executable") &&
 126                    constcmp(key, "svn:executable"))
 127                        break;
 128                if (keylen == strlen("svn:special") &&
 129                    constcmp(key, "svn:special"))
 130                        break;
 131                if (*type_set) {
 132                        if (!val)
 133                                return;
 134                        die("invalid dump: sets type twice");
 135                }
 136                if (!val) {
 137                        node_ctx.type = REPO_MODE_BLB;
 138                        return;
 139                }
 140                *type_set = 1;
 141                node_ctx.type = keylen == strlen("svn:executable") ?
 142                                REPO_MODE_EXE :
 143                                REPO_MODE_LNK;
 144        }
 145}
 146
 147static void die_short_read(void)
 148{
 149        if (buffer_ferror(&input))
 150                die_errno("error reading dump file");
 151        die("invalid dump: unexpected end of file");
 152}
 153
 154static void read_props(void)
 155{
 156        static struct strbuf key = STRBUF_INIT;
 157        const char *t;
 158        /*
 159         * NEEDSWORK: to support simple mode changes like
 160         *      K 11
 161         *      svn:special
 162         *      V 1
 163         *      *
 164         *      D 14
 165         *      svn:executable
 166         * we keep track of whether a mode has been set and reset to
 167         * plain file only if not.  We should be keeping track of the
 168         * symlink and executable bits separately instead.
 169         */
 170        uint32_t type_set = 0;
 171        while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
 172                uint32_t len;
 173                const char *val;
 174                const char type = t[0];
 175                int ch;
 176
 177                if (!type || t[1] != ' ')
 178                        die("invalid property line: %s\n", t);
 179                len = atoi(&t[2]);
 180                val = buffer_read_string(&input, len);
 181                if (!val || strlen(val) != len)
 182                        die_short_read();
 183
 184                /* Discard trailing newline. */
 185                ch = buffer_read_char(&input);
 186                if (ch == EOF)
 187                        die_short_read();
 188                if (ch != '\n')
 189                        die("invalid dump: expected newline after %s", val);
 190
 191                switch (type) {
 192                case 'K':
 193                case 'D':
 194                        strbuf_reset(&key);
 195                        if (val)
 196                                strbuf_add(&key, val, len);
 197                        if (type == 'K')
 198                                continue;
 199                        assert(type == 'D');
 200                        val = NULL;
 201                        len = 0;
 202                        /* fall through */
 203                case 'V':
 204                        handle_property(&key, val, len, &type_set);
 205                        strbuf_reset(&key);
 206                        continue;
 207                default:
 208                        die("invalid property line: %s\n", t);
 209                }
 210        }
 211}
 212
 213static void handle_node(void)
 214{
 215        uint32_t mark = 0;
 216        const uint32_t type = node_ctx.type;
 217        const int have_props = node_ctx.propLength != LENGTH_UNKNOWN;
 218        const int have_text = node_ctx.textLength != LENGTH_UNKNOWN;
 219
 220        if (node_ctx.text_delta)
 221                die("text deltas not supported");
 222        if (have_text)
 223                mark = next_blob_mark();
 224        if (node_ctx.action == NODEACT_DELETE) {
 225                if (have_text || have_props || node_ctx.srcRev)
 226                        die("invalid dump: deletion node has "
 227                                "copyfrom info, text, or properties");
 228                return repo_delete(node_ctx.dst);
 229        }
 230        if (node_ctx.action == NODEACT_REPLACE) {
 231                repo_delete(node_ctx.dst);
 232                node_ctx.action = NODEACT_ADD;
 233        }
 234        if (node_ctx.srcRev) {
 235                repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
 236                if (node_ctx.action == NODEACT_ADD)
 237                        node_ctx.action = NODEACT_CHANGE;
 238        }
 239        if (have_text && type == REPO_MODE_DIR)
 240                die("invalid dump: directories cannot have text attached");
 241
 242        /*
 243         * Decide on the new content (mark) and mode (node_ctx.type).
 244         */
 245        if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) {
 246                if (type != REPO_MODE_DIR)
 247                        die("invalid dump: root of tree is not a regular file");
 248        } else if (node_ctx.action == NODEACT_CHANGE) {
 249                uint32_t mode;
 250                if (!have_text)
 251                        mark = repo_read_path(node_ctx.dst);
 252                mode = repo_read_mode(node_ctx.dst);
 253                if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 254                        die("invalid dump: cannot modify a directory into a file");
 255                if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
 256                        die("invalid dump: cannot modify a file into a directory");
 257                node_ctx.type = mode;
 258        } else if (node_ctx.action == NODEACT_ADD) {
 259                if (!have_text && type != REPO_MODE_DIR)
 260                        die("invalid dump: adds node without text");
 261        } else {
 262                die("invalid dump: Node-path block lacks Node-action");
 263        }
 264
 265        /*
 266         * Adjust mode to reflect properties.
 267         */
 268        if (have_props) {
 269                if (!node_ctx.prop_delta)
 270                        node_ctx.type = type;
 271                if (node_ctx.propLength)
 272                        read_props();
 273        }
 274
 275        /*
 276         * Save the result.
 277         */
 278        repo_add(node_ctx.dst, node_ctx.type, mark);
 279        if (have_text)
 280                fast_export_blob(node_ctx.type, mark,
 281                                 node_ctx.textLength, &input);
 282}
 283
 284static void handle_revision(void)
 285{
 286        if (rev_ctx.revision)
 287                repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
 288                        dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
 289}
 290
 291void svndump_read(const char *url)
 292{
 293        char *val;
 294        char *t;
 295        uint32_t active_ctx = DUMP_CTX;
 296        uint32_t len;
 297
 298        reset_dump_ctx(pool_intern(url));
 299        while ((t = buffer_read_line(&input))) {
 300                val = strstr(t, ": ");
 301                if (!val)
 302                        continue;
 303                val += 2;
 304
 305                /* strlen(key) + 1 */
 306                switch (val - t - 1) {
 307                case sizeof("SVN-fs-dump-format-version"):
 308                        if (constcmp(t, "SVN-fs-dump-format-version"))
 309                                continue;
 310                        dump_ctx.version = atoi(val);
 311                        if (dump_ctx.version > 3)
 312                                die("expected svn dump format version <= 3, found %"PRIu32,
 313                                    dump_ctx.version);
 314                        break;
 315                case sizeof("UUID"):
 316                        if (constcmp(t, "UUID"))
 317                                continue;
 318                        dump_ctx.uuid = pool_intern(val);
 319                        break;
 320                case sizeof("Revision-number"):
 321                        if (constcmp(t, "Revision-number"))
 322                                continue;
 323                        if (active_ctx == NODE_CTX)
 324                                handle_node();
 325                        if (active_ctx != DUMP_CTX)
 326                                handle_revision();
 327                        active_ctx = REV_CTX;
 328                        reset_rev_ctx(atoi(val));
 329                        break;
 330                case sizeof("Node-path"):
 331                        if (prefixcmp(t, "Node-"))
 332                                continue;
 333                        if (!constcmp(t + strlen("Node-"), "path")) {
 334                                if (active_ctx == NODE_CTX)
 335                                        handle_node();
 336                                active_ctx = NODE_CTX;
 337                                reset_node_ctx(val);
 338                                break;
 339                        }
 340                        if (constcmp(t + strlen("Node-"), "kind"))
 341                                continue;
 342                        if (!strcmp(val, "dir"))
 343                                node_ctx.type = REPO_MODE_DIR;
 344                        else if (!strcmp(val, "file"))
 345                                node_ctx.type = REPO_MODE_BLB;
 346                        else
 347                                fprintf(stderr, "Unknown node-kind: %s\n", val);
 348                        break;
 349                case sizeof("Node-action"):
 350                        if (constcmp(t, "Node-action"))
 351                                continue;
 352                        if (!strcmp(val, "delete")) {
 353                                node_ctx.action = NODEACT_DELETE;
 354                        } else if (!strcmp(val, "add")) {
 355                                node_ctx.action = NODEACT_ADD;
 356                        } else if (!strcmp(val, "change")) {
 357                                node_ctx.action = NODEACT_CHANGE;
 358                        } else if (!strcmp(val, "replace")) {
 359                                node_ctx.action = NODEACT_REPLACE;
 360                        } else {
 361                                fprintf(stderr, "Unknown node-action: %s\n", val);
 362                                node_ctx.action = NODEACT_UNKNOWN;
 363                        }
 364                        break;
 365                case sizeof("Node-copyfrom-path"):
 366                        if (constcmp(t, "Node-copyfrom-path"))
 367                                continue;
 368                        pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
 369                        break;
 370                case sizeof("Node-copyfrom-rev"):
 371                        if (constcmp(t, "Node-copyfrom-rev"))
 372                                continue;
 373                        node_ctx.srcRev = atoi(val);
 374                        break;
 375                case sizeof("Text-content-length"):
 376                        if (!constcmp(t, "Text-content-length")) {
 377                                node_ctx.textLength = atoi(val);
 378                                break;
 379                        }
 380                        if (constcmp(t, "Prop-content-length"))
 381                                continue;
 382                        node_ctx.propLength = atoi(val);
 383                        break;
 384                case sizeof("Text-delta"):
 385                        if (!constcmp(t, "Text-delta")) {
 386                                node_ctx.text_delta = !strcmp(val, "true");
 387                                break;
 388                        }
 389                        if (constcmp(t, "Prop-delta"))
 390                                continue;
 391                        node_ctx.prop_delta = !strcmp(val, "true");
 392                        break;
 393                case sizeof("Content-length"):
 394                        if (constcmp(t, "Content-length"))
 395                                continue;
 396                        len = atoi(val);
 397                        t = buffer_read_line(&input);
 398                        if (!t)
 399                                die_short_read();
 400                        if (*t)
 401                                die("invalid dump: expected blank line after content length header");
 402                        if (active_ctx == REV_CTX) {
 403                                read_props();
 404                        } else if (active_ctx == NODE_CTX) {
 405                                handle_node();
 406                                active_ctx = REV_CTX;
 407                        } else {
 408                                fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
 409                                if (buffer_skip_bytes(&input, len) != len)
 410                                        die_short_read();
 411                        }
 412                }
 413        }
 414        if (buffer_ferror(&input))
 415                die_short_read();
 416        if (active_ctx == NODE_CTX)
 417                handle_node();
 418        if (active_ctx != DUMP_CTX)
 419                handle_revision();
 420}
 421
 422int svndump_init(const char *filename)
 423{
 424        if (buffer_init(&input, filename))
 425                return error("cannot open %s: %s", filename, strerror(errno));
 426        repo_init();
 427        reset_dump_ctx(~0);
 428        reset_rev_ctx(0);
 429        reset_node_ctx(NULL);
 430        return 0;
 431}
 432
 433void svndump_deinit(void)
 434{
 435        log_reset();
 436        repo_reset();
 437        reset_dump_ctx(~0);
 438        reset_rev_ctx(0);
 439        reset_node_ctx(NULL);
 440        if (buffer_deinit(&input))
 441                fprintf(stderr, "Input error\n");
 442        if (ferror(stdout))
 443                fprintf(stderr, "Output error\n");
 444}
 445
 446void svndump_reset(void)
 447{
 448        log_reset();
 449        buffer_reset(&input);
 450        repo_reset();
 451        reset_dump_ctx(~0);
 452        reset_rev_ctx(0);
 453        reset_node_ctx(NULL);
 454}