/* list-objects-filter.c, as of commit faac7ad ("directory rename detection: more involved edge/corner testcases") */
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidset.h"
  14
  15/* Remember to update object flag allocation in object.h */
  16/*
  17 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  18 * that have been shown, but should be revisited if they appear
  19 * in the traversal (until we mark it SEEN).  This is a way to
  20 * let us silently de-dup calls to show() in the caller.  This
  21 * is subtly different from the "revision.h:SHOWN" and the
  22 * "sha1_name.c:ONELINE_SEEN" bits.  And also different from
  23 * the non-de-dup usage in pack-bitmap.c
  24 */
  25#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  26
  27/*
  28 * A filter for list-objects to omit ALL blobs from the traversal.
  29 * And to OPTIONALLY collect a list of the omitted OIDs.
  30 */
  31struct filter_blobs_none_data {
  32        struct oidset *omits;
  33};
  34
  35static enum list_objects_filter_result filter_blobs_none(
  36        enum list_objects_filter_situation filter_situation,
  37        struct object *obj,
  38        const char *pathname,
  39        const char *filename,
  40        void *filter_data_)
  41{
  42        struct filter_blobs_none_data *filter_data = filter_data_;
  43
  44        switch (filter_situation) {
  45        default:
  46                die("unknown filter_situation");
  47                return LOFR_ZERO;
  48
  49        case LOFS_BEGIN_TREE:
  50                assert(obj->type == OBJ_TREE);
  51                /* always include all tree objects */
  52                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  53
  54        case LOFS_END_TREE:
  55                assert(obj->type == OBJ_TREE);
  56                return LOFR_ZERO;
  57
  58        case LOFS_BLOB:
  59                assert(obj->type == OBJ_BLOB);
  60                assert((obj->flags & SEEN) == 0);
  61
  62                if (filter_data->omits)
  63                        oidset_insert(filter_data->omits, &obj->oid);
  64                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  65        }
  66}
  67
  68static void *filter_blobs_none__init(
  69        struct oidset *omitted,
  70        struct list_objects_filter_options *filter_options,
  71        filter_object_fn *filter_fn,
  72        filter_free_fn *filter_free_fn)
  73{
  74        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  75        d->omits = omitted;
  76
  77        *filter_fn = filter_blobs_none;
  78        *filter_free_fn = free;
  79        return d;
  80}
  81
  82/*
  83 * A filter for list-objects to omit large blobs.
  84 * And to OPTIONALLY collect a list of the omitted OIDs.
  85 */
  86struct filter_blobs_limit_data {
  87        struct oidset *omits;
  88        unsigned long max_bytes;
  89};
  90
  91static enum list_objects_filter_result filter_blobs_limit(
  92        enum list_objects_filter_situation filter_situation,
  93        struct object *obj,
  94        const char *pathname,
  95        const char *filename,
  96        void *filter_data_)
  97{
  98        struct filter_blobs_limit_data *filter_data = filter_data_;
  99        unsigned long object_length;
 100        enum object_type t;
 101
 102        switch (filter_situation) {
 103        default:
 104                die("unknown filter_situation");
 105                return LOFR_ZERO;
 106
 107        case LOFS_BEGIN_TREE:
 108                assert(obj->type == OBJ_TREE);
 109                /* always include all tree objects */
 110                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 111
 112        case LOFS_END_TREE:
 113                assert(obj->type == OBJ_TREE);
 114                return LOFR_ZERO;
 115
 116        case LOFS_BLOB:
 117                assert(obj->type == OBJ_BLOB);
 118                assert((obj->flags & SEEN) == 0);
 119
 120                t = sha1_object_info(obj->oid.hash, &object_length);
 121                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 122                        /*
 123                         * We DO NOT have the blob locally, so we cannot
 124                         * apply the size filter criteria.  Be conservative
 125                         * and force show it (and let the caller deal with
 126                         * the ambiguity).
 127                         */
 128                        goto include_it;
 129                }
 130
 131                if (object_length < filter_data->max_bytes)
 132                        goto include_it;
 133
 134                if (filter_data->omits)
 135                        oidset_insert(filter_data->omits, &obj->oid);
 136                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 137        }
 138
 139include_it:
 140        if (filter_data->omits)
 141                oidset_remove(filter_data->omits, &obj->oid);
 142        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 143}
 144
 145static void *filter_blobs_limit__init(
 146        struct oidset *omitted,
 147        struct list_objects_filter_options *filter_options,
 148        filter_object_fn *filter_fn,
 149        filter_free_fn *filter_free_fn)
 150{
 151        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 152        d->omits = omitted;
 153        d->max_bytes = filter_options->blob_limit_value;
 154
 155        *filter_fn = filter_blobs_limit;
 156        *filter_free_fn = free;
 157        return d;
 158}
 159
 160/*
 161 * A filter driven by a sparse-checkout specification to only
 162 * include blobs that a sparse checkout would populate.
 163 *
 164 * The sparse-checkout spec can be loaded from a blob with the
 165 * given OID or from a local pathname.  We allow an OID because
 166 * the repo may be bare or we may be doing the filtering on the
 167 * server.
 168 */
 169struct frame {
 170        /*
 171         * defval is the usual default include/exclude value that
 172         * should be inherited as we recurse into directories based
 173         * upon pattern matching of the directory itself or of a
 174         * containing directory.
 175         */
 176        int defval;
 177
 178        /*
 179         * 1 if the directory (recursively) contains any provisionally
 180         * omitted objects.
 181         *
 182         * 0 if everything (recursively) contained in this directory
 183         * has been explicitly included (SHOWN) in the result and
 184         * the directory may be short-cut later in the traversal.
 185         */
 186        unsigned child_prov_omit : 1;
 187};
 188
 189struct filter_sparse_data {
 190        struct oidset *omits;
 191        struct exclude_list el;
 192
 193        size_t nr, alloc;
 194        struct frame *array_frame;
 195};
 196
 197static enum list_objects_filter_result filter_sparse(
 198        enum list_objects_filter_situation filter_situation,
 199        struct object *obj,
 200        const char *pathname,
 201        const char *filename,
 202        void *filter_data_)
 203{
 204        struct filter_sparse_data *filter_data = filter_data_;
 205        int val, dtype;
 206        struct frame *frame;
 207
 208        switch (filter_situation) {
 209        default:
 210                die("unknown filter_situation");
 211                return LOFR_ZERO;
 212
 213        case LOFS_BEGIN_TREE:
 214                assert(obj->type == OBJ_TREE);
 215                dtype = DT_DIR;
 216                val = is_excluded_from_list(pathname, strlen(pathname),
 217                                            filename, &dtype, &filter_data->el,
 218                                            &the_index);
 219                if (val < 0)
 220                        val = filter_data->array_frame[filter_data->nr].defval;
 221
 222                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 223                           filter_data->alloc);
 224                filter_data->nr++;
 225                filter_data->array_frame[filter_data->nr].defval = val;
 226                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 227
 228                /*
 229                 * A directory with this tree OID may appear in multiple
 230                 * places in the tree. (Think of a directory move or copy,
 231                 * with no other changes, so the OID is the same, but the
 232                 * full pathnames of objects within this directory are new
 233                 * and may match is_excluded() patterns differently.)
 234                 * So we cannot mark this directory as SEEN (yet), since
 235                 * that will prevent process_tree() from revisiting this
 236                 * tree object with other pathname prefixes.
 237                 *
 238                 * Only _DO_SHOW the tree object the first time we visit
 239                 * this tree object.
 240                 *
 241                 * We always show all tree objects.  A future optimization
 242                 * may want to attempt to narrow this.
 243                 */
 244                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 245                        return LOFR_ZERO;
 246                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 247                return LOFR_DO_SHOW;
 248
 249        case LOFS_END_TREE:
 250                assert(obj->type == OBJ_TREE);
 251                assert(filter_data->nr > 0);
 252
 253                frame = &filter_data->array_frame[filter_data->nr];
 254                filter_data->nr--;
 255
 256                /*
 257                 * Tell our parent directory if any of our children were
 258                 * provisionally omitted.
 259                 */
 260                filter_data->array_frame[filter_data->nr].child_prov_omit |=
 261                        frame->child_prov_omit;
 262
 263                /*
 264                 * If there are NO provisionally omitted child objects (ALL child
 265                 * objects in this folder were INCLUDED), then we can mark the
 266                 * folder as SEEN (so we will not have to revisit it again).
 267                 */
 268                if (!frame->child_prov_omit)
 269                        return LOFR_MARK_SEEN;
 270                return LOFR_ZERO;
 271
 272        case LOFS_BLOB:
 273                assert(obj->type == OBJ_BLOB);
 274                assert((obj->flags & SEEN) == 0);
 275
 276                frame = &filter_data->array_frame[filter_data->nr];
 277
 278                dtype = DT_REG;
 279                val = is_excluded_from_list(pathname, strlen(pathname),
 280                                            filename, &dtype, &filter_data->el,
 281                                            &the_index);
 282                if (val < 0)
 283                        val = frame->defval;
 284                if (val > 0) {
 285                        if (filter_data->omits)
 286                                oidset_remove(filter_data->omits, &obj->oid);
 287                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 288                }
 289
 290                /*
 291                 * Provisionally omit it.  We've already established that
 292                 * this pathname is not in the sparse-checkout specification
 293                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 294                 *
 295                 * However, a pathname elsewhere in the tree may also
 296                 * reference this same blob, so we cannot reject it yet.
 297                 * Leave the LOFR_ bits unset so that if the blob appears
 298                 * again in the traversal, we will be asked again.
 299                 */
 300                if (filter_data->omits)
 301                        oidset_insert(filter_data->omits, &obj->oid);
 302
 303                /*
 304                 * Remember that at least 1 blob in this tree was
 305                 * provisionally omitted.  This prevents us from short
 306                 * cutting the tree in future iterations.
 307                 */
 308                frame->child_prov_omit = 1;
 309                return LOFR_ZERO;
 310        }
 311}
 312
 313
 314static void filter_sparse_free(void *filter_data)
 315{
 316        struct filter_sparse_data *d = filter_data;
 317        /* TODO free contents of 'd' */
 318        free(d);
 319}
 320
 321static void *filter_sparse_oid__init(
 322        struct oidset *omitted,
 323        struct list_objects_filter_options *filter_options,
 324        filter_object_fn *filter_fn,
 325        filter_free_fn *filter_free_fn)
 326{
 327        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 328        d->omits = omitted;
 329        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 330                                           NULL, 0, &d->el) < 0)
 331                die("could not load filter specification");
 332
 333        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 334        d->array_frame[d->nr].defval = 0; /* default to include */
 335        d->array_frame[d->nr].child_prov_omit = 0;
 336
 337        *filter_fn = filter_sparse;
 338        *filter_free_fn = filter_sparse_free;
 339        return d;
 340}
 341
 342static void *filter_sparse_path__init(
 343        struct oidset *omitted,
 344        struct list_objects_filter_options *filter_options,
 345        filter_object_fn *filter_fn,
 346        filter_free_fn *filter_free_fn)
 347{
 348        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 349        d->omits = omitted;
 350        if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 351                                           NULL, 0, &d->el, NULL) < 0)
 352                die("could not load filter specification");
 353
 354        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 355        d->array_frame[d->nr].defval = 0; /* default to include */
 356        d->array_frame[d->nr].child_prov_omit = 0;
 357
 358        *filter_fn = filter_sparse;
 359        *filter_free_fn = filter_sparse_free;
 360        return d;
 361}
 362
 363typedef void *(*filter_init_fn)(
 364        struct oidset *omitted,
 365        struct list_objects_filter_options *filter_options,
 366        filter_object_fn *filter_fn,
 367        filter_free_fn *filter_free_fn);
 368
 369/*
 370 * Must match "enum list_objects_filter_choice".
 371 */
 372static filter_init_fn s_filters[] = {
 373        NULL,
 374        filter_blobs_none__init,
 375        filter_blobs_limit__init,
 376        filter_sparse_oid__init,
 377        filter_sparse_path__init,
 378};
 379
 380void *list_objects_filter__init(
 381        struct oidset *omitted,
 382        struct list_objects_filter_options *filter_options,
 383        filter_object_fn *filter_fn,
 384        filter_free_fn *filter_free_fn)
 385{
 386        filter_init_fn init_fn;
 387
 388        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 389
 390        if (filter_options->choice >= LOFC__COUNT)
 391                die("invalid list-objects filter choice: %d",
 392                    filter_options->choice);
 393
 394        init_fn = s_filters[filter_options->choice];
 395        if (init_fn)
 396                return init_fn(omitted, filter_options,
 397                               filter_fn, filter_free_fn);
 398        *filter_fn = NULL;
 399        *filter_free_fn = NULL;
 400        return NULL;
 401}