list-objects-filter.con commit Merge branch 'jc/denoise-rm-to-resolve' (5e9d978)
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidmap.h"
  14#include "oidset.h"
  15#include "object-store.h"
  16
  17/* Remember to update object flag allocation in object.h */
  18/*
  19 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  20 * that have been shown, but should be revisited if they appear
  21 * in the traversal (until we mark it SEEN).  This is a way to
  22 * let us silently de-dup calls to show() in the caller.  This
  23 * is subtly different from the "revision.h:SHOWN" and the
  24 * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  25 * the non-de-dup usage in pack-bitmap.c
  26 */
  27#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  28
  29/*
  30 * A filter for list-objects to omit ALL blobs from the traversal.
  31 * And to OPTIONALLY collect a list of the omitted OIDs.
  32 */
  33struct filter_blobs_none_data {
  34        struct oidset *omits;
  35};
  36
  37static enum list_objects_filter_result filter_blobs_none(
  38        struct repository *r,
  39        enum list_objects_filter_situation filter_situation,
  40        struct object *obj,
  41        const char *pathname,
  42        const char *filename,
  43        void *filter_data_)
  44{
  45        struct filter_blobs_none_data *filter_data = filter_data_;
  46
  47        switch (filter_situation) {
  48        default:
  49                BUG("unknown filter_situation: %d", filter_situation);
  50
  51        case LOFS_BEGIN_TREE:
  52                assert(obj->type == OBJ_TREE);
  53                /* always include all tree objects */
  54                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  55
  56        case LOFS_END_TREE:
  57                assert(obj->type == OBJ_TREE);
  58                return LOFR_ZERO;
  59
  60        case LOFS_BLOB:
  61                assert(obj->type == OBJ_BLOB);
  62                assert((obj->flags & SEEN) == 0);
  63
  64                if (filter_data->omits)
  65                        oidset_insert(filter_data->omits, &obj->oid);
  66                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  67        }
  68}
  69
  70static void *filter_blobs_none__init(
  71        struct oidset *omitted,
  72        struct list_objects_filter_options *filter_options,
  73        filter_object_fn *filter_fn,
  74        filter_free_fn *filter_free_fn)
  75{
  76        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  77        d->omits = omitted;
  78
  79        *filter_fn = filter_blobs_none;
  80        *filter_free_fn = free;
  81        return d;
  82}
  83
  84/*
  85 * A filter for list-objects to omit ALL trees and blobs from the traversal.
  86 * Can OPTIONALLY collect a list of the omitted OIDs.
  87 */
  88struct filter_trees_depth_data {
  89        struct oidset *omits;
  90
  91        /*
  92         * Maps trees to the minimum depth at which they were seen. It is not
  93         * necessary to re-traverse a tree at deeper or equal depths than it has
  94         * already been traversed.
  95         *
  96         * We can't use LOFR_MARK_SEEN for tree objects since this will prevent
  97         * it from being traversed at shallower depths.
  98         */
  99        struct oidmap seen_at_depth;
 100
 101        unsigned long exclude_depth;
 102        unsigned long current_depth;
 103};
 104
 105struct seen_map_entry {
 106        struct oidmap_entry base;
 107        size_t depth;
 108};
 109
 110/* Returns 1 if the oid was in the omits set before it was invoked. */
 111static int filter_trees_update_omits(
 112        struct object *obj,
 113        struct filter_trees_depth_data *filter_data,
 114        int include_it)
 115{
 116        if (!filter_data->omits)
 117                return 0;
 118
 119        if (include_it)
 120                return oidset_remove(filter_data->omits, &obj->oid);
 121        else
 122                return oidset_insert(filter_data->omits, &obj->oid);
 123}
 124
 125static enum list_objects_filter_result filter_trees_depth(
 126        struct repository *r,
 127        enum list_objects_filter_situation filter_situation,
 128        struct object *obj,
 129        const char *pathname,
 130        const char *filename,
 131        void *filter_data_)
 132{
 133        struct filter_trees_depth_data *filter_data = filter_data_;
 134        struct seen_map_entry *seen_info;
 135        int include_it = filter_data->current_depth <
 136                filter_data->exclude_depth;
 137        int filter_res;
 138        int already_seen;
 139
 140        /*
 141         * Note that we do not use _MARK_SEEN in order to allow re-traversal in
 142         * case we encounter a tree or blob again at a shallower depth.
 143         */
 144
 145        switch (filter_situation) {
 146        default:
 147                BUG("unknown filter_situation: %d", filter_situation);
 148
 149        case LOFS_END_TREE:
 150                assert(obj->type == OBJ_TREE);
 151                filter_data->current_depth--;
 152                return LOFR_ZERO;
 153
 154        case LOFS_BLOB:
 155                filter_trees_update_omits(obj, filter_data, include_it);
 156                return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;
 157
 158        case LOFS_BEGIN_TREE:
 159                seen_info = oidmap_get(
 160                        &filter_data->seen_at_depth, &obj->oid);
 161                if (!seen_info) {
 162                        seen_info = xcalloc(1, sizeof(*seen_info));
 163                        oidcpy(&seen_info->base.oid, &obj->oid);
 164                        seen_info->depth = filter_data->current_depth;
 165                        oidmap_put(&filter_data->seen_at_depth, seen_info);
 166                        already_seen = 0;
 167                } else {
 168                        already_seen =
 169                                filter_data->current_depth >= seen_info->depth;
 170                }
 171
 172                if (already_seen) {
 173                        filter_res = LOFR_SKIP_TREE;
 174                } else {
 175                        int been_omitted = filter_trees_update_omits(
 176                                obj, filter_data, include_it);
 177                        seen_info->depth = filter_data->current_depth;
 178
 179                        if (include_it)
 180                                filter_res = LOFR_DO_SHOW;
 181                        else if (filter_data->omits && !been_omitted)
 182                                /*
 183                                 * Must update omit information of children
 184                                 * recursively; they have not been omitted yet.
 185                                 */
 186                                filter_res = LOFR_ZERO;
 187                        else
 188                                filter_res = LOFR_SKIP_TREE;
 189                }
 190
 191                filter_data->current_depth++;
 192                return filter_res;
 193        }
 194}
 195
 196static void filter_trees_free(void *filter_data) {
 197        struct filter_trees_depth_data *d = filter_data;
 198        if (!d)
 199                return;
 200        oidmap_free(&d->seen_at_depth, 1);
 201        free(d);
 202}
 203
 204static void *filter_trees_depth__init(
 205        struct oidset *omitted,
 206        struct list_objects_filter_options *filter_options,
 207        filter_object_fn *filter_fn,
 208        filter_free_fn *filter_free_fn)
 209{
 210        struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));
 211        d->omits = omitted;
 212        oidmap_init(&d->seen_at_depth, 0);
 213        d->exclude_depth = filter_options->tree_exclude_depth;
 214        d->current_depth = 0;
 215
 216        *filter_fn = filter_trees_depth;
 217        *filter_free_fn = filter_trees_free;
 218        return d;
 219}
 220
 221/*
 222 * A filter for list-objects to omit large blobs.
 223 * And to OPTIONALLY collect a list of the omitted OIDs.
 224 */
 225struct filter_blobs_limit_data {
 226        struct oidset *omits;
 227        unsigned long max_bytes;
 228};
 229
 230static enum list_objects_filter_result filter_blobs_limit(
 231        struct repository *r,
 232        enum list_objects_filter_situation filter_situation,
 233        struct object *obj,
 234        const char *pathname,
 235        const char *filename,
 236        void *filter_data_)
 237{
 238        struct filter_blobs_limit_data *filter_data = filter_data_;
 239        unsigned long object_length;
 240        enum object_type t;
 241
 242        switch (filter_situation) {
 243        default:
 244                BUG("unknown filter_situation: %d", filter_situation);
 245
 246        case LOFS_BEGIN_TREE:
 247                assert(obj->type == OBJ_TREE);
 248                /* always include all tree objects */
 249                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 250
 251        case LOFS_END_TREE:
 252                assert(obj->type == OBJ_TREE);
 253                return LOFR_ZERO;
 254
 255        case LOFS_BLOB:
 256                assert(obj->type == OBJ_BLOB);
 257                assert((obj->flags & SEEN) == 0);
 258
 259                t = oid_object_info(r, &obj->oid, &object_length);
 260                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 261                        /*
 262                         * We DO NOT have the blob locally, so we cannot
 263                         * apply the size filter criteria.  Be conservative
 264                         * and force show it (and let the caller deal with
 265                         * the ambiguity).
 266                         */
 267                        goto include_it;
 268                }
 269
 270                if (object_length < filter_data->max_bytes)
 271                        goto include_it;
 272
 273                if (filter_data->omits)
 274                        oidset_insert(filter_data->omits, &obj->oid);
 275                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 276        }
 277
 278include_it:
 279        if (filter_data->omits)
 280                oidset_remove(filter_data->omits, &obj->oid);
 281        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 282}
 283
 284static void *filter_blobs_limit__init(
 285        struct oidset *omitted,
 286        struct list_objects_filter_options *filter_options,
 287        filter_object_fn *filter_fn,
 288        filter_free_fn *filter_free_fn)
 289{
 290        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 291        d->omits = omitted;
 292        d->max_bytes = filter_options->blob_limit_value;
 293
 294        *filter_fn = filter_blobs_limit;
 295        *filter_free_fn = free;
 296        return d;
 297}
 298
 299/*
 300 * A filter driven by a sparse-checkout specification to only
 301 * include blobs that a sparse checkout would populate.
 302 *
 303 * The sparse-checkout spec can be loaded from a blob with the
 304 * given OID or from a local pathname.  We allow an OID because
 305 * the repo may be bare or we may be doing the filtering on the
 306 * server.
 307 */
 308struct frame {
 309        /*
 310         * defval is the usual default include/exclude value that
 311         * should be inherited as we recurse into directories based
 312         * upon pattern matching of the directory itself or of a
 313         * containing directory.
 314         */
 315        int defval;
 316
 317        /*
 318         * 1 if the directory (recursively) contains any provisionally
 319         * omitted objects.
 320         *
 321         * 0 if everything (recursively) contained in this directory
 322         * has been explicitly included (SHOWN) in the result and
 323         * the directory may be short-cut later in the traversal.
 324         */
 325        unsigned child_prov_omit : 1;
 326};
 327
 328struct filter_sparse_data {
 329        struct oidset *omits;
 330        struct exclude_list el;
 331
 332        size_t nr, alloc;
 333        struct frame *array_frame;
 334};
 335
 336static enum list_objects_filter_result filter_sparse(
 337        struct repository *r,
 338        enum list_objects_filter_situation filter_situation,
 339        struct object *obj,
 340        const char *pathname,
 341        const char *filename,
 342        void *filter_data_)
 343{
 344        struct filter_sparse_data *filter_data = filter_data_;
 345        int val, dtype;
 346        struct frame *frame;
 347
 348        switch (filter_situation) {
 349        default:
 350                BUG("unknown filter_situation: %d", filter_situation);
 351
 352        case LOFS_BEGIN_TREE:
 353                assert(obj->type == OBJ_TREE);
 354                dtype = DT_DIR;
 355                val = is_excluded_from_list(pathname, strlen(pathname),
 356                                            filename, &dtype, &filter_data->el,
 357                                            r->index);
 358                if (val < 0)
 359                        val = filter_data->array_frame[filter_data->nr - 1].defval;
 360
 361                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 362                           filter_data->alloc);
 363                filter_data->array_frame[filter_data->nr].defval = val;
 364                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 365                filter_data->nr++;
 366
 367                /*
 368                 * A directory with this tree OID may appear in multiple
 369                 * places in the tree. (Think of a directory move or copy,
 370                 * with no other changes, so the OID is the same, but the
 371                 * full pathnames of objects within this directory are new
 372                 * and may match is_excluded() patterns differently.)
 373                 * So we cannot mark this directory as SEEN (yet), since
 374                 * that will prevent process_tree() from revisiting this
 375                 * tree object with other pathname prefixes.
 376                 *
 377                 * Only _DO_SHOW the tree object the first time we visit
 378                 * this tree object.
 379                 *
 380                 * We always show all tree objects.  A future optimization
 381                 * may want to attempt to narrow this.
 382                 */
 383                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 384                        return LOFR_ZERO;
 385                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 386                return LOFR_DO_SHOW;
 387
 388        case LOFS_END_TREE:
 389                assert(obj->type == OBJ_TREE);
 390                assert(filter_data->nr > 1);
 391
 392                frame = &filter_data->array_frame[--filter_data->nr];
 393
 394                /*
 395                 * Tell our parent directory if any of our children were
 396                 * provisionally omitted.
 397                 */
 398                filter_data->array_frame[filter_data->nr - 1].child_prov_omit |=
 399                        frame->child_prov_omit;
 400
 401                /*
 402                 * If there are NO provisionally omitted child objects (ALL child
 403                 * objects in this folder were INCLUDED), then we can mark the
 404                 * folder as SEEN (so we will not have to revisit it again).
 405                 */
 406                if (!frame->child_prov_omit)
 407                        return LOFR_MARK_SEEN;
 408                return LOFR_ZERO;
 409
 410        case LOFS_BLOB:
 411                assert(obj->type == OBJ_BLOB);
 412                assert((obj->flags & SEEN) == 0);
 413
 414                frame = &filter_data->array_frame[filter_data->nr - 1];
 415
 416                dtype = DT_REG;
 417                val = is_excluded_from_list(pathname, strlen(pathname),
 418                                            filename, &dtype, &filter_data->el,
 419                                            r->index);
 420                if (val < 0)
 421                        val = frame->defval;
 422                if (val > 0) {
 423                        if (filter_data->omits)
 424                                oidset_remove(filter_data->omits, &obj->oid);
 425                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 426                }
 427
 428                /*
 429                 * Provisionally omit it.  We've already established that
 430                 * this pathname is not in the sparse-checkout specification
 431                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 432                 *
 433                 * However, a pathname elsewhere in the tree may also
 434                 * reference this same blob, so we cannot reject it yet.
 435                 * Leave the LOFR_ bits unset so that if the blob appears
 436                 * again in the traversal, we will be asked again.
 437                 */
 438                if (filter_data->omits)
 439                        oidset_insert(filter_data->omits, &obj->oid);
 440
 441                /*
 442                 * Remember that at least 1 blob in this tree was
 443                 * provisionally omitted.  This prevents us from short
 444                 * cutting the tree in future iterations.
 445                 */
 446                frame->child_prov_omit = 1;
 447                return LOFR_ZERO;
 448        }
 449}
 450
 451
 452static void filter_sparse_free(void *filter_data)
 453{
 454        struct filter_sparse_data *d = filter_data;
 455        free(d->array_frame);
 456        free(d);
 457}
 458
 459static void *filter_sparse_oid__init(
 460        struct oidset *omitted,
 461        struct list_objects_filter_options *filter_options,
 462        filter_object_fn *filter_fn,
 463        filter_free_fn *filter_free_fn)
 464{
 465        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 466        d->omits = omitted;
 467        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 468                                           NULL, 0, &d->el) < 0)
 469                die("could not load filter specification");
 470
 471        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 472        d->array_frame[d->nr].defval = 0; /* default to include */
 473        d->array_frame[d->nr].child_prov_omit = 0;
 474        d->nr++;
 475
 476        *filter_fn = filter_sparse;
 477        *filter_free_fn = filter_sparse_free;
 478        return d;
 479}
 480
 481typedef void *(*filter_init_fn)(
 482        struct oidset *omitted,
 483        struct list_objects_filter_options *filter_options,
 484        filter_object_fn *filter_fn,
 485        filter_free_fn *filter_free_fn);
 486
 487/*
 488 * Must match "enum list_objects_filter_choice".
 489 */
 490static filter_init_fn s_filters[] = {
 491        NULL,
 492        filter_blobs_none__init,
 493        filter_blobs_limit__init,
 494        filter_trees_depth__init,
 495        filter_sparse_oid__init,
 496};
 497
 498void *list_objects_filter__init(
 499        struct oidset *omitted,
 500        struct list_objects_filter_options *filter_options,
 501        filter_object_fn *filter_fn,
 502        filter_free_fn *filter_free_fn)
 503{
 504        filter_init_fn init_fn;
 505
 506        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 507
 508        if (filter_options->choice >= LOFC__COUNT)
 509                BUG("invalid list-objects filter choice: %d",
 510                    filter_options->choice);
 511
 512        init_fn = s_filters[filter_options->choice];
 513        if (init_fn)
 514                return init_fn(omitted, filter_options,
 515                               filter_fn, filter_free_fn);
 516        *filter_fn = NULL;
 517        *filter_free_fn = NULL;
 518        return NULL;
 519}