/* list-objects-filter.c, as of commit a15ef38 ("Documentation/Makefile: add missing dependency on asciidoctor-extensions") */
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidset.h"
  14#include "object-store.h"
  15
  16/* Remember to update object flag allocation in object.h */
  17/*
  18 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  19 * that have been shown, but should be revisited if they appear
  20 * in the traversal (until we mark it SEEN).  This is a way to
  21 * let us silently de-dup calls to show() in the caller.  This
  22 * is subtly different from the "revision.h:SHOWN" and the
  23 * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  24 * the non-de-dup usage in pack-bitmap.c
  25 */
  26#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  27
  28/*
  29 * A filter for list-objects to omit ALL blobs from the traversal.
  30 * And to OPTIONALLY collect a list of the omitted OIDs.
  31 */
  32struct filter_blobs_none_data {
  33        struct oidset *omits;
  34};
  35
  36static enum list_objects_filter_result filter_blobs_none(
  37        enum list_objects_filter_situation filter_situation,
  38        struct object *obj,
  39        const char *pathname,
  40        const char *filename,
  41        void *filter_data_)
  42{
  43        struct filter_blobs_none_data *filter_data = filter_data_;
  44
  45        switch (filter_situation) {
  46        default:
  47                BUG("unknown filter_situation: %d", filter_situation);
  48
  49        case LOFS_BEGIN_TREE:
  50                assert(obj->type == OBJ_TREE);
  51                /* always include all tree objects */
  52                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  53
  54        case LOFS_END_TREE:
  55                assert(obj->type == OBJ_TREE);
  56                return LOFR_ZERO;
  57
  58        case LOFS_BLOB:
  59                assert(obj->type == OBJ_BLOB);
  60                assert((obj->flags & SEEN) == 0);
  61
  62                if (filter_data->omits)
  63                        oidset_insert(filter_data->omits, &obj->oid);
  64                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  65        }
  66}
  67
  68static void *filter_blobs_none__init(
  69        struct oidset *omitted,
  70        struct list_objects_filter_options *filter_options,
  71        filter_object_fn *filter_fn,
  72        filter_free_fn *filter_free_fn)
  73{
  74        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  75        d->omits = omitted;
  76
  77        *filter_fn = filter_blobs_none;
  78        *filter_free_fn = free;
  79        return d;
  80}
  81
  82/*
  83 * A filter for list-objects to omit ALL trees and blobs from the traversal.
  84 * Can OPTIONALLY collect a list of the omitted OIDs.
  85 */
  86struct filter_trees_none_data {
  87        struct oidset *omits;
  88};
  89
  90static enum list_objects_filter_result filter_trees_none(
  91        enum list_objects_filter_situation filter_situation,
  92        struct object *obj,
  93        const char *pathname,
  94        const char *filename,
  95        void *filter_data_)
  96{
  97        struct filter_trees_none_data *filter_data = filter_data_;
  98
  99        switch (filter_situation) {
 100        default:
 101                BUG("unknown filter_situation: %d", filter_situation);
 102
 103        case LOFS_BEGIN_TREE:
 104        case LOFS_BLOB:
 105                if (filter_data->omits) {
 106                        oidset_insert(filter_data->omits, &obj->oid);
 107                        /* _MARK_SEEN but not _DO_SHOW (hard omit) */
 108                        return LOFR_MARK_SEEN;
 109                } else {
 110                        /*
 111                         * Not collecting omits so no need to to traverse tree.
 112                         */
 113                        return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
 114                }
 115
 116        case LOFS_END_TREE:
 117                assert(obj->type == OBJ_TREE);
 118                return LOFR_ZERO;
 119
 120        }
 121}
 122
 123static void* filter_trees_none__init(
 124        struct oidset *omitted,
 125        struct list_objects_filter_options *filter_options,
 126        filter_object_fn *filter_fn,
 127        filter_free_fn *filter_free_fn)
 128{
 129        struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
 130        d->omits = omitted;
 131
 132        *filter_fn = filter_trees_none;
 133        *filter_free_fn = free;
 134        return d;
 135}
 136
 137/*
 138 * A filter for list-objects to omit large blobs.
 139 * And to OPTIONALLY collect a list of the omitted OIDs.
 140 */
 141struct filter_blobs_limit_data {
 142        struct oidset *omits;
 143        unsigned long max_bytes;
 144};
 145
 146static enum list_objects_filter_result filter_blobs_limit(
 147        enum list_objects_filter_situation filter_situation,
 148        struct object *obj,
 149        const char *pathname,
 150        const char *filename,
 151        void *filter_data_)
 152{
 153        struct filter_blobs_limit_data *filter_data = filter_data_;
 154        unsigned long object_length;
 155        enum object_type t;
 156
 157        switch (filter_situation) {
 158        default:
 159                BUG("unknown filter_situation: %d", filter_situation);
 160
 161        case LOFS_BEGIN_TREE:
 162                assert(obj->type == OBJ_TREE);
 163                /* always include all tree objects */
 164                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 165
 166        case LOFS_END_TREE:
 167                assert(obj->type == OBJ_TREE);
 168                return LOFR_ZERO;
 169
 170        case LOFS_BLOB:
 171                assert(obj->type == OBJ_BLOB);
 172                assert((obj->flags & SEEN) == 0);
 173
 174                t = oid_object_info(the_repository, &obj->oid, &object_length);
 175                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 176                        /*
 177                         * We DO NOT have the blob locally, so we cannot
 178                         * apply the size filter criteria.  Be conservative
 179                         * and force show it (and let the caller deal with
 180                         * the ambiguity).
 181                         */
 182                        goto include_it;
 183                }
 184
 185                if (object_length < filter_data->max_bytes)
 186                        goto include_it;
 187
 188                if (filter_data->omits)
 189                        oidset_insert(filter_data->omits, &obj->oid);
 190                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 191        }
 192
 193include_it:
 194        if (filter_data->omits)
 195                oidset_remove(filter_data->omits, &obj->oid);
 196        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 197}
 198
 199static void *filter_blobs_limit__init(
 200        struct oidset *omitted,
 201        struct list_objects_filter_options *filter_options,
 202        filter_object_fn *filter_fn,
 203        filter_free_fn *filter_free_fn)
 204{
 205        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 206        d->omits = omitted;
 207        d->max_bytes = filter_options->blob_limit_value;
 208
 209        *filter_fn = filter_blobs_limit;
 210        *filter_free_fn = free;
 211        return d;
 212}
 213
 214/*
 215 * A filter driven by a sparse-checkout specification to only
 216 * include blobs that a sparse checkout would populate.
 217 *
 218 * The sparse-checkout spec can be loaded from a blob with the
 219 * given OID or from a local pathname.  We allow an OID because
 220 * the repo may be bare or we may be doing the filtering on the
 221 * server.
 222 */
 223struct frame {
 224        /*
 225         * defval is the usual default include/exclude value that
 226         * should be inherited as we recurse into directories based
 227         * upon pattern matching of the directory itself or of a
 228         * containing directory.
 229         */
 230        int defval;
 231
 232        /*
 233         * 1 if the directory (recursively) contains any provisionally
 234         * omitted objects.
 235         *
 236         * 0 if everything (recursively) contained in this directory
 237         * has been explicitly included (SHOWN) in the result and
 238         * the directory may be short-cut later in the traversal.
 239         */
 240        unsigned child_prov_omit : 1;
 241};
 242
 243struct filter_sparse_data {
 244        struct oidset *omits;
 245        struct exclude_list el;
 246
 247        size_t nr, alloc;
 248        struct frame *array_frame;
 249};
 250
 251static enum list_objects_filter_result filter_sparse(
 252        enum list_objects_filter_situation filter_situation,
 253        struct object *obj,
 254        const char *pathname,
 255        const char *filename,
 256        void *filter_data_)
 257{
 258        struct filter_sparse_data *filter_data = filter_data_;
 259        int val, dtype;
 260        struct frame *frame;
 261
 262        switch (filter_situation) {
 263        default:
 264                BUG("unknown filter_situation: %d", filter_situation);
 265
 266        case LOFS_BEGIN_TREE:
 267                assert(obj->type == OBJ_TREE);
 268                dtype = DT_DIR;
 269                val = is_excluded_from_list(pathname, strlen(pathname),
 270                                            filename, &dtype, &filter_data->el,
 271                                            &the_index);
 272                if (val < 0)
 273                        val = filter_data->array_frame[filter_data->nr].defval;
 274
 275                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 276                           filter_data->alloc);
 277                filter_data->nr++;
 278                filter_data->array_frame[filter_data->nr].defval = val;
 279                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 280
 281                /*
 282                 * A directory with this tree OID may appear in multiple
 283                 * places in the tree. (Think of a directory move or copy,
 284                 * with no other changes, so the OID is the same, but the
 285                 * full pathnames of objects within this directory are new
 286                 * and may match is_excluded() patterns differently.)
 287                 * So we cannot mark this directory as SEEN (yet), since
 288                 * that will prevent process_tree() from revisiting this
 289                 * tree object with other pathname prefixes.
 290                 *
 291                 * Only _DO_SHOW the tree object the first time we visit
 292                 * this tree object.
 293                 *
 294                 * We always show all tree objects.  A future optimization
 295                 * may want to attempt to narrow this.
 296                 */
 297                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 298                        return LOFR_ZERO;
 299                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 300                return LOFR_DO_SHOW;
 301
 302        case LOFS_END_TREE:
 303                assert(obj->type == OBJ_TREE);
 304                assert(filter_data->nr > 0);
 305
 306                frame = &filter_data->array_frame[filter_data->nr];
 307                filter_data->nr--;
 308
 309                /*
 310                 * Tell our parent directory if any of our children were
 311                 * provisionally omitted.
 312                 */
 313                filter_data->array_frame[filter_data->nr].child_prov_omit |=
 314                        frame->child_prov_omit;
 315
 316                /*
 317                 * If there are NO provisionally omitted child objects (ALL child
 318                 * objects in this folder were INCLUDED), then we can mark the
 319                 * folder as SEEN (so we will not have to revisit it again).
 320                 */
 321                if (!frame->child_prov_omit)
 322                        return LOFR_MARK_SEEN;
 323                return LOFR_ZERO;
 324
 325        case LOFS_BLOB:
 326                assert(obj->type == OBJ_BLOB);
 327                assert((obj->flags & SEEN) == 0);
 328
 329                frame = &filter_data->array_frame[filter_data->nr];
 330
 331                dtype = DT_REG;
 332                val = is_excluded_from_list(pathname, strlen(pathname),
 333                                            filename, &dtype, &filter_data->el,
 334                                            &the_index);
 335                if (val < 0)
 336                        val = frame->defval;
 337                if (val > 0) {
 338                        if (filter_data->omits)
 339                                oidset_remove(filter_data->omits, &obj->oid);
 340                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 341                }
 342
 343                /*
 344                 * Provisionally omit it.  We've already established that
 345                 * this pathname is not in the sparse-checkout specification
 346                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 347                 *
 348                 * However, a pathname elsewhere in the tree may also
 349                 * reference this same blob, so we cannot reject it yet.
 350                 * Leave the LOFR_ bits unset so that if the blob appears
 351                 * again in the traversal, we will be asked again.
 352                 */
 353                if (filter_data->omits)
 354                        oidset_insert(filter_data->omits, &obj->oid);
 355
 356                /*
 357                 * Remember that at least 1 blob in this tree was
 358                 * provisionally omitted.  This prevents us from short
 359                 * cutting the tree in future iterations.
 360                 */
 361                frame->child_prov_omit = 1;
 362                return LOFR_ZERO;
 363        }
 364}
 365
 366
 367static void filter_sparse_free(void *filter_data)
 368{
 369        struct filter_sparse_data *d = filter_data;
 370        /* TODO free contents of 'd' */
 371        free(d);
 372}
 373
 374static void *filter_sparse_oid__init(
 375        struct oidset *omitted,
 376        struct list_objects_filter_options *filter_options,
 377        filter_object_fn *filter_fn,
 378        filter_free_fn *filter_free_fn)
 379{
 380        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 381        d->omits = omitted;
 382        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 383                                           NULL, 0, &d->el) < 0)
 384                die("could not load filter specification");
 385
 386        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 387        d->array_frame[d->nr].defval = 0; /* default to include */
 388        d->array_frame[d->nr].child_prov_omit = 0;
 389
 390        *filter_fn = filter_sparse;
 391        *filter_free_fn = filter_sparse_free;
 392        return d;
 393}
 394
 395static void *filter_sparse_path__init(
 396        struct oidset *omitted,
 397        struct list_objects_filter_options *filter_options,
 398        filter_object_fn *filter_fn,
 399        filter_free_fn *filter_free_fn)
 400{
 401        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 402        d->omits = omitted;
 403        if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 404                                           NULL, 0, &d->el, NULL) < 0)
 405                die("could not load filter specification");
 406
 407        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 408        d->array_frame[d->nr].defval = 0; /* default to include */
 409        d->array_frame[d->nr].child_prov_omit = 0;
 410
 411        *filter_fn = filter_sparse;
 412        *filter_free_fn = filter_sparse_free;
 413        return d;
 414}
 415
 416typedef void *(*filter_init_fn)(
 417        struct oidset *omitted,
 418        struct list_objects_filter_options *filter_options,
 419        filter_object_fn *filter_fn,
 420        filter_free_fn *filter_free_fn);
 421
 422/*
 423 * Must match "enum list_objects_filter_choice".
 424 */
 425static filter_init_fn s_filters[] = {
 426        NULL,
 427        filter_blobs_none__init,
 428        filter_blobs_limit__init,
 429        filter_trees_none__init,
 430        filter_sparse_oid__init,
 431        filter_sparse_path__init,
 432};
 433
 434void *list_objects_filter__init(
 435        struct oidset *omitted,
 436        struct list_objects_filter_options *filter_options,
 437        filter_object_fn *filter_fn,
 438        filter_free_fn *filter_free_fn)
 439{
 440        filter_init_fn init_fn;
 441
 442        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 443
 444        if (filter_options->choice >= LOFC__COUNT)
 445                BUG("invalid list-objects filter choice: %d",
 446                    filter_options->choice);
 447
 448        init_fn = s_filters[filter_options->choice];
 449        if (init_fn)
 450                return init_fn(omitted, filter_options,
 451                               filter_fn, filter_free_fn);
 452        *filter_fn = NULL;
 453        *filter_free_fn = NULL;
 454        return NULL;
 455}