list-objects-filter.con commit Merge branch 'jk/detect-truncated-zlib-input' into maint (d75c41b)
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidset.h"
  14#include "object-store.h"
  15
  16/* Remember to update object flag allocation in object.h */
  17/*
  18 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  19 * that have been shown, but should be revisited if they appear
  20 * in the traversal (until we mark it SEEN).  This is a way to
  21 * let us silently de-dup calls to show() in the caller.  This
  22 * is subtly different from the "revision.h:SHOWN" and the
  23 * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  24 * the non-de-dup usage in pack-bitmap.c
  25 */
  26#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  27
  28/*
  29 * A filter for list-objects to omit ALL blobs from the traversal.
  30 * And to OPTIONALLY collect a list of the omitted OIDs.
  31 */
  32struct filter_blobs_none_data {
  33        struct oidset *omits;
  34};
  35
  36static enum list_objects_filter_result filter_blobs_none(
  37        enum list_objects_filter_situation filter_situation,
  38        struct object *obj,
  39        const char *pathname,
  40        const char *filename,
  41        void *filter_data_)
  42{
  43        struct filter_blobs_none_data *filter_data = filter_data_;
  44
  45        switch (filter_situation) {
  46        default:
  47                die("unknown filter_situation");
  48                return LOFR_ZERO;
  49
  50        case LOFS_BEGIN_TREE:
  51                assert(obj->type == OBJ_TREE);
  52                /* always include all tree objects */
  53                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  54
  55        case LOFS_END_TREE:
  56                assert(obj->type == OBJ_TREE);
  57                return LOFR_ZERO;
  58
  59        case LOFS_BLOB:
  60                assert(obj->type == OBJ_BLOB);
  61                assert((obj->flags & SEEN) == 0);
  62
  63                if (filter_data->omits)
  64                        oidset_insert(filter_data->omits, &obj->oid);
  65                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  66        }
  67}
  68
  69static void *filter_blobs_none__init(
  70        struct oidset *omitted,
  71        struct list_objects_filter_options *filter_options,
  72        filter_object_fn *filter_fn,
  73        filter_free_fn *filter_free_fn)
  74{
  75        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  76        d->omits = omitted;
  77
  78        *filter_fn = filter_blobs_none;
  79        *filter_free_fn = free;
  80        return d;
  81}
  82
  83/*
  84 * A filter for list-objects to omit large blobs.
  85 * And to OPTIONALLY collect a list of the omitted OIDs.
  86 */
  87struct filter_blobs_limit_data {
  88        struct oidset *omits;
  89        unsigned long max_bytes;
  90};
  91
  92static enum list_objects_filter_result filter_blobs_limit(
  93        enum list_objects_filter_situation filter_situation,
  94        struct object *obj,
  95        const char *pathname,
  96        const char *filename,
  97        void *filter_data_)
  98{
  99        struct filter_blobs_limit_data *filter_data = filter_data_;
 100        unsigned long object_length;
 101        enum object_type t;
 102
 103        switch (filter_situation) {
 104        default:
 105                die("unknown filter_situation");
 106                return LOFR_ZERO;
 107
 108        case LOFS_BEGIN_TREE:
 109                assert(obj->type == OBJ_TREE);
 110                /* always include all tree objects */
 111                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 112
 113        case LOFS_END_TREE:
 114                assert(obj->type == OBJ_TREE);
 115                return LOFR_ZERO;
 116
 117        case LOFS_BLOB:
 118                assert(obj->type == OBJ_BLOB);
 119                assert((obj->flags & SEEN) == 0);
 120
 121                t = oid_object_info(the_repository, &obj->oid, &object_length);
 122                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 123                        /*
 124                         * We DO NOT have the blob locally, so we cannot
 125                         * apply the size filter criteria.  Be conservative
 126                         * and force show it (and let the caller deal with
 127                         * the ambiguity).
 128                         */
 129                        goto include_it;
 130                }
 131
 132                if (object_length < filter_data->max_bytes)
 133                        goto include_it;
 134
 135                if (filter_data->omits)
 136                        oidset_insert(filter_data->omits, &obj->oid);
 137                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 138        }
 139
 140include_it:
 141        if (filter_data->omits)
 142                oidset_remove(filter_data->omits, &obj->oid);
 143        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 144}
 145
 146static void *filter_blobs_limit__init(
 147        struct oidset *omitted,
 148        struct list_objects_filter_options *filter_options,
 149        filter_object_fn *filter_fn,
 150        filter_free_fn *filter_free_fn)
 151{
 152        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 153        d->omits = omitted;
 154        d->max_bytes = filter_options->blob_limit_value;
 155
 156        *filter_fn = filter_blobs_limit;
 157        *filter_free_fn = free;
 158        return d;
 159}
 160
 161/*
 162 * A filter driven by a sparse-checkout specification to only
 163 * include blobs that a sparse checkout would populate.
 164 *
 165 * The sparse-checkout spec can be loaded from a blob with the
 166 * given OID or from a local pathname.  We allow an OID because
 167 * the repo may be bare or we may be doing the filtering on the
 168 * server.
 169 */
 170struct frame {
 171        /*
 172         * defval is the usual default include/exclude value that
 173         * should be inherited as we recurse into directories based
 174         * upon pattern matching of the directory itself or of a
 175         * containing directory.
 176         */
 177        int defval;
 178
 179        /*
 180         * 1 if the directory (recursively) contains any provisionally
 181         * omitted objects.
 182         *
 183         * 0 if everything (recursively) contained in this directory
 184         * has been explicitly included (SHOWN) in the result and
 185         * the directory may be short-cut later in the traversal.
 186         */
 187        unsigned child_prov_omit : 1;
 188};
 189
 190struct filter_sparse_data {
 191        struct oidset *omits;
 192        struct exclude_list el;
 193
 194        size_t nr, alloc;
 195        struct frame *array_frame;
 196};
 197
 198static enum list_objects_filter_result filter_sparse(
 199        enum list_objects_filter_situation filter_situation,
 200        struct object *obj,
 201        const char *pathname,
 202        const char *filename,
 203        void *filter_data_)
 204{
 205        struct filter_sparse_data *filter_data = filter_data_;
 206        int val, dtype;
 207        struct frame *frame;
 208
 209        switch (filter_situation) {
 210        default:
 211                die("unknown filter_situation");
 212                return LOFR_ZERO;
 213
 214        case LOFS_BEGIN_TREE:
 215                assert(obj->type == OBJ_TREE);
 216                dtype = DT_DIR;
 217                val = is_excluded_from_list(pathname, strlen(pathname),
 218                                            filename, &dtype, &filter_data->el,
 219                                            &the_index);
 220                if (val < 0)
 221                        val = filter_data->array_frame[filter_data->nr].defval;
 222
 223                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 224                           filter_data->alloc);
 225                filter_data->nr++;
 226                filter_data->array_frame[filter_data->nr].defval = val;
 227                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 228
 229                /*
 230                 * A directory with this tree OID may appear in multiple
 231                 * places in the tree. (Think of a directory move or copy,
 232                 * with no other changes, so the OID is the same, but the
 233                 * full pathnames of objects within this directory are new
 234                 * and may match is_excluded() patterns differently.)
 235                 * So we cannot mark this directory as SEEN (yet), since
 236                 * that will prevent process_tree() from revisiting this
 237                 * tree object with other pathname prefixes.
 238                 *
 239                 * Only _DO_SHOW the tree object the first time we visit
 240                 * this tree object.
 241                 *
 242                 * We always show all tree objects.  A future optimization
 243                 * may want to attempt to narrow this.
 244                 */
 245                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 246                        return LOFR_ZERO;
 247                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 248                return LOFR_DO_SHOW;
 249
 250        case LOFS_END_TREE:
 251                assert(obj->type == OBJ_TREE);
 252                assert(filter_data->nr > 0);
 253
 254                frame = &filter_data->array_frame[filter_data->nr];
 255                filter_data->nr--;
 256
 257                /*
 258                 * Tell our parent directory if any of our children were
 259                 * provisionally omitted.
 260                 */
 261                filter_data->array_frame[filter_data->nr].child_prov_omit |=
 262                        frame->child_prov_omit;
 263
 264                /*
 265                 * If there are NO provisionally omitted child objects (ALL child
 266                 * objects in this folder were INCLUDED), then we can mark the
 267                 * folder as SEEN (so we will not have to revisit it again).
 268                 */
 269                if (!frame->child_prov_omit)
 270                        return LOFR_MARK_SEEN;
 271                return LOFR_ZERO;
 272
 273        case LOFS_BLOB:
 274                assert(obj->type == OBJ_BLOB);
 275                assert((obj->flags & SEEN) == 0);
 276
 277                frame = &filter_data->array_frame[filter_data->nr];
 278
 279                dtype = DT_REG;
 280                val = is_excluded_from_list(pathname, strlen(pathname),
 281                                            filename, &dtype, &filter_data->el,
 282                                            &the_index);
 283                if (val < 0)
 284                        val = frame->defval;
 285                if (val > 0) {
 286                        if (filter_data->omits)
 287                                oidset_remove(filter_data->omits, &obj->oid);
 288                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 289                }
 290
 291                /*
 292                 * Provisionally omit it.  We've already established that
 293                 * this pathname is not in the sparse-checkout specification
 294                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 295                 *
 296                 * However, a pathname elsewhere in the tree may also
 297                 * reference this same blob, so we cannot reject it yet.
 298                 * Leave the LOFR_ bits unset so that if the blob appears
 299                 * again in the traversal, we will be asked again.
 300                 */
 301                if (filter_data->omits)
 302                        oidset_insert(filter_data->omits, &obj->oid);
 303
 304                /*
 305                 * Remember that at least 1 blob in this tree was
 306                 * provisionally omitted.  This prevents us from short
 307                 * cutting the tree in future iterations.
 308                 */
 309                frame->child_prov_omit = 1;
 310                return LOFR_ZERO;
 311        }
 312}
 313
 314
 315static void filter_sparse_free(void *filter_data)
 316{
 317        struct filter_sparse_data *d = filter_data;
 318        /* TODO free contents of 'd' */
 319        free(d);
 320}
 321
 322static void *filter_sparse_oid__init(
 323        struct oidset *omitted,
 324        struct list_objects_filter_options *filter_options,
 325        filter_object_fn *filter_fn,
 326        filter_free_fn *filter_free_fn)
 327{
 328        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 329        d->omits = omitted;
 330        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 331                                           NULL, 0, &d->el) < 0)
 332                die("could not load filter specification");
 333
 334        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 335        d->array_frame[d->nr].defval = 0; /* default to include */
 336        d->array_frame[d->nr].child_prov_omit = 0;
 337
 338        *filter_fn = filter_sparse;
 339        *filter_free_fn = filter_sparse_free;
 340        return d;
 341}
 342
 343static void *filter_sparse_path__init(
 344        struct oidset *omitted,
 345        struct list_objects_filter_options *filter_options,
 346        filter_object_fn *filter_fn,
 347        filter_free_fn *filter_free_fn)
 348{
 349        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 350        d->omits = omitted;
 351        if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 352                                           NULL, 0, &d->el, NULL) < 0)
 353                die("could not load filter specification");
 354
 355        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 356        d->array_frame[d->nr].defval = 0; /* default to include */
 357        d->array_frame[d->nr].child_prov_omit = 0;
 358
 359        *filter_fn = filter_sparse;
 360        *filter_free_fn = filter_sparse_free;
 361        return d;
 362}
 363
 364typedef void *(*filter_init_fn)(
 365        struct oidset *omitted,
 366        struct list_objects_filter_options *filter_options,
 367        filter_object_fn *filter_fn,
 368        filter_free_fn *filter_free_fn);
 369
 370/*
 371 * Must match "enum list_objects_filter_choice".
 372 */
 373static filter_init_fn s_filters[] = {
 374        NULL,
 375        filter_blobs_none__init,
 376        filter_blobs_limit__init,
 377        filter_sparse_oid__init,
 378        filter_sparse_path__init,
 379};
 380
 381void *list_objects_filter__init(
 382        struct oidset *omitted,
 383        struct list_objects_filter_options *filter_options,
 384        filter_object_fn *filter_fn,
 385        filter_free_fn *filter_free_fn)
 386{
 387        filter_init_fn init_fn;
 388
 389        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 390
 391        if (filter_options->choice >= LOFC__COUNT)
 392                die("invalid list-objects filter choice: %d",
 393                    filter_options->choice);
 394
 395        init_fn = s_filters[filter_options->choice];
 396        if (init_fn)
 397                return init_fn(omitted, filter_options,
 398                               filter_fn, filter_free_fn);
 399        *filter_fn = NULL;
 400        *filter_free_fn = NULL;
 401        return NULL;
 402}