list-objects-filter.con commit Makefile: correct example fuzz build (8b7c2ee)
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidset.h"
  14#include "object-store.h"
  15
/* Remember to update object flag allocation in object.h */
/*
 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
 * that have been shown, but should be revisited if they appear
 * in the traversal (until we mark it SEEN).  This is a way to
 * let us silently de-dup calls to show() in the caller.  This
 * is subtly different from the "revision.h:SHOWN" and the
 * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
 * the non-de-dup usage in pack-bitmap.c
 *
 * Within the code visible in this file, the bit is tested and set
 * only by filter_sparse() when it handles LOFS_BEGIN_TREE.
 */
#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  27
  28/*
  29 * A filter for list-objects to omit ALL blobs from the traversal.
  30 * And to OPTIONALLY collect a list of the omitted OIDs.
  31 */
struct filter_blobs_none_data {
        /*
         * Set that receives the OID of every blob the filter omits.
         * May be NULL, in which case nothing is collected.
         */
        struct oidset *omits;
};
  35
  36static enum list_objects_filter_result filter_blobs_none(
  37        struct repository *r,
  38        enum list_objects_filter_situation filter_situation,
  39        struct object *obj,
  40        const char *pathname,
  41        const char *filename,
  42        void *filter_data_)
  43{
  44        struct filter_blobs_none_data *filter_data = filter_data_;
  45
  46        switch (filter_situation) {
  47        default:
  48                BUG("unknown filter_situation: %d", filter_situation);
  49
  50        case LOFS_BEGIN_TREE:
  51                assert(obj->type == OBJ_TREE);
  52                /* always include all tree objects */
  53                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  54
  55        case LOFS_END_TREE:
  56                assert(obj->type == OBJ_TREE);
  57                return LOFR_ZERO;
  58
  59        case LOFS_BLOB:
  60                assert(obj->type == OBJ_BLOB);
  61                assert((obj->flags & SEEN) == 0);
  62
  63                if (filter_data->omits)
  64                        oidset_insert(filter_data->omits, &obj->oid);
  65                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  66        }
  67}
  68
  69static void *filter_blobs_none__init(
  70        struct oidset *omitted,
  71        struct list_objects_filter_options *filter_options,
  72        filter_object_fn *filter_fn,
  73        filter_free_fn *filter_free_fn)
  74{
  75        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  76        d->omits = omitted;
  77
  78        *filter_fn = filter_blobs_none;
  79        *filter_free_fn = free;
  80        return d;
  81}
  82
  83/*
  84 * A filter for list-objects to omit ALL trees and blobs from the traversal.
  85 * Can OPTIONALLY collect a list of the omitted OIDs.
  86 */
struct filter_trees_none_data {
        /*
         * Set that receives the OID of every omitted tree and blob.
         * May be NULL; filter_trees_none() then asks the caller to
         * skip the tree (LOFR_SKIP_TREE) instead of collecting OIDs.
         */
        struct oidset *omits;
};
  90
  91static enum list_objects_filter_result filter_trees_none(
  92        struct repository *r,
  93        enum list_objects_filter_situation filter_situation,
  94        struct object *obj,
  95        const char *pathname,
  96        const char *filename,
  97        void *filter_data_)
  98{
  99        struct filter_trees_none_data *filter_data = filter_data_;
 100
 101        switch (filter_situation) {
 102        default:
 103                BUG("unknown filter_situation: %d", filter_situation);
 104
 105        case LOFS_BEGIN_TREE:
 106        case LOFS_BLOB:
 107                if (filter_data->omits) {
 108                        oidset_insert(filter_data->omits, &obj->oid);
 109                        /* _MARK_SEEN but not _DO_SHOW (hard omit) */
 110                        return LOFR_MARK_SEEN;
 111                } else {
 112                        /*
 113                         * Not collecting omits so no need to to traverse tree.
 114                         */
 115                        return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
 116                }
 117
 118        case LOFS_END_TREE:
 119                assert(obj->type == OBJ_TREE);
 120                return LOFR_ZERO;
 121
 122        }
 123}
 124
 125static void* filter_trees_none__init(
 126        struct oidset *omitted,
 127        struct list_objects_filter_options *filter_options,
 128        filter_object_fn *filter_fn,
 129        filter_free_fn *filter_free_fn)
 130{
 131        struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
 132        d->omits = omitted;
 133
 134        *filter_fn = filter_trees_none;
 135        *filter_free_fn = free;
 136        return d;
 137}
 138
 139/*
 140 * A filter for list-objects to omit large blobs.
 141 * And to OPTIONALLY collect a list of the omitted OIDs.
 142 */
struct filter_blobs_limit_data {
        /*
         * Set that receives the OID of every blob the filter omits.
         * May be NULL, in which case nothing is collected.
         */
        struct oidset *omits;
        /*
         * Size threshold copied from filter_options->blob_limit_value.
         * filter_blobs_limit() shows a blob only when its size is
         * strictly less than this value.
         */
        unsigned long max_bytes;
};
 147
 148static enum list_objects_filter_result filter_blobs_limit(
 149        struct repository *r,
 150        enum list_objects_filter_situation filter_situation,
 151        struct object *obj,
 152        const char *pathname,
 153        const char *filename,
 154        void *filter_data_)
 155{
 156        struct filter_blobs_limit_data *filter_data = filter_data_;
 157        unsigned long object_length;
 158        enum object_type t;
 159
 160        switch (filter_situation) {
 161        default:
 162                BUG("unknown filter_situation: %d", filter_situation);
 163
 164        case LOFS_BEGIN_TREE:
 165                assert(obj->type == OBJ_TREE);
 166                /* always include all tree objects */
 167                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 168
 169        case LOFS_END_TREE:
 170                assert(obj->type == OBJ_TREE);
 171                return LOFR_ZERO;
 172
 173        case LOFS_BLOB:
 174                assert(obj->type == OBJ_BLOB);
 175                assert((obj->flags & SEEN) == 0);
 176
 177                t = oid_object_info(r, &obj->oid, &object_length);
 178                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 179                        /*
 180                         * We DO NOT have the blob locally, so we cannot
 181                         * apply the size filter criteria.  Be conservative
 182                         * and force show it (and let the caller deal with
 183                         * the ambiguity).
 184                         */
 185                        goto include_it;
 186                }
 187
 188                if (object_length < filter_data->max_bytes)
 189                        goto include_it;
 190
 191                if (filter_data->omits)
 192                        oidset_insert(filter_data->omits, &obj->oid);
 193                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 194        }
 195
 196include_it:
 197        if (filter_data->omits)
 198                oidset_remove(filter_data->omits, &obj->oid);
 199        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 200}
 201
 202static void *filter_blobs_limit__init(
 203        struct oidset *omitted,
 204        struct list_objects_filter_options *filter_options,
 205        filter_object_fn *filter_fn,
 206        filter_free_fn *filter_free_fn)
 207{
 208        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 209        d->omits = omitted;
 210        d->max_bytes = filter_options->blob_limit_value;
 211
 212        *filter_fn = filter_blobs_limit;
 213        *filter_free_fn = free;
 214        return d;
 215}
 216
 217/*
 218 * A filter driven by a sparse-checkout specification to only
 219 * include blobs that a sparse checkout would populate.
 220 *
 221 * The sparse-checkout spec can be loaded from a blob with the
 222 * given OID or from a local pathname.  We allow an OID because
 223 * the repo may be bare or we may be doing the filtering on the
 224 * server.
 225 */
/*
 * One entry of the directory stack kept in
 * filter_sparse_data.array_frame.  filter_sparse() pushes an entry on
 * LOFS_BEGIN_TREE and pops it on LOFS_END_TREE.
 */
struct frame {
        /*
         * defval is the usual default include/exclude value that
         * should be inherited as we recurse into directories based
         * upon pattern matching of the directory itself or of a
         * containing directory.
         *
         * filter_sparse() falls back to it when the pattern lookup
         * returns a negative value, and shows a blob only when the
         * resulting value is > 0.
         */
        int defval;

        /*
         * 1 if the directory (recursively) contains any provisionally
         * omitted objects.
         *
         * 0 if everything (recursively) contained in this directory
         * has been explicitly included (SHOWN) in the result and
         * the directory may be short-cut later in the traversal.
         */
        unsigned child_prov_omit : 1;
};
 245
/* State of the sparse-checkout driven filter (see filter_sparse()). */
struct filter_sparse_data {
        /*
         * Set tracking provisionally omitted blobs: filter_sparse()
         * inserts a blob when it is omitted and removes it again if
         * the blob is later shown.  May be NULL.
         */
        struct oidset *omits;
        /* patterns loaded by the filter_sparse_*__init() functions */
        struct exclude_list el;

        /*
         * Stack of per-directory frames.  Note that 'nr' is the INDEX
         * of the current (innermost) frame, not the number of frames:
         * the init functions fill array_frame[0] and leave nr at 0.
         * 'alloc' is the capacity bookkeeping used with ALLOC_GROW().
         */
        size_t nr, alloc;
        struct frame *array_frame;
};
 253
 254static enum list_objects_filter_result filter_sparse(
 255        struct repository *r,
 256        enum list_objects_filter_situation filter_situation,
 257        struct object *obj,
 258        const char *pathname,
 259        const char *filename,
 260        void *filter_data_)
 261{
 262        struct filter_sparse_data *filter_data = filter_data_;
 263        int val, dtype;
 264        struct frame *frame;
 265
 266        switch (filter_situation) {
 267        default:
 268                BUG("unknown filter_situation: %d", filter_situation);
 269
 270        case LOFS_BEGIN_TREE:
 271                assert(obj->type == OBJ_TREE);
 272                dtype = DT_DIR;
 273                val = is_excluded_from_list(pathname, strlen(pathname),
 274                                            filename, &dtype, &filter_data->el,
 275                                            r->index);
 276                if (val < 0)
 277                        val = filter_data->array_frame[filter_data->nr].defval;
 278
 279                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 280                           filter_data->alloc);
 281                filter_data->nr++;
 282                filter_data->array_frame[filter_data->nr].defval = val;
 283                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 284
 285                /*
 286                 * A directory with this tree OID may appear in multiple
 287                 * places in the tree. (Think of a directory move or copy,
 288                 * with no other changes, so the OID is the same, but the
 289                 * full pathnames of objects within this directory are new
 290                 * and may match is_excluded() patterns differently.)
 291                 * So we cannot mark this directory as SEEN (yet), since
 292                 * that will prevent process_tree() from revisiting this
 293                 * tree object with other pathname prefixes.
 294                 *
 295                 * Only _DO_SHOW the tree object the first time we visit
 296                 * this tree object.
 297                 *
 298                 * We always show all tree objects.  A future optimization
 299                 * may want to attempt to narrow this.
 300                 */
 301                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 302                        return LOFR_ZERO;
 303                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 304                return LOFR_DO_SHOW;
 305
 306        case LOFS_END_TREE:
 307                assert(obj->type == OBJ_TREE);
 308                assert(filter_data->nr > 0);
 309
 310                frame = &filter_data->array_frame[filter_data->nr];
 311                filter_data->nr--;
 312
 313                /*
 314                 * Tell our parent directory if any of our children were
 315                 * provisionally omitted.
 316                 */
 317                filter_data->array_frame[filter_data->nr].child_prov_omit |=
 318                        frame->child_prov_omit;
 319
 320                /*
 321                 * If there are NO provisionally omitted child objects (ALL child
 322                 * objects in this folder were INCLUDED), then we can mark the
 323                 * folder as SEEN (so we will not have to revisit it again).
 324                 */
 325                if (!frame->child_prov_omit)
 326                        return LOFR_MARK_SEEN;
 327                return LOFR_ZERO;
 328
 329        case LOFS_BLOB:
 330                assert(obj->type == OBJ_BLOB);
 331                assert((obj->flags & SEEN) == 0);
 332
 333                frame = &filter_data->array_frame[filter_data->nr];
 334
 335                dtype = DT_REG;
 336                val = is_excluded_from_list(pathname, strlen(pathname),
 337                                            filename, &dtype, &filter_data->el,
 338                                            r->index);
 339                if (val < 0)
 340                        val = frame->defval;
 341                if (val > 0) {
 342                        if (filter_data->omits)
 343                                oidset_remove(filter_data->omits, &obj->oid);
 344                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 345                }
 346
 347                /*
 348                 * Provisionally omit it.  We've already established that
 349                 * this pathname is not in the sparse-checkout specification
 350                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 351                 *
 352                 * However, a pathname elsewhere in the tree may also
 353                 * reference this same blob, so we cannot reject it yet.
 354                 * Leave the LOFR_ bits unset so that if the blob appears
 355                 * again in the traversal, we will be asked again.
 356                 */
 357                if (filter_data->omits)
 358                        oidset_insert(filter_data->omits, &obj->oid);
 359
 360                /*
 361                 * Remember that at least 1 blob in this tree was
 362                 * provisionally omitted.  This prevents us from short
 363                 * cutting the tree in future iterations.
 364                 */
 365                frame->child_prov_omit = 1;
 366                return LOFR_ZERO;
 367        }
 368}
 369
 370
 371static void filter_sparse_free(void *filter_data)
 372{
 373        struct filter_sparse_data *d = filter_data;
 374        /* TODO free contents of 'd' */
 375        free(d);
 376}
 377
 378static void *filter_sparse_oid__init(
 379        struct oidset *omitted,
 380        struct list_objects_filter_options *filter_options,
 381        filter_object_fn *filter_fn,
 382        filter_free_fn *filter_free_fn)
 383{
 384        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 385        d->omits = omitted;
 386        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 387                                           NULL, 0, &d->el) < 0)
 388                die("could not load filter specification");
 389
 390        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 391        d->array_frame[d->nr].defval = 0; /* default to include */
 392        d->array_frame[d->nr].child_prov_omit = 0;
 393
 394        *filter_fn = filter_sparse;
 395        *filter_free_fn = filter_sparse_free;
 396        return d;
 397}
 398
 399static void *filter_sparse_path__init(
 400        struct oidset *omitted,
 401        struct list_objects_filter_options *filter_options,
 402        filter_object_fn *filter_fn,
 403        filter_free_fn *filter_free_fn)
 404{
 405        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 406        d->omits = omitted;
 407        if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 408                                           NULL, 0, &d->el, NULL) < 0)
 409                die("could not load filter specification");
 410
 411        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 412        d->array_frame[d->nr].defval = 0; /* default to include */
 413        d->array_frame[d->nr].child_prov_omit = 0;
 414
 415        *filter_fn = filter_sparse;
 416        *filter_free_fn = filter_sparse_free;
 417        return d;
 418}
 419
/*
 * Signature shared by the *__init() functions in this file.
 *
 * Each one receives the (possibly NULL) set in which omitted OIDs are
 * to be collected and the parsed filter options, stores the filter
 * callback in *filter_fn and the matching release function in
 * *filter_free_fn, and returns the newly allocated filter state.
 */
typedef void *(*filter_init_fn)(
        struct oidset *omitted,
        struct list_objects_filter_options *filter_options,
        filter_object_fn *filter_fn,
        filter_free_fn *filter_free_fn);
 425
/*
 * Table of init functions, indexed by filter_options->choice in
 * list_objects_filter__init().
 *
 * Must match "enum list_objects_filter_choice".
 *
 * A NULL entry means there is nothing to set up for that choice;
 * list_objects_filter__init() then stores NULL in both output
 * pointers and returns NULL.
 */
static filter_init_fn s_filters[] = {
        NULL,
        filter_blobs_none__init,
        filter_blobs_limit__init,
        filter_trees_none__init,
        filter_sparse_oid__init,
        filter_sparse_path__init,
};
 437
 438void *list_objects_filter__init(
 439        struct oidset *omitted,
 440        struct list_objects_filter_options *filter_options,
 441        filter_object_fn *filter_fn,
 442        filter_free_fn *filter_free_fn)
 443{
 444        filter_init_fn init_fn;
 445
 446        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 447
 448        if (filter_options->choice >= LOFC__COUNT)
 449                BUG("invalid list-objects filter choice: %d",
 450                    filter_options->choice);
 451
 452        init_fn = s_filters[filter_options->choice];
 453        if (init_fn)
 454                return init_fn(omitted, filter_options,
 455                               filter_fn, filter_free_fn);
 456        *filter_fn = NULL;
 457        *filter_free_fn = NULL;
 458        return NULL;
 459}