1#include"cache.h" 2#include"dir.h" 3#include"tag.h" 4#include"commit.h" 5#include"tree.h" 6#include"blob.h" 7#include"diff.h" 8#include"tree-walk.h" 9#include"revision.h" 10#include"list-objects.h" 11#include"list-objects-filter.h" 12#include"list-objects-filter-options.h" 13#include"oidmap.h" 14#include"oidset.h" 15#include"object-store.h" 16 17/* Remember to update object flag allocation in object.h */ 18/* 19 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects 20 * that have been shown, but should be revisited if they appear 21 * in the traversal (until we mark it SEEN). This is a way to 22 * let us silently de-dup calls to show() in the caller. This 23 * is subtly different from the "revision.h:SHOWN" and the 24 * "sha1-name.c:ONELINE_SEEN" bits. And also different from 25 * the non-de-dup usage in pack-bitmap.c 26 */ 27#define FILTER_SHOWN_BUT_REVISIT (1<<21) 28 29struct subfilter { 30struct filter *filter; 31struct oidset seen; 32struct oidset omits; 33struct object_id skip_tree; 34unsigned is_skipping_tree :1; 35}; 36 37struct filter { 38enumlist_objects_filter_result(*filter_object_fn)( 39struct repository *r, 40enum list_objects_filter_situation filter_situation, 41struct object *obj, 42const char*pathname, 43const char*filename, 44struct oidset *omits, 45void*filter_data); 46 47/* 48 * Optional. If this function is supplied and the filter needs 49 * to collect omits, then this function is called once before 50 * free_fn is called. 51 * 52 * This is required because the following two conditions hold: 53 * 54 * a. A tree filter can add and remove objects as an object 55 * graph is traversed. 56 * b. A combine filter's omit set is the union of all its 57 * subfilters, which may include tree: filters. 58 * 59 * As such, the omits sets must be separate sets, and can only 60 * be unioned after the traversal is completed. 61 */ 62void(*finalize_omits_fn)(struct oidset *omits,void*filter_data); 63 64void(*free_fn)(void*filter_data); 65 66void*filter_data; 67 68/* If non-NULL, the filter collects a list of the omitted OIDs here. */ 69struct oidset *omits; 70}; 71 72static enum list_objects_filter_result filter_blobs_none( 73struct repository *r, 74enum list_objects_filter_situation filter_situation, 75struct object *obj, 76const char*pathname, 77const char*filename, 78struct oidset *omits, 79void*filter_data_) 80{ 81switch(filter_situation) { 82default: 83BUG("unknown filter_situation:%d", filter_situation); 84 85case LOFS_BEGIN_TREE: 86assert(obj->type == OBJ_TREE); 87/* always include all tree objects */ 88return LOFR_MARK_SEEN | LOFR_DO_SHOW; 89 90case LOFS_END_TREE: 91assert(obj->type == OBJ_TREE); 92return LOFR_ZERO; 93 94case LOFS_BLOB: 95assert(obj->type == OBJ_BLOB); 96assert((obj->flags & SEEN) ==0); 97 98if(omits) 99oidset_insert(omits, &obj->oid); 100return LOFR_MARK_SEEN;/* but not LOFR_DO_SHOW (hard omit) */ 101} 102} 103 104static voidfilter_blobs_none__init( 105struct list_objects_filter_options *filter_options, 106struct filter *filter) 107{ 108 filter->filter_object_fn = filter_blobs_none; 109 filter->free_fn = free; 110} 111 112/* 113 * A filter for list-objects to omit ALL trees and blobs from the traversal. 114 * Can OPTIONALLY collect a list of the omitted OIDs. 115 */ 116struct filter_trees_depth_data { 117/* 118 * Maps trees to the minimum depth at which they were seen. It is not 119 * necessary to re-traverse a tree at deeper or equal depths than it has 120 * already been traversed. 121 * 122 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent 123 * it from being traversed at shallower depths. 124 */ 125struct oidmap seen_at_depth; 126 127unsigned long exclude_depth; 128unsigned long current_depth; 129}; 130 131struct seen_map_entry { 132struct oidmap_entry base; 133size_t depth; 134}; 135 136/* Returns 1 if the oid was in the omits set before it was invoked. */ 137static intfilter_trees_update_omits( 138struct object *obj, 139struct oidset *omits, 140int include_it) 141{ 142if(!omits) 143return0; 144 145if(include_it) 146returnoidset_remove(omits, &obj->oid); 147else 148returnoidset_insert(omits, &obj->oid); 149} 150 151static enum list_objects_filter_result filter_trees_depth( 152struct repository *r, 153enum list_objects_filter_situation filter_situation, 154struct object *obj, 155const char*pathname, 156const char*filename, 157struct oidset *omits, 158void*filter_data_) 159{ 160struct filter_trees_depth_data *filter_data = filter_data_; 161struct seen_map_entry *seen_info; 162int include_it = filter_data->current_depth < 163 filter_data->exclude_depth; 164int filter_res; 165int already_seen; 166 167/* 168 * Note that we do not use _MARK_SEEN in order to allow re-traversal in 169 * case we encounter a tree or blob again at a shallower depth. 170 */ 171 172switch(filter_situation) { 173default: 174BUG("unknown filter_situation:%d", filter_situation); 175 176case LOFS_END_TREE: 177assert(obj->type == OBJ_TREE); 178 filter_data->current_depth--; 179return LOFR_ZERO; 180 181case LOFS_BLOB: 182filter_trees_update_omits(obj, omits, include_it); 183return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO; 184 185case LOFS_BEGIN_TREE: 186 seen_info =oidmap_get( 187&filter_data->seen_at_depth, &obj->oid); 188if(!seen_info) { 189 seen_info =xcalloc(1,sizeof(*seen_info)); 190oidcpy(&seen_info->base.oid, &obj->oid); 191 seen_info->depth = filter_data->current_depth; 192oidmap_put(&filter_data->seen_at_depth, seen_info); 193 already_seen =0; 194}else{ 195 already_seen = 196 filter_data->current_depth >= seen_info->depth; 197} 198 199if(already_seen) { 200 filter_res = LOFR_SKIP_TREE; 201}else{ 202int been_omitted =filter_trees_update_omits( 203 obj, omits, include_it); 204 seen_info->depth = filter_data->current_depth; 205 206if(include_it) 207 filter_res = LOFR_DO_SHOW; 208else if(omits && !been_omitted) 209/* 210 * Must update omit information of children 211 * recursively; they have not been omitted yet. 212 */ 213 filter_res = LOFR_ZERO; 214else 215 filter_res = LOFR_SKIP_TREE; 216} 217 218 filter_data->current_depth++; 219return filter_res; 220} 221} 222 223static voidfilter_trees_free(void*filter_data) { 224struct filter_trees_depth_data *d = filter_data; 225if(!d) 226return; 227oidmap_free(&d->seen_at_depth,1); 228free(d); 229} 230 231static voidfilter_trees_depth__init( 232struct list_objects_filter_options *filter_options, 233struct filter *filter) 234{ 235struct filter_trees_depth_data *d =xcalloc(1,sizeof(*d)); 236oidmap_init(&d->seen_at_depth,0); 237 d->exclude_depth = filter_options->tree_exclude_depth; 238 d->current_depth =0; 239 240 filter->filter_data = d; 241 filter->filter_object_fn = filter_trees_depth; 242 filter->free_fn = filter_trees_free; 243} 244 245/* 246 * A filter for list-objects to omit large blobs. 247 * And to OPTIONALLY collect a list of the omitted OIDs. 248 */ 249struct filter_blobs_limit_data { 250unsigned long max_bytes; 251}; 252 253static enum list_objects_filter_result filter_blobs_limit( 254struct repository *r, 255enum list_objects_filter_situation filter_situation, 256struct object *obj, 257const char*pathname, 258const char*filename, 259struct oidset *omits, 260void*filter_data_) 261{ 262struct filter_blobs_limit_data *filter_data = filter_data_; 263unsigned long object_length; 264enum object_type t; 265 266switch(filter_situation) { 267default: 268BUG("unknown filter_situation:%d", filter_situation); 269 270case LOFS_BEGIN_TREE: 271assert(obj->type == OBJ_TREE); 272/* always include all tree objects */ 273return LOFR_MARK_SEEN | LOFR_DO_SHOW; 274 275case LOFS_END_TREE: 276assert(obj->type == OBJ_TREE); 277return LOFR_ZERO; 278 279case LOFS_BLOB: 280assert(obj->type == OBJ_BLOB); 281assert((obj->flags & SEEN) ==0); 282 283 t =oid_object_info(r, &obj->oid, &object_length); 284if(t != OBJ_BLOB) {/* probably OBJ_NONE */ 285/* 286 * We DO NOT have the blob locally, so we cannot 287 * apply the size filter criteria. Be conservative 288 * and force show it (and let the caller deal with 289 * the ambiguity). 290 */ 291goto include_it; 292} 293 294if(object_length < filter_data->max_bytes) 295goto include_it; 296 297if(omits) 298oidset_insert(omits, &obj->oid); 299return LOFR_MARK_SEEN;/* but not LOFR_DO_SHOW (hard omit) */ 300} 301 302include_it: 303if(omits) 304oidset_remove(omits, &obj->oid); 305return LOFR_MARK_SEEN | LOFR_DO_SHOW; 306} 307 308static voidfilter_blobs_limit__init( 309struct list_objects_filter_options *filter_options, 310struct filter *filter) 311{ 312struct filter_blobs_limit_data *d =xcalloc(1,sizeof(*d)); 313 d->max_bytes = filter_options->blob_limit_value; 314 315 filter->filter_data = d; 316 filter->filter_object_fn = filter_blobs_limit; 317 filter->free_fn = free; 318} 319 320/* 321 * A filter driven by a sparse-checkout specification to only 322 * include blobs that a sparse checkout would populate. 323 * 324 * The sparse-checkout spec can be loaded from a blob with the 325 * given OID or from a local pathname. We allow an OID because 326 * the repo may be bare or we may be doing the filtering on the 327 * server. 328 */ 329struct frame { 330/* 331 * defval is the usual default include/exclude value that 332 * should be inherited as we recurse into directories based 333 * upon pattern matching of the directory itself or of a 334 * containing directory. 335 */ 336int defval; 337 338/* 339 * 1 if the directory (recursively) contains any provisionally 340 * omitted objects. 341 * 342 * 0 if everything (recursively) contained in this directory 343 * has been explicitly included (SHOWN) in the result and 344 * the directory may be short-cut later in the traversal. 345 */ 346unsigned child_prov_omit :1; 347}; 348 349struct filter_sparse_data { 350struct exclude_list el; 351 352size_t nr, alloc; 353struct frame *array_frame; 354}; 355 356static enum list_objects_filter_result filter_sparse( 357struct repository *r, 358enum list_objects_filter_situation filter_situation, 359struct object *obj, 360const char*pathname, 361const char*filename, 362struct oidset *omits, 363void*filter_data_) 364{ 365struct filter_sparse_data *filter_data = filter_data_; 366int val, dtype; 367struct frame *frame; 368 369switch(filter_situation) { 370default: 371BUG("unknown filter_situation:%d", filter_situation); 372 373case LOFS_BEGIN_TREE: 374assert(obj->type == OBJ_TREE); 375 dtype = DT_DIR; 376 val =is_excluded_from_list(pathname,strlen(pathname), 377 filename, &dtype, &filter_data->el, 378 r->index); 379if(val <0) 380 val = filter_data->array_frame[filter_data->nr -1].defval; 381 382ALLOC_GROW(filter_data->array_frame, filter_data->nr +1, 383 filter_data->alloc); 384 filter_data->array_frame[filter_data->nr].defval = val; 385 filter_data->array_frame[filter_data->nr].child_prov_omit =0; 386 filter_data->nr++; 387 388/* 389 * A directory with this tree OID may appear in multiple 390 * places in the tree. (Think of a directory move or copy, 391 * with no other changes, so the OID is the same, but the 392 * full pathnames of objects within this directory are new 393 * and may match is_excluded() patterns differently.) 394 * So we cannot mark this directory as SEEN (yet), since 395 * that will prevent process_tree() from revisiting this 396 * tree object with other pathname prefixes. 397 * 398 * Only _DO_SHOW the tree object the first time we visit 399 * this tree object. 400 * 401 * We always show all tree objects. A future optimization 402 * may want to attempt to narrow this. 403 */ 404if(obj->flags & FILTER_SHOWN_BUT_REVISIT) 405return LOFR_ZERO; 406 obj->flags |= FILTER_SHOWN_BUT_REVISIT; 407return LOFR_DO_SHOW; 408 409case LOFS_END_TREE: 410assert(obj->type == OBJ_TREE); 411assert(filter_data->nr >1); 412 413 frame = &filter_data->array_frame[--filter_data->nr]; 414 415/* 416 * Tell our parent directory if any of our children were 417 * provisionally omitted. 418 */ 419 filter_data->array_frame[filter_data->nr -1].child_prov_omit |= 420 frame->child_prov_omit; 421 422/* 423 * If there are NO provisionally omitted child objects (ALL child 424 * objects in this folder were INCLUDED), then we can mark the 425 * folder as SEEN (so we will not have to revisit it again). 426 */ 427if(!frame->child_prov_omit) 428return LOFR_MARK_SEEN; 429return LOFR_ZERO; 430 431case LOFS_BLOB: 432assert(obj->type == OBJ_BLOB); 433assert((obj->flags & SEEN) ==0); 434 435 frame = &filter_data->array_frame[filter_data->nr -1]; 436 437 dtype = DT_REG; 438 val =is_excluded_from_list(pathname,strlen(pathname), 439 filename, &dtype, &filter_data->el, 440 r->index); 441if(val <0) 442 val = frame->defval; 443if(val >0) { 444if(omits) 445oidset_remove(omits, &obj->oid); 446return LOFR_MARK_SEEN | LOFR_DO_SHOW; 447} 448 449/* 450 * Provisionally omit it. We've already established that 451 * this pathname is not in the sparse-checkout specification 452 * with the CURRENT pathname, so we *WANT* to omit this blob. 453 * 454 * However, a pathname elsewhere in the tree may also 455 * reference this same blob, so we cannot reject it yet. 456 * Leave the LOFR_ bits unset so that if the blob appears 457 * again in the traversal, we will be asked again. 458 */ 459if(omits) 460oidset_insert(omits, &obj->oid); 461 462/* 463 * Remember that at least 1 blob in this tree was 464 * provisionally omitted. This prevents us from short 465 * cutting the tree in future iterations. 466 */ 467 frame->child_prov_omit =1; 468return LOFR_ZERO; 469} 470} 471 472 473static voidfilter_sparse_free(void*filter_data) 474{ 475struct filter_sparse_data *d = filter_data; 476free(d->array_frame); 477free(d); 478} 479 480static voidfilter_sparse_oid__init( 481struct list_objects_filter_options *filter_options, 482struct filter *filter) 483{ 484struct filter_sparse_data *d =xcalloc(1,sizeof(*d)); 485if(add_excludes_from_blob_to_list(filter_options->sparse_oid_value, 486 NULL,0, &d->el) <0) 487die("could not load filter specification"); 488 489ALLOC_GROW(d->array_frame, d->nr +1, d->alloc); 490 d->array_frame[d->nr].defval =0;/* default to include */ 491 d->array_frame[d->nr].child_prov_omit =0; 492 d->nr++; 493 494 filter->filter_data = d; 495 filter->filter_object_fn = filter_sparse; 496 filter->free_fn = filter_sparse_free; 497} 498 499/* A filter which only shows objects shown by all sub-filters. */ 500struct combine_filter_data { 501struct subfilter *sub; 502size_t nr; 503}; 504 505static enum list_objects_filter_result process_subfilter( 506struct repository *r, 507enum list_objects_filter_situation filter_situation, 508struct object *obj, 509const char*pathname, 510const char*filename, 511struct subfilter *sub) 512{ 513enum list_objects_filter_result result; 514 515/* 516 * Check and update is_skipping_tree before oidset_contains so 517 * that is_skipping_tree gets unset even when the object is 518 * marked as seen. As of this writing, no filter uses 519 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the 520 * ordering is only theoretically important. Be cautious if you 521 * change the order of the below checks and more filters have 522 * been added! 523 */ 524if(sub->is_skipping_tree) { 525if(filter_situation == LOFS_END_TREE && 526oideq(&obj->oid, &sub->skip_tree)) 527 sub->is_skipping_tree =0; 528else 529return LOFR_ZERO; 530} 531if(oidset_contains(&sub->seen, &obj->oid)) 532return LOFR_ZERO; 533 534 result =list_objects_filter__filter_object( 535 r, filter_situation, obj, pathname, filename, sub->filter); 536 537if(result & LOFR_MARK_SEEN) 538oidset_insert(&sub->seen, &obj->oid); 539 540if(result & LOFR_SKIP_TREE) { 541 sub->is_skipping_tree =1; 542 sub->skip_tree = obj->oid; 543} 544 545return result; 546} 547 548static enum list_objects_filter_result filter_combine( 549struct repository *r, 550enum list_objects_filter_situation filter_situation, 551struct object *obj, 552const char*pathname, 553const char*filename, 554struct oidset *omits, 555void*filter_data) 556{ 557struct combine_filter_data *d = filter_data; 558enum list_objects_filter_result combined_result = 559 LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE; 560size_t sub; 561 562for(sub =0; sub < d->nr; sub++) { 563enum list_objects_filter_result sub_result =process_subfilter( 564 r, filter_situation, obj, pathname, filename, 565&d->sub[sub]); 566if(!(sub_result & LOFR_DO_SHOW)) 567 combined_result &= ~LOFR_DO_SHOW; 568if(!(sub_result & LOFR_MARK_SEEN)) 569 combined_result &= ~LOFR_MARK_SEEN; 570if(!d->sub[sub].is_skipping_tree) 571 combined_result &= ~LOFR_SKIP_TREE; 572} 573 574return combined_result; 575} 576 577static voidfilter_combine__free(void*filter_data) 578{ 579struct combine_filter_data *d = filter_data; 580size_t sub; 581for(sub =0; sub < d->nr; sub++) { 582list_objects_filter__free(d->sub[sub].filter); 583oidset_clear(&d->sub[sub].seen); 584if(d->sub[sub].omits.set.size) 585BUG("expected oidset to be cleared already"); 586} 587free(d->sub); 588} 589 590static voidadd_all(struct oidset *dest,struct oidset *src) { 591struct oidset_iter iter; 592struct object_id *src_oid; 593 594oidset_iter_init(src, &iter); 595while((src_oid =oidset_iter_next(&iter)) != NULL) 596oidset_insert(dest, src_oid); 597} 598 599static voidfilter_combine__finalize_omits( 600struct oidset *omits, 601void*filter_data) 602{ 603struct combine_filter_data *d = filter_data; 604size_t sub; 605 606for(sub =0; sub < d->nr; sub++) { 607add_all(omits, &d->sub[sub].omits); 608oidset_clear(&d->sub[sub].omits); 609} 610} 611 612static voidfilter_combine__init( 613struct list_objects_filter_options *filter_options, 614struct filter* filter) 615{ 616struct combine_filter_data *d =xcalloc(1,sizeof(*d)); 617size_t sub; 618 619 d->nr = filter_options->sub_nr; 620 d->sub =xcalloc(d->nr,sizeof(*d->sub)); 621for(sub =0; sub < d->nr; sub++) 622 d->sub[sub].filter =list_objects_filter__init( 623 filter->omits ? &d->sub[sub].omits : NULL, 624&filter_options->sub[sub]); 625 626 filter->filter_data = d; 627 filter->filter_object_fn = filter_combine; 628 filter->free_fn = filter_combine__free; 629 filter->finalize_omits_fn = filter_combine__finalize_omits; 630} 631 632typedefvoid(*filter_init_fn)( 633struct list_objects_filter_options *filter_options, 634struct filter *filter); 635 636/* 637 * Must match "enum list_objects_filter_choice". 638 */ 639static filter_init_fn s_filters[] = { 640 NULL, 641 filter_blobs_none__init, 642 filter_blobs_limit__init, 643 filter_trees_depth__init, 644 filter_sparse_oid__init, 645 filter_combine__init, 646}; 647 648struct filter *list_objects_filter__init( 649struct oidset *omitted, 650struct list_objects_filter_options *filter_options) 651{ 652struct filter *filter; 653 filter_init_fn init_fn; 654 655assert((sizeof(s_filters) /sizeof(s_filters[0])) == LOFC__COUNT); 656 657if(filter_options->choice >= LOFC__COUNT) 658BUG("invalid list-objects filter choice:%d", 659 filter_options->choice); 660 661 init_fn = s_filters[filter_options->choice]; 662if(!init_fn) 663return NULL; 664 665 filter =xcalloc(1,sizeof(*filter)); 666 filter->omits = omitted; 667init_fn(filter_options, filter); 668return filter; 669} 670 671enum list_objects_filter_result list_objects_filter__filter_object( 672struct repository *r, 673enum list_objects_filter_situation filter_situation, 674struct object *obj, 675const char*pathname, 676const char*filename, 677struct filter *filter) 678{ 679if(filter && (obj->flags & NOT_USER_GIVEN)) 680return filter->filter_object_fn(r, filter_situation, obj, 681 pathname, filename, 682 filter->omits, 683 filter->filter_data); 684/* 685 * No filter is active or user gave object explicitly. In this case, 686 * always show the object (except when LOFS_END_TREE, since this tree 687 * had already been shown when LOFS_BEGIN_TREE). 688 */ 689if(filter_situation == LOFS_END_TREE) 690return0; 691return LOFR_MARK_SEEN | LOFR_DO_SHOW; 692} 693 694voidlist_objects_filter__free(struct filter *filter) 695{ 696if(!filter) 697return; 698if(filter->finalize_omits_fn && filter->omits) 699 filter->finalize_omits_fn(filter->omits, filter->filter_data); 700 filter->free_fn(filter->filter_data); 701free(filter); 702}