1#include"cache.h" 2#include"dir.h" 3#include"tag.h" 4#include"commit.h" 5#include"tree.h" 6#include"blob.h" 7#include"diff.h" 8#include"tree-walk.h" 9#include"revision.h" 10#include"list-objects.h" 11#include"list-objects-filter.h" 12#include"list-objects-filter-options.h" 13#include"oidmap.h" 14#include"oidset.h" 15#include"object-store.h" 16 17/* Remember to update object flag allocation in object.h */ 18/* 19 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects 20 * that have been shown, but should be revisited if they appear 21 * in the traversal (until we mark it SEEN). This is a way to 22 * let us silently de-dup calls to show() in the caller. This 23 * is subtly different from the "revision.h:SHOWN" and the 24 * "sha1-name.c:ONELINE_SEEN" bits. And also different from 25 * the non-de-dup usage in pack-bitmap.c 26 */ 27#define FILTER_SHOWN_BUT_REVISIT (1<<21) 28 29struct subfilter { 30struct filter *filter; 31struct oidset seen; 32struct oidset omits; 33struct object_id skip_tree; 34unsigned is_skipping_tree :1; 35}; 36 37struct filter { 38enumlist_objects_filter_result(*filter_object_fn)( 39struct repository *r, 40enum list_objects_filter_situation filter_situation, 41struct object *obj, 42const char*pathname, 43const char*filename, 44struct oidset *omits, 45void*filter_data); 46 47/* 48 * Optional. If this function is supplied and the filter needs 49 * to collect omits, then this function is called once before 50 * free_fn is called. 51 * 52 * This is required because the following two conditions hold: 53 * 54 * a. A tree filter can add and remove objects as an object 55 * graph is traversed. 56 * b. A combine filter's omit set is the union of all its 57 * subfilters, which may include tree: filters. 58 * 59 * As such, the omits sets must be separate sets, and can only 60 * be unioned after the traversal is completed. 61 */ 62void(*finalize_omits_fn)(struct oidset *omits,void*filter_data); 63 64void(*free_fn)(void*filter_data); 65 66void*filter_data; 67 68/* If non-NULL, the filter collects a list of the omitted OIDs here. */ 69struct oidset *omits; 70}; 71 72static enum list_objects_filter_result filter_blobs_none( 73struct repository *r, 74enum list_objects_filter_situation filter_situation, 75struct object *obj, 76const char*pathname, 77const char*filename, 78struct oidset *omits, 79void*filter_data_) 80{ 81switch(filter_situation) { 82default: 83BUG("unknown filter_situation:%d", filter_situation); 84 85case LOFS_BEGIN_TREE: 86assert(obj->type == OBJ_TREE); 87/* always include all tree objects */ 88return LOFR_MARK_SEEN | LOFR_DO_SHOW; 89 90case LOFS_END_TREE: 91assert(obj->type == OBJ_TREE); 92return LOFR_ZERO; 93 94case LOFS_BLOB: 95assert(obj->type == OBJ_BLOB); 96assert((obj->flags & SEEN) ==0); 97 98if(omits) 99oidset_insert(omits, &obj->oid); 100return LOFR_MARK_SEEN;/* but not LOFR_DO_SHOW (hard omit) */ 101} 102} 103 104static voidfilter_blobs_none__init( 105struct list_objects_filter_options *filter_options, 106struct filter *filter) 107{ 108 filter->filter_object_fn = filter_blobs_none; 109 filter->free_fn = free; 110} 111 112/* 113 * A filter for list-objects to omit ALL trees and blobs from the traversal. 114 * Can OPTIONALLY collect a list of the omitted OIDs. 115 */ 116struct filter_trees_depth_data { 117/* 118 * Maps trees to the minimum depth at which they were seen. It is not 119 * necessary to re-traverse a tree at deeper or equal depths than it has 120 * already been traversed. 121 * 122 * We can't use LOFR_MARK_SEEN for tree objects since this will prevent 123 * it from being traversed at shallower depths. 124 */ 125struct oidmap seen_at_depth; 126 127unsigned long exclude_depth; 128unsigned long current_depth; 129}; 130 131struct seen_map_entry { 132struct oidmap_entry base; 133size_t depth; 134}; 135 136/* Returns 1 if the oid was in the omits set before it was invoked. */ 137static intfilter_trees_update_omits( 138struct object *obj, 139struct oidset *omits, 140int include_it) 141{ 142if(!omits) 143return0; 144 145if(include_it) 146returnoidset_remove(omits, &obj->oid); 147else 148returnoidset_insert(omits, &obj->oid); 149} 150 151static enum list_objects_filter_result filter_trees_depth( 152struct repository *r, 153enum list_objects_filter_situation filter_situation, 154struct object *obj, 155const char*pathname, 156const char*filename, 157struct oidset *omits, 158void*filter_data_) 159{ 160struct filter_trees_depth_data *filter_data = filter_data_; 161struct seen_map_entry *seen_info; 162int include_it = filter_data->current_depth < 163 filter_data->exclude_depth; 164int filter_res; 165int already_seen; 166 167/* 168 * Note that we do not use _MARK_SEEN in order to allow re-traversal in 169 * case we encounter a tree or blob again at a shallower depth. 170 */ 171 172switch(filter_situation) { 173default: 174BUG("unknown filter_situation:%d", filter_situation); 175 176case LOFS_END_TREE: 177assert(obj->type == OBJ_TREE); 178 filter_data->current_depth--; 179return LOFR_ZERO; 180 181case LOFS_BLOB: 182filter_trees_update_omits(obj, omits, include_it); 183return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO; 184 185case LOFS_BEGIN_TREE: 186 seen_info =oidmap_get( 187&filter_data->seen_at_depth, &obj->oid); 188if(!seen_info) { 189 seen_info =xcalloc(1,sizeof(*seen_info)); 190oidcpy(&seen_info->base.oid, &obj->oid); 191 seen_info->depth = filter_data->current_depth; 192oidmap_put(&filter_data->seen_at_depth, seen_info); 193 already_seen =0; 194}else{ 195 already_seen = 196 filter_data->current_depth >= seen_info->depth; 197} 198 199if(already_seen) { 200 filter_res = LOFR_SKIP_TREE; 201}else{ 202int been_omitted =filter_trees_update_omits( 203 obj, omits, include_it); 204 seen_info->depth = filter_data->current_depth; 205 206if(include_it) 207 filter_res = LOFR_DO_SHOW; 208else if(omits && !been_omitted) 209/* 210 * Must update omit information of children 211 * recursively; they have not been omitted yet. 212 */ 213 filter_res = LOFR_ZERO; 214else 215 filter_res = LOFR_SKIP_TREE; 216} 217 218 filter_data->current_depth++; 219return filter_res; 220} 221} 222 223static voidfilter_trees_free(void*filter_data) { 224struct filter_trees_depth_data *d = filter_data; 225if(!d) 226return; 227oidmap_free(&d->seen_at_depth,1); 228free(d); 229} 230 231static voidfilter_trees_depth__init( 232struct list_objects_filter_options *filter_options, 233struct filter *filter) 234{ 235struct filter_trees_depth_data *d =xcalloc(1,sizeof(*d)); 236oidmap_init(&d->seen_at_depth,0); 237 d->exclude_depth = filter_options->tree_exclude_depth; 238 d->current_depth =0; 239 240 filter->filter_data = d; 241 filter->filter_object_fn = filter_trees_depth; 242 filter->free_fn = filter_trees_free; 243} 244 245/* 246 * A filter for list-objects to omit large blobs. 247 * And to OPTIONALLY collect a list of the omitted OIDs. 248 */ 249struct filter_blobs_limit_data { 250unsigned long max_bytes; 251}; 252 253static enum list_objects_filter_result filter_blobs_limit( 254struct repository *r, 255enum list_objects_filter_situation filter_situation, 256struct object *obj, 257const char*pathname, 258const char*filename, 259struct oidset *omits, 260void*filter_data_) 261{ 262struct filter_blobs_limit_data *filter_data = filter_data_; 263unsigned long object_length; 264enum object_type t; 265 266switch(filter_situation) { 267default: 268BUG("unknown filter_situation:%d", filter_situation); 269 270case LOFS_BEGIN_TREE: 271assert(obj->type == OBJ_TREE); 272/* always include all tree objects */ 273return LOFR_MARK_SEEN | LOFR_DO_SHOW; 274 275case LOFS_END_TREE: 276assert(obj->type == OBJ_TREE); 277return LOFR_ZERO; 278 279case LOFS_BLOB: 280assert(obj->type == OBJ_BLOB); 281assert((obj->flags & SEEN) ==0); 282 283 t =oid_object_info(r, &obj->oid, &object_length); 284if(t != OBJ_BLOB) {/* probably OBJ_NONE */ 285/* 286 * We DO NOT have the blob locally, so we cannot 287 * apply the size filter criteria. Be conservative 288 * and force show it (and let the caller deal with 289 * the ambiguity). 290 */ 291goto include_it; 292} 293 294if(object_length < filter_data->max_bytes) 295goto include_it; 296 297if(omits) 298oidset_insert(omits, &obj->oid); 299return LOFR_MARK_SEEN;/* but not LOFR_DO_SHOW (hard omit) */ 300} 301 302include_it: 303if(omits) 304oidset_remove(omits, &obj->oid); 305return LOFR_MARK_SEEN | LOFR_DO_SHOW; 306} 307 308static voidfilter_blobs_limit__init( 309struct list_objects_filter_options *filter_options, 310struct filter *filter) 311{ 312struct filter_blobs_limit_data *d =xcalloc(1,sizeof(*d)); 313 d->max_bytes = filter_options->blob_limit_value; 314 315 filter->filter_data = d; 316 filter->filter_object_fn = filter_blobs_limit; 317 filter->free_fn = free; 318} 319 320/* 321 * A filter driven by a sparse-checkout specification to only 322 * include blobs that a sparse checkout would populate. 323 * 324 * The sparse-checkout spec can be loaded from a blob with the 325 * given OID or from a local pathname. We allow an OID because 326 * the repo may be bare or we may be doing the filtering on the 327 * server. 328 */ 329struct frame { 330/* 331 * default_match is the usual default include/exclude value that 332 * should be inherited as we recurse into directories based 333 * upon pattern matching of the directory itself or of a 334 * containing directory. 335 */ 336enum pattern_match_result default_match; 337 338/* 339 * 1 if the directory (recursively) contains any provisionally 340 * omitted objects. 341 * 342 * 0 if everything (recursively) contained in this directory 343 * has been explicitly included (SHOWN) in the result and 344 * the directory may be short-cut later in the traversal. 345 */ 346unsigned child_prov_omit :1; 347}; 348 349struct filter_sparse_data { 350struct pattern_list pl; 351 352size_t nr, alloc; 353struct frame *array_frame; 354}; 355 356static enum list_objects_filter_result filter_sparse( 357struct repository *r, 358enum list_objects_filter_situation filter_situation, 359struct object *obj, 360const char*pathname, 361const char*filename, 362struct oidset *omits, 363void*filter_data_) 364{ 365struct filter_sparse_data *filter_data = filter_data_; 366int dtype; 367struct frame *frame; 368enum pattern_match_result match; 369 370switch(filter_situation) { 371default: 372BUG("unknown filter_situation:%d", filter_situation); 373 374case LOFS_BEGIN_TREE: 375assert(obj->type == OBJ_TREE); 376 dtype = DT_DIR; 377 match =path_matches_pattern_list(pathname,strlen(pathname), 378 filename, &dtype, &filter_data->pl, 379 r->index); 380if(match == UNDECIDED) 381 match = filter_data->array_frame[filter_data->nr -1].default_match; 382 383ALLOC_GROW(filter_data->array_frame, filter_data->nr +1, 384 filter_data->alloc); 385 filter_data->array_frame[filter_data->nr].default_match = match; 386 filter_data->array_frame[filter_data->nr].child_prov_omit =0; 387 filter_data->nr++; 388 389/* 390 * A directory with this tree OID may appear in multiple 391 * places in the tree. (Think of a directory move or copy, 392 * with no other changes, so the OID is the same, but the 393 * full pathnames of objects within this directory are new 394 * and may match is_excluded() patterns differently.) 395 * So we cannot mark this directory as SEEN (yet), since 396 * that will prevent process_tree() from revisiting this 397 * tree object with other pathname prefixes. 398 * 399 * Only _DO_SHOW the tree object the first time we visit 400 * this tree object. 401 * 402 * We always show all tree objects. A future optimization 403 * may want to attempt to narrow this. 404 */ 405if(obj->flags & FILTER_SHOWN_BUT_REVISIT) 406return LOFR_ZERO; 407 obj->flags |= FILTER_SHOWN_BUT_REVISIT; 408return LOFR_DO_SHOW; 409 410case LOFS_END_TREE: 411assert(obj->type == OBJ_TREE); 412assert(filter_data->nr >1); 413 414 frame = &filter_data->array_frame[--filter_data->nr]; 415 416/* 417 * Tell our parent directory if any of our children were 418 * provisionally omitted. 419 */ 420 filter_data->array_frame[filter_data->nr -1].child_prov_omit |= 421 frame->child_prov_omit; 422 423/* 424 * If there are NO provisionally omitted child objects (ALL child 425 * objects in this folder were INCLUDED), then we can mark the 426 * folder as SEEN (so we will not have to revisit it again). 427 */ 428if(!frame->child_prov_omit) 429return LOFR_MARK_SEEN; 430return LOFR_ZERO; 431 432case LOFS_BLOB: 433assert(obj->type == OBJ_BLOB); 434assert((obj->flags & SEEN) ==0); 435 436 frame = &filter_data->array_frame[filter_data->nr -1]; 437 438 dtype = DT_REG; 439 match =path_matches_pattern_list(pathname,strlen(pathname), 440 filename, &dtype, &filter_data->pl, 441 r->index); 442if(match == UNDECIDED) 443 match = frame->default_match; 444if(match == MATCHED) { 445if(omits) 446oidset_remove(omits, &obj->oid); 447return LOFR_MARK_SEEN | LOFR_DO_SHOW; 448} 449 450/* 451 * Provisionally omit it. We've already established that 452 * this pathname is not in the sparse-checkout specification 453 * with the CURRENT pathname, so we *WANT* to omit this blob. 454 * 455 * However, a pathname elsewhere in the tree may also 456 * reference this same blob, so we cannot reject it yet. 457 * Leave the LOFR_ bits unset so that if the blob appears 458 * again in the traversal, we will be asked again. 459 */ 460if(omits) 461oidset_insert(omits, &obj->oid); 462 463/* 464 * Remember that at least 1 blob in this tree was 465 * provisionally omitted. This prevents us from short 466 * cutting the tree in future iterations. 467 */ 468 frame->child_prov_omit =1; 469return LOFR_ZERO; 470} 471} 472 473 474static voidfilter_sparse_free(void*filter_data) 475{ 476struct filter_sparse_data *d = filter_data; 477free(d->array_frame); 478free(d); 479} 480 481static voidfilter_sparse_oid__init( 482struct list_objects_filter_options *filter_options, 483struct filter *filter) 484{ 485struct filter_sparse_data *d =xcalloc(1,sizeof(*d)); 486if(add_patterns_from_blob_to_list(filter_options->sparse_oid_value, 487 NULL,0, &d->pl) <0) 488die("could not load filter specification"); 489 490ALLOC_GROW(d->array_frame, d->nr +1, d->alloc); 491 d->array_frame[d->nr].default_match =0;/* default to include */ 492 d->array_frame[d->nr].child_prov_omit =0; 493 d->nr++; 494 495 filter->filter_data = d; 496 filter->filter_object_fn = filter_sparse; 497 filter->free_fn = filter_sparse_free; 498} 499 500/* A filter which only shows objects shown by all sub-filters. */ 501struct combine_filter_data { 502struct subfilter *sub; 503size_t nr; 504}; 505 506static enum list_objects_filter_result process_subfilter( 507struct repository *r, 508enum list_objects_filter_situation filter_situation, 509struct object *obj, 510const char*pathname, 511const char*filename, 512struct subfilter *sub) 513{ 514enum list_objects_filter_result result; 515 516/* 517 * Check and update is_skipping_tree before oidset_contains so 518 * that is_skipping_tree gets unset even when the object is 519 * marked as seen. As of this writing, no filter uses 520 * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the 521 * ordering is only theoretically important. Be cautious if you 522 * change the order of the below checks and more filters have 523 * been added! 524 */ 525if(sub->is_skipping_tree) { 526if(filter_situation == LOFS_END_TREE && 527oideq(&obj->oid, &sub->skip_tree)) 528 sub->is_skipping_tree =0; 529else 530return LOFR_ZERO; 531} 532if(oidset_contains(&sub->seen, &obj->oid)) 533return LOFR_ZERO; 534 535 result =list_objects_filter__filter_object( 536 r, filter_situation, obj, pathname, filename, sub->filter); 537 538if(result & LOFR_MARK_SEEN) 539oidset_insert(&sub->seen, &obj->oid); 540 541if(result & LOFR_SKIP_TREE) { 542 sub->is_skipping_tree =1; 543 sub->skip_tree = obj->oid; 544} 545 546return result; 547} 548 549static enum list_objects_filter_result filter_combine( 550struct repository *r, 551enum list_objects_filter_situation filter_situation, 552struct object *obj, 553const char*pathname, 554const char*filename, 555struct oidset *omits, 556void*filter_data) 557{ 558struct combine_filter_data *d = filter_data; 559enum list_objects_filter_result combined_result = 560 LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE; 561size_t sub; 562 563for(sub =0; sub < d->nr; sub++) { 564enum list_objects_filter_result sub_result =process_subfilter( 565 r, filter_situation, obj, pathname, filename, 566&d->sub[sub]); 567if(!(sub_result & LOFR_DO_SHOW)) 568 combined_result &= ~LOFR_DO_SHOW; 569if(!(sub_result & LOFR_MARK_SEEN)) 570 combined_result &= ~LOFR_MARK_SEEN; 571if(!d->sub[sub].is_skipping_tree) 572 combined_result &= ~LOFR_SKIP_TREE; 573} 574 575return combined_result; 576} 577 578static voidfilter_combine__free(void*filter_data) 579{ 580struct combine_filter_data *d = filter_data; 581size_t sub; 582for(sub =0; sub < d->nr; sub++) { 583list_objects_filter__free(d->sub[sub].filter); 584oidset_clear(&d->sub[sub].seen); 585if(d->sub[sub].omits.set.size) 586BUG("expected oidset to be cleared already"); 587} 588free(d->sub); 589} 590 591static voidadd_all(struct oidset *dest,struct oidset *src) { 592struct oidset_iter iter; 593struct object_id *src_oid; 594 595oidset_iter_init(src, &iter); 596while((src_oid =oidset_iter_next(&iter)) != NULL) 597oidset_insert(dest, src_oid); 598} 599 600static voidfilter_combine__finalize_omits( 601struct oidset *omits, 602void*filter_data) 603{ 604struct combine_filter_data *d = filter_data; 605size_t sub; 606 607for(sub =0; sub < d->nr; sub++) { 608add_all(omits, &d->sub[sub].omits); 609oidset_clear(&d->sub[sub].omits); 610} 611} 612 613static voidfilter_combine__init( 614struct list_objects_filter_options *filter_options, 615struct filter* filter) 616{ 617struct combine_filter_data *d =xcalloc(1,sizeof(*d)); 618size_t sub; 619 620 d->nr = filter_options->sub_nr; 621 d->sub =xcalloc(d->nr,sizeof(*d->sub)); 622for(sub =0; sub < d->nr; sub++) 623 d->sub[sub].filter =list_objects_filter__init( 624 filter->omits ? &d->sub[sub].omits : NULL, 625&filter_options->sub[sub]); 626 627 filter->filter_data = d; 628 filter->filter_object_fn = filter_combine; 629 filter->free_fn = filter_combine__free; 630 filter->finalize_omits_fn = filter_combine__finalize_omits; 631} 632 633typedefvoid(*filter_init_fn)( 634struct list_objects_filter_options *filter_options, 635struct filter *filter); 636 637/* 638 * Must match "enum list_objects_filter_choice". 639 */ 640static filter_init_fn s_filters[] = { 641 NULL, 642 filter_blobs_none__init, 643 filter_blobs_limit__init, 644 filter_trees_depth__init, 645 filter_sparse_oid__init, 646 filter_combine__init, 647}; 648 649struct filter *list_objects_filter__init( 650struct oidset *omitted, 651struct list_objects_filter_options *filter_options) 652{ 653struct filter *filter; 654 filter_init_fn init_fn; 655 656assert((sizeof(s_filters) /sizeof(s_filters[0])) == LOFC__COUNT); 657 658if(filter_options->choice >= LOFC__COUNT) 659BUG("invalid list-objects filter choice:%d", 660 filter_options->choice); 661 662 init_fn = s_filters[filter_options->choice]; 663if(!init_fn) 664return NULL; 665 666 filter =xcalloc(1,sizeof(*filter)); 667 filter->omits = omitted; 668init_fn(filter_options, filter); 669return filter; 670} 671 672enum list_objects_filter_result list_objects_filter__filter_object( 673struct repository *r, 674enum list_objects_filter_situation filter_situation, 675struct object *obj, 676const char*pathname, 677const char*filename, 678struct filter *filter) 679{ 680if(filter && (obj->flags & NOT_USER_GIVEN)) 681return filter->filter_object_fn(r, filter_situation, obj, 682 pathname, filename, 683 filter->omits, 684 filter->filter_data); 685/* 686 * No filter is active or user gave object explicitly. In this case, 687 * always show the object (except when LOFS_END_TREE, since this tree 688 * had already been shown when LOFS_BEGIN_TREE). 689 */ 690if(filter_situation == LOFS_END_TREE) 691return0; 692return LOFR_MARK_SEEN | LOFR_DO_SHOW; 693} 694 695voidlist_objects_filter__free(struct filter *filter) 696{ 697if(!filter) 698return; 699if(filter->finalize_omits_fn && filter->omits) 700 filter->finalize_omits_fn(filter->omits, filter->filter_data); 701 filter->free_fn(filter->filter_data); 702free(filter); 703}