1#include"cache.h" 2#include"pack-revindex.h" 3 4/* 5 * Pack index for existing packs give us easy access to the offsets into 6 * corresponding pack file where each object's data starts, but the entries 7 * do not store the size of the compressed representation (uncompressed 8 * size is easily available by examining the pack entry header). It is 9 * also rather expensive to find the sha1 for an object given its offset. 10 * 11 * The pack index file is sorted by object name mapping to offset; 12 * this revindex array is a list of offset/index_nr pairs 13 * ordered by offset, so if you know the offset of an object, next offset 14 * is where its packed representation ends and the index_nr can be used to 15 * get the object sha1 from the main index. 16 */ 17 18/* 19 * This is a least-significant-digit radix sort. 20 * 21 * It sorts each of the "n" items in "entries" by its offset field. The "max" 22 * parameter must be at least as large as the largest offset in the array, 23 * and lets us quit the sort early. 24 */ 25static voidsort_revindex(struct revindex_entry *entries,unsigned n, off_t max) 26{ 27/* 28 * We use a "digit" size of 16 bits. That keeps our memory 29 * usage reasonable, and we can generally (for a 4G or smaller 30 * packfile) quit after two rounds of radix-sorting. 31 */ 32#define DIGIT_SIZE (16) 33#define BUCKETS (1 << DIGIT_SIZE) 34/* 35 * We want to know the bucket that a[i] will go into when we are using 36 * the digit that is N bits from the (least significant) end. 37 */ 38#define BUCKET_FOR(a, i, bits) (((a)[(i)].offset >> (bits)) & (BUCKETS-1)) 39 40/* 41 * We need O(n) temporary storage. Rather than do an extra copy of the 42 * partial results into "entries", we sort back and forth between the 43 * real array and temporary storage. In each iteration of the loop, we 44 * keep track of them with alias pointers, always sorting from "from" 45 * to "to". 46 */ 47struct revindex_entry *tmp, *from, *to; 48int bits; 49unsigned*pos; 50 51ALLOC_ARRAY(pos, BUCKETS); 52ALLOC_ARRAY(tmp, n); 53 from = entries; 54 to = tmp; 55 56/* 57 * If (max >> bits) is zero, then we know that the radix digit we are 58 * on (and any higher) will be zero for all entries, and our loop will 59 * be a no-op, as everybody lands in the same zero-th bucket. 60 */ 61for(bits =0; max >> bits; bits += DIGIT_SIZE) { 62unsigned i; 63 64memset(pos,0, BUCKETS *sizeof(*pos)); 65 66/* 67 * We want pos[i] to store the index of the last element that 68 * will go in bucket "i" (actually one past the last element). 69 * To do this, we first count the items that will go in each 70 * bucket, which gives us a relative offset from the last 71 * bucket. We can then cumulatively add the index from the 72 * previous bucket to get the true index. 73 */ 74for(i =0; i < n; i++) 75 pos[BUCKET_FOR(from, i, bits)]++; 76for(i =1; i < BUCKETS; i++) 77 pos[i] += pos[i-1]; 78 79/* 80 * Now we can drop the elements into their correct buckets (in 81 * our temporary array). We iterate the pos counter backwards 82 * to avoid using an extra index to count up. And since we are 83 * going backwards there, we must also go backwards through the 84 * array itself, to keep the sort stable. 85 * 86 * Note that we use an unsigned iterator to make sure we can 87 * handle 2^32-1 objects, even on a 32-bit system. But this 88 * means we cannot use the more obvious "i >= 0" loop condition 89 * for counting backwards, and must instead check for 90 * wrap-around with UINT_MAX. 91 */ 92for(i = n -1; i != UINT_MAX; i--) 93 to[--pos[BUCKET_FOR(from, i, bits)]] = from[i]; 94 95/* 96 * Now "to" contains the most sorted list, so we swap "from" and 97 * "to" for the next iteration. 98 */ 99SWAP(from, to); 100} 101 102/* 103 * If we ended with our data in the original array, great. If not, 104 * we have to move it back from the temporary storage. 105 */ 106if(from != entries) 107COPY_ARRAY(entries, tmp, n); 108free(tmp); 109free(pos); 110 111#undef BUCKET_FOR 112#undef BUCKETS 113#undef DIGIT_SIZE 114} 115 116/* 117 * Ordered list of offsets of objects in the pack. 118 */ 119static voidcreate_pack_revindex(struct packed_git *p) 120{ 121unsigned num_ent = p->num_objects; 122unsigned i; 123const char*index = p->index_data; 124 125ALLOC_ARRAY(p->revindex, num_ent +1); 126 index +=4*256; 127 128if(p->index_version >1) { 129const uint32_t*off_32 = 130(uint32_t*)(index +8+ p->num_objects * (20+4)); 131const uint32_t*off_64 = off_32 + p->num_objects; 132for(i =0; i < num_ent; i++) { 133uint32_t off =ntohl(*off_32++); 134if(!(off &0x80000000)) { 135 p->revindex[i].offset = off; 136}else{ 137 p->revindex[i].offset = 138((uint64_t)ntohl(*off_64++)) <<32; 139 p->revindex[i].offset |= 140ntohl(*off_64++); 141} 142 p->revindex[i].nr = i; 143} 144}else{ 145for(i =0; i < num_ent; i++) { 146uint32_t hl = *((uint32_t*)(index +24* i)); 147 p->revindex[i].offset =ntohl(hl); 148 p->revindex[i].nr = i; 149} 150} 151 152/* This knows the pack format -- the 20-byte trailer 153 * follows immediately after the last object data. 154 */ 155 p->revindex[num_ent].offset = p->pack_size -20; 156 p->revindex[num_ent].nr = -1; 157sort_revindex(p->revindex, num_ent, p->pack_size); 158} 159 160voidload_pack_revindex(struct packed_git *p) 161{ 162if(!p->revindex) 163create_pack_revindex(p); 164} 165 166intfind_revindex_position(struct packed_git *p, off_t ofs) 167{ 168int lo =0; 169int hi = p->num_objects +1; 170struct revindex_entry *revindex = p->revindex; 171 172do{ 173unsigned mi = lo + (hi - lo) /2; 174if(revindex[mi].offset == ofs) { 175return mi; 176}else if(ofs < revindex[mi].offset) 177 hi = mi; 178else 179 lo = mi +1; 180}while(lo < hi); 181 182error("bad offset for revindex"); 183return-1; 184} 185 186struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs) 187{ 188int pos; 189 190load_pack_revindex(p); 191 pos =find_revindex_position(p, ofs); 192 193if(pos <0) 194return NULL; 195 196return p->revindex + pos; 197}