From: Junio C Hamano Date: Sat, 11 Mar 2006 07:02:23 +0000 (-0800) Subject: Merge branch 'nh/http' into next X-Git-Tag: v1.3.0-rc1~37 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/c827a84c694f95a9c2e179192d34c33eccdbb10a?hp=1a703cba6dac85936a5026587a90dcb827d00088 Merge branch 'nh/http' into next * nh/http: http-push: cleanup http-push: support for updating remote info/refs http-push: improve remote lock management http-push: refactor remote file/directory processing HTTP slot reuse fixes http-push: fix revision walk --- diff --git a/Documentation/git-fsck-objects.txt b/Documentation/git-fsck-objects.txt index 387b435484..93ce9dcc92 100644 --- a/Documentation/git-fsck-objects.txt +++ b/Documentation/git-fsck-objects.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git-fsck-objects' [--tags] [--root] [--unreachable] [--cache] - [--standalone | --full] [--strict] [*] + [--full] [--strict] [*] DESCRIPTION ----------- @@ -38,21 +38,14 @@ index file and all SHA1 references in .git/refs/* as heads. Consider any object recorded in the index also as a head node for an unreachability trace. ---standalone:: - Limit checks to the contents of GIT_OBJECT_DIRECTORY - ($GIT_DIR/objects), making sure that it is consistent and - complete without referring to objects found in alternate - object pools listed in GIT_ALTERNATE_OBJECT_DIRECTORIES, - nor packed git archives found in $GIT_DIR/objects/pack; - cannot be used with --full. - --full:: Check not just objects in GIT_OBJECT_DIRECTORY ($GIT_DIR/objects), but also the ones found in alternate - object pools listed in GIT_ALTERNATE_OBJECT_DIRECTORIES, + object pools listed in GIT_ALTERNATE_OBJECT_DIRECTORIES + or $GIT_DIR/objects/info/alternates, and in packed git archives found in $GIT_DIR/objects/pack and corresponding pack subdirectories in alternate - object pools; cannot be used with --standalone. + object pools. --strict:: Enable more strict checking, namely to catch a file mode diff --git a/Makefile b/Makefile index 8a20c76e9e..0bdf03b7de 100644 --- a/Makefile +++ b/Makefile @@ -190,7 +190,7 @@ PYMODULES = \ LIB_FILE=libgit.a LIB_H = \ - blob.h cache.h commit.h count-delta.h csum-file.h delta.h \ + blob.h cache.h commit.h csum-file.h delta.h \ diff.h object.h pack.h pkt-line.h quote.h refs.h \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h @@ -200,7 +200,7 @@ DIFF_OBJS = \ diffcore-delta.o LIB_OBJS = \ - blob.o commit.o connect.o count-delta.o csum-file.o \ + blob.o commit.o connect.o csum-file.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ diff --git a/blame.c b/blame.c index 90338af31c..1fb507028b 100644 --- a/blame.c +++ b/blame.c @@ -14,6 +14,7 @@ #include "tree.h" #include "blob.h" #include "diff.h" +#include "diffcore.h" #include "revision.h" #define DEBUG 0 @@ -34,7 +35,9 @@ struct util_info { char *buf; unsigned long size; int num_lines; -// const char* path; + const char* pathname; + + void* topo_data; }; struct chunk { @@ -342,25 +345,34 @@ static int map_line(struct commit *commit, int line) return info->line_map[line]; } -static int fill_util_info(struct commit *commit, const char *path) +static struct util_info* get_util(struct commit *commit) { - struct util_info *util; - if (commit->object.util) - return 0; + struct util_info *util = commit->object.util; + + if (util) + return util; util = xmalloc(sizeof(struct util_info)); + util->buf = NULL; + util->size = 0; + util->line_map = NULL; + util->num_lines = -1; + util->pathname = NULL; + commit->object.util = util; + return util; +} + +static int fill_util_info(struct commit *commit) +{ + struct util_info *util = commit->object.util; + + assert(util); + assert(util->pathname); - if (get_blob_sha1(commit->tree, path, util->sha1)) { - free(util); + if (get_blob_sha1(commit->tree, util->pathname, util->sha1)) return 1; - } else { - util->buf = NULL; - util->size = 0; - util->line_map = NULL; - util->num_lines = -1; - commit->object.util = util; + else return 0; - } } static void alloc_line_map(struct commit *commit) @@ -389,10 +401,11 @@ static void alloc_line_map(struct commit *commit) static void init_first_commit(struct commit* commit, const char* filename) { - struct util_info* util; + struct util_info* util = commit->object.util; int i; - if (fill_util_info(commit, filename)) + util->pathname = filename; + if (fill_util_info(commit)) die("fill_util_info failed"); alloc_line_map(commit); @@ -453,7 +466,7 @@ static void process_commits(struct rev_info *rev, const char *path, if(num_parents == 0) *initial = commit; - if(fill_util_info(commit, path)) + if (fill_util_info(commit)) continue; alloc_line_map(commit); @@ -471,7 +484,7 @@ static void process_commits(struct rev_info *rev, const char *path, printf("parent: %s\n", sha1_to_hex(parent->object.sha1)); - if(fill_util_info(parent, path)) { + if (fill_util_info(parent)) { num_parents--; continue; } @@ -511,6 +524,135 @@ static void process_commits(struct rev_info *rev, const char *path, } while ((commit = get_revision(rev)) != NULL); } + +static int compare_tree_path(struct rev_info* revs, + struct commit* c1, struct commit* c2) +{ + const char* paths[2]; + struct util_info* util = c2->object.util; + paths[0] = util->pathname; + paths[1] = NULL; + + diff_tree_setup_paths(get_pathspec(revs->prefix, paths)); + return rev_compare_tree(c1->tree, c2->tree); +} + + +static int same_tree_as_empty_path(struct rev_info *revs, struct tree* t1, + const char* path) +{ + const char* paths[2]; + paths[0] = path; + paths[1] = NULL; + + diff_tree_setup_paths(get_pathspec(revs->prefix, paths)); + return rev_same_tree_as_empty(t1); +} + +static const char* find_rename(struct commit* commit, struct commit* parent) +{ + struct util_info* cutil = commit->object.util; + struct diff_options diff_opts; + const char *paths[1]; + int i; + + if (DEBUG) { + printf("find_rename commit: %s ", + sha1_to_hex(commit->object.sha1)); + puts(sha1_to_hex(parent->object.sha1)); + } + + diff_setup(&diff_opts); + diff_opts.recursive = 1; + diff_opts.detect_rename = DIFF_DETECT_RENAME; + paths[0] = NULL; + diff_tree_setup_paths(paths); + if (diff_setup_done(&diff_opts) < 0) + die("diff_setup_done failed"); + + diff_tree_sha1(commit->tree->object.sha1, parent->tree->object.sha1, + "", &diff_opts); + diffcore_std(&diff_opts); + + for (i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *p = diff_queued_diff.queue[i]; + + if (p->status == 'R' && !strcmp(p->one->path, cutil->pathname)) { + if (DEBUG) + printf("rename %s -> %s\n", p->one->path, p->two->path); + return p->two->path; + } + } + + return 0; +} + +static void simplify_commit(struct rev_info *revs, struct commit *commit) +{ + struct commit_list **pp, *parent; + + if (!commit->tree) + return; + + if (!commit->parents) { + struct util_info* util = commit->object.util; + if (!same_tree_as_empty_path(revs, commit->tree, + util->pathname)) + commit->object.flags |= TREECHANGE; + return; + } + + pp = &commit->parents; + while ((parent = *pp) != NULL) { + struct commit *p = parent->item; + + if (p->object.flags & UNINTERESTING) { + pp = &parent->next; + continue; + } + + parse_commit(p); + switch (compare_tree_path(revs, p, commit)) { + case REV_TREE_SAME: + parent->next = NULL; + commit->parents = parent; + get_util(p)->pathname = get_util(commit)->pathname; + return; + + case REV_TREE_NEW: + { + + struct util_info* util = commit->object.util; + if (revs->remove_empty_trees && + same_tree_as_empty_path(revs, p->tree, + util->pathname)) { + const char* new_name = find_rename(commit, p); + if (new_name) { + struct util_info* putil = get_util(p); + if (!putil->pathname) + putil->pathname = strdup(new_name); + } else { + *pp = parent->next; + continue; + } + } + } + + /* fallthrough */ + case REV_TREE_DIFFERENT: + pp = &parent->next; + if (!get_util(p)->pathname) + get_util(p)->pathname = + get_util(commit)->pathname; + continue; + } + die("bad tree compare for commit %s", + sha1_to_hex(commit->object.sha1)); + } + commit->object.flags |= TREECHANGE; +} + + struct commit_info { char* author; @@ -569,6 +711,18 @@ static const char* format_time(unsigned long time, const char* tz_str) return time_buf; } +static void topo_setter(struct commit* c, void* data) +{ + struct util_info* util = c->object.util; + util->topo_data = data; +} + +static void* topo_getter(struct commit* c) +{ + struct util_info* util = c->object.util; + return util->topo_data; +} + int main(int argc, const char **argv) { int i; @@ -580,8 +734,8 @@ int main(int argc, const char **argv) int sha1_len = 8; int compability = 0; int options = 1; + struct commit* start_commit; - int num_args; const char* args[10]; struct rev_info rev; @@ -634,28 +788,29 @@ int main(int argc, const char **argv) strcpy(filename_buf, filename); filename = filename_buf; - { - struct commit* c; - if (get_sha1(commit, sha1)) - die("get_sha1 failed, commit '%s' not found", commit); - c = lookup_commit_reference(sha1); - - if (fill_util_info(c, filename)) { - printf("%s not found in %s\n", filename, commit); - return 1; - } + if (get_sha1(commit, sha1)) + die("get_sha1 failed, commit '%s' not found", commit); + start_commit = lookup_commit_reference(sha1); + get_util(start_commit)->pathname = filename; + if (fill_util_info(start_commit)) { + printf("%s not found in %s\n", filename, commit); + return 1; } - num_args = 0; - args[num_args++] = NULL; - args[num_args++] = "--topo-order"; - args[num_args++] = "--remove-empty"; - args[num_args++] = commit; - args[num_args++] = "--"; - args[num_args++] = filename; - args[num_args] = NULL; - setup_revisions(num_args, args, &rev, "HEAD"); + init_revisions(&rev); + rev.remove_empty_trees = 1; + rev.topo_order = 1; + rev.prune_fn = simplify_commit; + rev.topo_setter = topo_setter; + rev.topo_getter = topo_getter; + rev.limited = 1; + + commit_list_insert(start_commit, &rev.commits); + + args[0] = filename; + args[1] = NULL; + diff_tree_setup_paths(args); prepare_revision_walk(&rev); process_commits(&rev, filename, &initial); @@ -665,17 +820,21 @@ int main(int argc, const char **argv) for (i = 0; i < num_blame_lines; i++) { struct commit *c = blame_lines[i]; + struct util_info* u; + if (!c) c = initial; + u = c->object.util; get_commit_info(c, &ci); fwrite(sha1_to_hex(c->object.sha1), sha1_len, 1, stdout); if(compability) printf("\t(%10s\t%10s\t%d)", ci.author, format_time(ci.author_time, ci.author_tz), i+1); else - printf(" (%-15.15s %10s %*d) ", ci.author, - format_time(ci.author_time, ci.author_tz), + printf(" %s (%-15.15s %10s %*d) ", u->pathname, + ci.author, format_time(ci.author_time, + ci.author_tz), max_digits, i+1); if(i == num_blame_lines - 1) { diff --git a/commit.c b/commit.c index 06d5439152..eb42d517a4 100644 --- a/commit.c +++ b/commit.c @@ -569,10 +569,28 @@ int count_parents(struct commit * commit) return count; } +void topo_sort_default_setter(struct commit *c, void *data) +{ + c->object.util = data; +} + +void *topo_sort_default_getter(struct commit *c) +{ + return c->object.util; +} + /* * Performs an in-place topological sort on the list supplied. */ void sort_in_topological_order(struct commit_list ** list, int lifo) +{ + sort_in_topological_order_fn(list, lifo, topo_sort_default_setter, + topo_sort_default_getter); +} + +void sort_in_topological_order_fn(struct commit_list ** list, int lifo, + topo_sort_set_fn_t setter, + topo_sort_get_fn_t getter) { struct commit_list * next = *list; struct commit_list * work = NULL, **insert; @@ -596,7 +614,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) next=*list; while (next) { next_nodes->list_item = next; - next->item->object.util = next_nodes; + setter(next->item, next_nodes); next_nodes++; next = next->next; } @@ -606,8 +624,8 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) struct commit_list * parents = next->item->parents; while (parents) { struct commit * parent=parents->item; - struct sort_node * pn = (struct sort_node *)parent->object.util; - + struct sort_node * pn = (struct sort_node *) getter(parent); + if (pn) pn->indegree++; parents=parents->next; @@ -624,7 +642,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) next=*list; insert = &work; while (next) { - struct sort_node * node = (struct sort_node *)next->item->object.util; + struct sort_node * node = (struct sort_node *) getter(next->item); if (node->indegree == 0) { insert = &commit_list_insert(next->item, insert)->next; @@ -637,15 +655,15 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) sort_by_date(&work); while (work) { struct commit * work_item = pop_commit(&work); - struct sort_node * work_node = (struct sort_node *)work_item->object.util; + struct sort_node * work_node = (struct sort_node *) getter(work_item); struct commit_list * parents = work_item->parents; while (parents) { struct commit * parent=parents->item; - struct sort_node * pn = (struct sort_node *)parent->object.util; - + struct sort_node * pn = (struct sort_node *) getter(parent); + if (pn) { - /* + /* * parents are only enqueued for emission * when all their children have been emitted thereby * guaranteeing topological order. @@ -667,7 +685,7 @@ void sort_in_topological_order(struct commit_list ** list, int lifo) *pptr = work_node->list_item; pptr = &(*pptr)->next; *pptr = NULL; - work_item->object.util = NULL; + setter(work_item, NULL); } free(nodes); } diff --git a/commit.h b/commit.h index 70a7c75e65..98682b232a 100644 --- a/commit.h +++ b/commit.h @@ -65,15 +65,29 @@ int count_parents(struct commit * commit); /* * Performs an in-place topological sort of list supplied. * - * Pre-conditions: + * Pre-conditions for sort_in_topological_order: * all commits in input list and all parents of those * commits must have object.util == NULL - * - * Post-conditions: + * + * Pre-conditions for sort_in_topological_order_fn: + * all commits in input list and all parents of those + * commits must have getter(commit) == NULL + * + * Post-conditions: * invariant of resulting list is: * a reachable from b => ord(b) < ord(a) * in addition, when lifo == 0, commits on parallel tracks are * sorted in the dates order. */ + +typedef void (*topo_sort_set_fn_t)(struct commit*, void *data); +typedef void* (*topo_sort_get_fn_t)(struct commit*); + +void topo_sort_default_setter(struct commit *c, void *data); +void *topo_sort_default_getter(struct commit *c); + void sort_in_topological_order(struct commit_list ** list, int lifo); +void sort_in_topological_order_fn(struct commit_list ** list, int lifo, + topo_sort_set_fn_t setter, + topo_sort_get_fn_t getter); #endif /* COMMIT_H */ diff --git a/count-delta.c b/count-delta.c deleted file mode 100644 index 058a2aadb1..0000000000 --- a/count-delta.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2005 Junio C Hamano - * The delta-parsing part is almost straight copy of patch-delta.c - * which is (C) 2005 Nicolas Pitre . - */ -#include -#include -#include -#include "delta.h" -#include "count-delta.h" - -/* - * NOTE. We do not _interpret_ delta fully. As an approximation, we - * just count the number of bytes that are copied from the source, and - * the number of literal data bytes that are inserted. - * - * Number of bytes that are _not_ copied from the source is deletion, - * and number of inserted literal bytes are addition, so sum of them - * is the extent of damage. - */ -int count_delta(void *delta_buf, unsigned long delta_size, - unsigned long *src_copied, unsigned long *literal_added) -{ - unsigned long copied_from_source, added_literal; - const unsigned char *data, *top; - unsigned char cmd; - unsigned long src_size, dst_size, out; - - if (delta_size < DELTA_SIZE_MIN) - return -1; - - data = delta_buf; - top = delta_buf + delta_size; - - src_size = get_delta_hdr_size(&data); - dst_size = get_delta_hdr_size(&data); - - added_literal = copied_from_source = out = 0; - while (data < top) { - cmd = *data++; - if (cmd & 0x80) { - unsigned long cp_off = 0, cp_size = 0; - if (cmd & 0x01) cp_off = *data++; - if (cmd & 0x02) cp_off |= (*data++ << 8); - if (cmd & 0x04) cp_off |= (*data++ << 16); - if (cmd & 0x08) cp_off |= (*data++ << 24); - if (cmd & 0x10) cp_size = *data++; - if (cmd & 0x20) cp_size |= (*data++ << 8); - if (cmd & 0x40) cp_size |= (*data++ << 16); - if (cp_size == 0) cp_size = 0x10000; - - copied_from_source += cp_size; - out += cp_size; - } else { - /* write literal into dst */ - added_literal += cmd; - out += cmd; - data += cmd; - } - } - - /* sanity check */ - if (data != top || out != dst_size) - return -1; - - /* delete size is what was _not_ copied from source. - * edit size is that and literal additions. - */ - *src_copied = copied_from_source; - *literal_added = added_literal; - return 0; -} diff --git a/count-delta.h b/count-delta.h deleted file mode 100644 index 7359629827..0000000000 --- a/count-delta.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Copyright (C) 2005 Junio C Hamano - */ -#ifndef COUNT_DELTA_H -#define COUNT_DELTA_H - -int count_delta(void *, unsigned long, - unsigned long *src_copied, unsigned long *literal_added); - -#endif diff --git a/diffcore-break.c b/diffcore-break.c index 0fc2b860be..71ad58a25a 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src, * The value we return is 1 if we want the pair to be broken, * or 0 if we do not. */ - unsigned long delta_size, base_size, src_copied, literal_added; - int to_break = 0; + unsigned long delta_size, base_size, src_copied, literal_added, + src_removed; *merge_score_p = 0; /* assume no deletion --- "do not break" * is the default. @@ -72,33 +72,40 @@ static int should_break(struct diff_filespec *src, &src_copied, &literal_added)) return 0; + /* sanity */ + if (src->size < src_copied) + src_copied = src->size; + if (dst->size < literal_added + src_copied) { + if (src_copied < dst->size) + literal_added = dst->size - src_copied; + else + literal_added = 0; + } + src_removed = src->size - src_copied; + /* Compute merge-score, which is "how much is removed * from the source material". The clean-up stage will * merge the surviving pair together if the score is * less than the minimum, after rename/copy runs. */ - if (src->size <= src_copied) - ; /* all copied, nothing removed */ - else { - delta_size = src->size - src_copied; - *merge_score_p = delta_size * MAX_SCORE / src->size; - } - + *merge_score_p = src_removed * MAX_SCORE / src->size; + /* Extent of damage, which counts both inserts and * deletes. */ - if (src->size + literal_added <= src_copied) - delta_size = 0; /* avoid wrapping around */ - else - delta_size = (src->size - src_copied) + literal_added; - - /* We break if the edit exceeds the minimum. - * i.e. (break_score / MAX_SCORE < delta_size / base_size) + delta_size = src_removed + literal_added; + if (delta_size * MAX_SCORE / base_size < break_score) + return 0; + + /* If you removed a lot without adding new material, that is + * not really a rewrite. */ - if (break_score * base_size < delta_size * MAX_SCORE) - to_break = 1; + if ((src->size * break_score < src_removed * MAX_SCORE) && + (literal_added * 20 < src_removed) && + (literal_added * 20 < src_copied)) + return 0; - return to_break; + return 1; } void diffcore_break(int break_score) diff --git a/diffcore-delta.c b/diffcore-delta.c index 1e6a6911ec..70bacff837 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -1,32 +1,53 @@ #include "cache.h" #include "diff.h" #include "diffcore.h" -#include "delta.h" -#include "count-delta.h" - -static int diffcore_count_changes_1(void *src, unsigned long src_size, - void *dst, unsigned long dst_size, - unsigned long delta_limit, - unsigned long *src_copied, - unsigned long *literal_added) + +/* + * Idea here is very simple. + * + * We have total of (sz-N+1) N-byte overlapping sequences in buf whose + * size is sz. If the same N-byte sequence appears in both source and + * destination, we say the byte that starts that sequence is shared + * between them (i.e. copied from source to destination). + * + * For each possible N-byte sequence, if the source buffer has more + * instances of it than the destination buffer, that means the + * difference are the number of bytes not copied from source to + * destination. If the counts are the same, everything was copied + * from source to destination. If the destination has more, + * everything was copied, and destination added more. + * + * We are doing an approximation so we do not really have to waste + * memory by actually storing the sequence. We just hash them into + * somewhere around 2^16 hashbuckets and count the occurrences. + * + * The length of the sequence is arbitrarily set to 8 for now. + */ + +#define HASHBASE 65537 /* next_prime(2^16) */ + +static void hash_chars(unsigned char *buf, unsigned long sz, int *count) { - void *delta; - unsigned long delta_size; - - delta = diff_delta(src, src_size, - dst, dst_size, - &delta_size, delta_limit); - if (!delta) - /* If delta_limit is exceeded, we have too much differences */ - return -1; + unsigned int accum1, accum2, i; - /* Estimate the edit size by interpreting delta. */ - if (count_delta(delta, delta_size, src_copied, literal_added)) { - free(delta); - return -1; + /* an 8-byte shift register made of accum1 and accum2. New + * bytes come at LSB of accum2, and shifted up to accum1 + */ + for (i = accum1 = accum2 = 0; i < 7; i++, sz--) { + accum1 = (accum1 << 8) | (accum2 >> 24); + accum2 = (accum2 << 8) | *buf++; + } + while (sz) { + accum1 = (accum1 << 8) | (accum2 >> 24); + accum2 = (accum2 << 8) | *buf++; + /* We want something that hashes permuted byte + * sequences nicely; simpler hash like (accum1 ^ + * accum2) does not perform as well. + */ + i = (accum1 + accum2 * 0x61) % HASHBASE; + count[i]++; + sz--; } - free(delta); - return 0; } int diffcore_count_changes(void *src, unsigned long src_size, @@ -35,9 +56,28 @@ int diffcore_count_changes(void *src, unsigned long src_size, unsigned long *src_copied, unsigned long *literal_added) { - return diffcore_count_changes_1(src, src_size, - dst, dst_size, - delta_limit, - src_copied, - literal_added); + int *src_count, *dst_count, i; + unsigned long sc, la; + + if (src_size < 8 || dst_size < 8) + return -1; + + src_count = xcalloc(HASHBASE * 2, sizeof(int)); + dst_count = src_count + HASHBASE; + hash_chars(src, src_size, src_count); + hash_chars(dst, dst_size, dst_count); + + sc = la = 0; + for (i = 0; i < HASHBASE; i++) { + if (src_count[i] < dst_count[i]) { + la += dst_count[i] - src_count[i]; + sc += src_count[i]; + } + else /* i.e. if (dst_count[i] <= src_count[i]) */ + sc += dst_count[i]; + } + *src_copied = sc; + *literal_added = la; + free(src_count); + return 0; } diff --git a/diffcore-rename.c b/diffcore-rename.c index 55cf1c37f3..625b589fb7 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -170,19 +170,15 @@ static int estimate_similarity(struct diff_filespec *src, &src_copied, &literal_added)) return 0; - /* Extent of damage */ - if (src->size + literal_added < src_copied) - delta_size = 0; - else - delta_size = (src->size - src_copied) + literal_added; - - /* - * Now we will give some score to it. 100% edit gets 0 points - * and 0% edit gets MAX_SCORE points. + /* How similar are they? + * what percentage of material in dst are from source? */ - score = MAX_SCORE - (MAX_SCORE * delta_size / base_size); - if (score < 0) return 0; - if (MAX_SCORE < score) return MAX_SCORE; + if (dst->size < src_copied) + score = MAX_SCORE; + else if (!dst->size) + score = 0; /* should not happen */ + else + score = src_copied * MAX_SCORE / dst->size; return score; } diff --git a/diffcore.h b/diffcore.h index dba4f17658..d31b3b476c 100644 --- a/diffcore.h +++ b/diffcore.h @@ -17,8 +17,8 @@ */ #define MAX_SCORE 60000.0 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */ -#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/ -#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/ +#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */ +#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen 60%) */ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ diff --git a/fsck-objects.c b/fsck-objects.c index 4ddd67699c..59b25904cb 100644 --- a/fsck-objects.c +++ b/fsck-objects.c @@ -14,10 +14,9 @@ static int show_root = 0; static int show_tags = 0; static int show_unreachable = 0; -static int standalone = 0; static int check_full = 0; static int check_strict = 0; -static int keep_cache_objects = 0; +static int keep_cache_objects = 0; static unsigned char head_sha1[20]; #ifdef NO_D_INO_IN_DIRENT @@ -68,7 +67,7 @@ static void check_connectivity(void) continue; if (!obj->parsed) { - if (!standalone && has_sha1_file(obj->sha1)) + if (has_sha1_file(obj->sha1)) ; /* it is in pack */ else printf("missing %s %s\n", @@ -82,7 +81,7 @@ static void check_connectivity(void) for (j = 0; j < refs->count; j++) { struct object *ref = refs->ref[j]; if (ref->parsed || - (!standalone && has_sha1_file(ref->sha1))) + (has_sha1_file(ref->sha1))) continue; printf("broken link from %7s %s\n", obj->type, sha1_to_hex(obj->sha1)); @@ -390,7 +389,7 @@ static int fsck_handle_ref(const char *refname, const unsigned char *sha1) obj = lookup_object(sha1); if (!obj) { - if (!standalone && has_sha1_file(sha1)) { + if (has_sha1_file(sha1)) { default_refs++; return 0; /* it is in a pack */ } @@ -464,10 +463,6 @@ int main(int argc, char **argv) keep_cache_objects = 1; continue; } - if (!strcmp(arg, "--standalone")) { - standalone = 1; - continue; - } if (!strcmp(arg, "--full")) { check_full = 1; continue; @@ -477,14 +472,9 @@ int main(int argc, char **argv) continue; } if (*arg == '-') - usage("git-fsck-objects [--tags] [--root] [[--unreachable] [--cache] [--standalone | --full] [--strict] *]"); + usage("git-fsck-objects [--tags] [--root] [[--unreachable] [--cache] [--full] [--strict] *]"); } - if (standalone && check_full) - die("Only one of --standalone or --full can be used."); - if (standalone) - putenv("GIT_ALTERNATE_OBJECT_DIRECTORIES="); - fsck_head_link(); fsck_object_dir(get_object_directory()); if (check_full) { diff --git a/git-fetch.sh b/git-fetch.sh index 0346d4a45c..c0eb96752e 100755 --- a/git-fetch.sh +++ b/git-fetch.sh @@ -94,6 +94,9 @@ append_fetch_head () { # remote-nick is the URL given on the command line (or a shorthand) # remote-name is the $GIT_DIR relative refs/ path we computed # for this refspec. + + # the $note_ variable will be fed to git-fmt-merge-msg for further + # processing. case "$remote_name_" in HEAD) note_= ;; @@ -103,6 +106,9 @@ append_fetch_head () { refs/tags/*) note_="$(expr "$remote_name_" : 'refs/tags/\(.*\)')" note_="tag '$note_' of " ;; + refs/remotes/*) + note_="$(expr "$remote_name_" : 'refs/remotes/\(.*\)')" + note_="remote branch '$note_' of " ;; *) note_="$remote_name of " ;; esac @@ -147,10 +153,10 @@ fast_forward_local () { else echo >&2 "* $1: storing $3" fi - git-update-ref "$1" "$2" + git-update-ref "$1" "$2" ;; - refs/heads/*) + refs/heads/* | refs/remotes/*) # $1 is the ref being updated. # $2 is the new value for the ref. local=$(git-rev-parse --verify "$1^0" 2>/dev/null) diff --git a/git-fmt-merge-msg.perl b/git-fmt-merge-msg.perl index afe80e6321..5986e5414a 100755 --- a/git-fmt-merge-msg.perl +++ b/git-fmt-merge-msg.perl @@ -75,6 +75,7 @@ sub shortlog { $src{$src} = { BRANCH => [], TAG => [], + R_BRANCH => [], GENERIC => [], # &1 == has HEAD. # &2 == has others. @@ -91,6 +92,11 @@ sub shortlog { push @{$src{$src}{TAG}}, $1; $src{$src}{HEAD_STATUS} |= 2; } + elsif (/^remote branch (.*)$/) { + $origin = $1; + push @{$src{$src}{R_BRANCH}}, $1; + $src{$src}{HEAD_STATUS} |= 2; + } elsif (/^HEAD$/) { $origin = $src; $src{$src}{HEAD_STATUS} |= 1; @@ -123,6 +129,8 @@ sub shortlog { } push @this, andjoin("branch ", "branches ", $src{$src}{BRANCH}); + push @this, andjoin("remote branch ", "remote branches ", + $src{$src}{R_BRANCH}); push @this, andjoin("tag ", "tags ", $src{$src}{TAG}); push @this, andjoin("commit ", "commits ", diff --git a/git-parse-remote.sh b/git-parse-remote.sh index 5f158c613f..63f22818e6 100755 --- a/git-parse-remote.sh +++ b/git-parse-remote.sh @@ -86,14 +86,14 @@ canon_refs_list_for_fetch () { local=$(expr "$ref" : '[^:]*:\(.*\)') case "$remote" in '') remote=HEAD ;; - refs/heads/* | refs/tags/*) ;; - heads/* | tags/* ) remote="refs/$remote" ;; + refs/heads/* | refs/tags/* | refs/remotes/*) ;; + heads/* | tags/* | remotes/* ) remote="refs/$remote" ;; *) remote="refs/heads/$remote" ;; esac case "$local" in '') local= ;; - refs/heads/* | refs/tags/*) ;; - heads/* | tags/* ) local="refs/$local" ;; + refs/heads/* | refs/tags/* | refs/remotes/*) ;; + heads/* | tags/* | remotes/* ) local="refs/$local" ;; *) local="refs/heads/$local" ;; esac diff --git a/pack-check.c b/pack-check.c index eca32b6cab..84ed90d369 100644 --- a/pack-check.c +++ b/pack-check.c @@ -70,13 +70,17 @@ static int verify_packfile(struct packed_git *p) } +#define MAX_CHAIN 40 + static void show_pack_info(struct packed_git *p) { struct pack_header *hdr; int nr_objects, i; + unsigned int chain_histogram[MAX_CHAIN]; hdr = p->pack_base; nr_objects = ntohl(hdr->hdr_entries); + memset(chain_histogram, 0, sizeof(chain_histogram)); for (i = 0; i < nr_objects; i++) { unsigned char sha1[20], base_sha1[20]; @@ -97,11 +101,25 @@ static void show_pack_info(struct packed_git *p) printf("%s ", sha1_to_hex(sha1)); if (!delta_chain_length) printf("%-6s %lu %u\n", type, size, e.offset); - else + else { printf("%-6s %lu %u %u %s\n", type, size, e.offset, delta_chain_length, sha1_to_hex(base_sha1)); + if (delta_chain_length < MAX_CHAIN) + chain_histogram[delta_chain_length]++; + else + chain_histogram[0]++; + } } + for (i = 0; i < MAX_CHAIN; i++) { + if (!chain_histogram[i]) + continue; + printf("chain length %s %d: %d object%s\n", + i ? "=" : ">=", + i ? i : MAX_CHAIN, + chain_histogram[i], + 1 < chain_histogram[i] ? "s" : ""); + } } int verify_pack(struct packed_git *p, int verbose) diff --git a/pack-objects.c b/pack-objects.c index 136a7f5aad..49357c6735 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -32,9 +32,6 @@ struct object_entry { * be used as the base objectto delta huge * objects against. */ - int based_on_preferred; /* current delta candidate is a preferred - * one, or delta against a preferred one. - */ }; /* @@ -824,8 +821,6 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de { struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; - int old_preferred = (old_entry->preferred_base || - old_entry->based_on_preferred); unsigned long size, oldsize, delta_size, sizediff; long max_size; void *delta_buf; @@ -867,27 +862,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de * delete). */ max_size = size / 2 - 20; - if (cur_entry->delta) { - if (cur_entry->based_on_preferred) { - if (old_preferred) - max_size = cur_entry->delta_size-1; - else - /* trying with non-preferred one when we - * already have a delta based on preferred - * one is pointless. - */ - return -1; - } - else if (!old_preferred) - max_size = cur_entry->delta_size-1; - else - /* otherwise... even if delta with a - * preferred one produces a bigger result than - * what we currently have, which is based on a - * non-preferred one, it is OK. - */ - ; - } + if (cur_entry->delta) + max_size = cur_entry->delta_size-1; if (sizediff >= max_size) return -1; delta_buf = diff_delta(old->data, oldsize, @@ -897,7 +873,6 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de cur_entry->delta = old_entry; cur_entry->delta_size = delta_size; cur_entry->depth = old_entry->depth + 1; - cur_entry->based_on_preferred = old_preferred; free(delta_buf); return 0; } @@ -966,6 +941,15 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (try_delta(n, m, depth) < 0) break; } +#if 0 + /* if we made n a delta, and if n is already at max + * depth, leaving it in the window is pointless. we + * should evict it first. + * ... in theory only; somehow this makes things worse. + */ + if (entry->delta && depth <= entry->depth) + continue; +#endif idx++; if (idx >= window) idx = 0; diff --git a/rev-list.c b/rev-list.c index 8e4d83efba..812d237f47 100644 --- a/rev-list.c +++ b/rev-list.c @@ -190,7 +190,7 @@ static int count_distance(struct commit_list *entry) if (commit->object.flags & (UNINTERESTING | COUNTED)) break; - if (!revs.paths || (commit->object.flags & TREECHANGE)) + if (!revs.prune_fn || (commit->object.flags & TREECHANGE)) nr++; commit->object.flags |= COUNTED; p = commit->parents; @@ -224,7 +224,7 @@ static struct commit_list *find_bisection(struct commit_list *list) nr = 0; p = list; while (p) { - if (!revs.paths || (p->item->object.flags & TREECHANGE)) + if (!revs.prune_fn || (p->item->object.flags & TREECHANGE)) nr++; p = p->next; } @@ -234,7 +234,7 @@ static struct commit_list *find_bisection(struct commit_list *list) for (p = list; p; p = p->next) { int distance; - if (revs.paths && !(p->item->object.flags & TREECHANGE)) + if (revs.prune_fn && !(p->item->object.flags & TREECHANGE)) continue; distance = count_distance(p); diff --git a/revision.c b/revision.c index c8d93ff106..01386ed6d4 100644 --- a/revision.c +++ b/revision.c @@ -199,31 +199,27 @@ static int everybody_uninteresting(struct commit_list *orig) return 1; } -#define TREE_SAME 0 -#define TREE_NEW 1 -#define TREE_DIFFERENT 2 -static int tree_difference = TREE_SAME; +static int tree_difference = REV_TREE_SAME; static void file_add_remove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, const char *base, const char *path) { - int diff = TREE_DIFFERENT; + int diff = REV_TREE_DIFFERENT; /* - * Is it an add of a new file? It means that - * the old tree didn't have it at all, so we - * will turn "TREE_SAME" -> "TREE_NEW", but - * leave any "TREE_DIFFERENT" alone (and if - * it already was "TREE_NEW", we'll keep it - * "TREE_NEW" of course). + * Is it an add of a new file? It means that the old tree + * didn't have it at all, so we will turn "REV_TREE_SAME" -> + * "REV_TREE_NEW", but leave any "REV_TREE_DIFFERENT" alone + * (and if it already was "REV_TREE_NEW", we'll keep it + * "REV_TREE_NEW" of course). */ if (addremove == '+') { diff = tree_difference; - if (diff != TREE_SAME) + if (diff != REV_TREE_SAME) return; - diff = TREE_NEW; + diff = REV_TREE_NEW; } tree_difference = diff; } @@ -234,7 +230,7 @@ static void file_change(struct diff_options *options, const unsigned char *new_sha1, const char *base, const char *path) { - tree_difference = TREE_DIFFERENT; + tree_difference = REV_TREE_DIFFERENT; } static struct diff_options diff_opt = { @@ -243,19 +239,19 @@ static struct diff_options diff_opt = { .change = file_change, }; -static int compare_tree(struct tree *t1, struct tree *t2) +int rev_compare_tree(struct tree *t1, struct tree *t2) { if (!t1) - return TREE_NEW; + return REV_TREE_NEW; if (!t2) - return TREE_DIFFERENT; - tree_difference = TREE_SAME; + return REV_TREE_DIFFERENT; + tree_difference = REV_TREE_SAME; if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0) - return TREE_DIFFERENT; + return REV_TREE_DIFFERENT; return tree_difference; } -static int same_tree_as_empty(struct tree *t1) +int rev_same_tree_as_empty(struct tree *t1) { int retval; void *tree; @@ -288,7 +284,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) return; if (!commit->parents) { - if (!same_tree_as_empty(commit->tree)) + if (!rev_same_tree_as_empty(commit->tree)) commit->object.flags |= TREECHANGE; return; } @@ -298,8 +294,8 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) struct commit *p = parent->item; parse_commit(p); - switch (compare_tree(p->tree, commit->tree)) { - case TREE_SAME: + switch (rev_compare_tree(p->tree, commit->tree)) { + case REV_TREE_SAME: if (p->object.flags & UNINTERESTING) { /* Even if a merge with an uninteresting * side branch brought the entire change @@ -314,13 +310,14 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) commit->parents = parent; return; - case TREE_NEW: - if (revs->remove_empty_trees && same_tree_as_empty(p->tree)) { + case REV_TREE_NEW: + if (revs->remove_empty_trees && + rev_same_tree_as_empty(p->tree)) { *pp = parent->next; continue; } /* fallthrough */ - case TREE_DIFFERENT: + case REV_TREE_DIFFERENT: tree_changed = 1; pp = &parent->next; continue; @@ -368,8 +365,8 @@ static void add_parents_to_list(struct rev_info *revs, struct commit *commit, st * simplify the commit history and find the parent * that has no differences in the path set if one exists. */ - if (revs->paths) - try_to_simplify_commit(revs, commit); + if (revs->prune_fn) + revs->prune_fn(revs, commit); parent = commit->parents; while (parent) { @@ -391,9 +388,6 @@ static void limit_list(struct rev_info *revs) struct commit_list *newlist = NULL; struct commit_list **p = &newlist; - if (revs->paths) - diff_tree_setup_paths(revs->paths); - while (list) { struct commit_list *entry = list; struct commit *commit = list->item; @@ -445,6 +439,23 @@ static void handle_all(struct rev_info *revs, unsigned flags) for_each_ref(handle_one_ref); } +void init_revisions(struct rev_info *revs) +{ + memset(revs, 0, sizeof(*revs)); + revs->lifo = 1; + revs->dense = 1; + revs->prefix = setup_git_directory(); + revs->max_age = -1; + revs->min_age = -1; + revs->max_count = -1; + + revs->prune_fn = NULL; + revs->prune_data = NULL; + + revs->topo_setter = topo_sort_default_setter; + revs->topo_getter = topo_sort_default_getter; +} + /* * Parse revision information, filling in the "rev_info" structure, * and removing the used arguments from the argument list. @@ -458,13 +469,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch const char **unrecognized = argv + 1; int left = 1; - memset(revs, 0, sizeof(*revs)); - revs->lifo = 1; - revs->dense = 1; - revs->prefix = setup_git_directory(); - revs->max_age = -1; - revs->min_age = -1; - revs->max_count = -1; + init_revisions(revs); /* First, search for "--" */ seen_dashdash = 0; @@ -474,7 +479,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch continue; argv[i] = NULL; argc = i; - revs->paths = get_pathspec(revs->prefix, argv + i + 1); + revs->prune_data = get_pathspec(revs->prefix, argv + i + 1); seen_dashdash = 1; break; } @@ -638,7 +643,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch if (lstat(argv[j], &st) < 0) die("'%s': %s", arg, strerror(errno)); } - revs->paths = get_pathspec(revs->prefix, argv + i); + revs->prune_data = get_pathspec(revs->prefix, argv + i); break; } commit = get_commit_reference(revs, arg, sha1, flags ^ local_flags); @@ -652,8 +657,13 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch commit = get_commit_reference(revs, def, sha1, 0); add_one_commit(commit, revs); } - if (revs->paths) + + if (revs->prune_data) { + diff_tree_setup_paths(revs->prune_data); + revs->prune_fn = try_to_simplify_commit; revs->limited = 1; + } + return left; } @@ -663,7 +673,9 @@ void prepare_revision_walk(struct rev_info *revs) if (revs->limited) limit_list(revs); if (revs->topo_order) - sort_in_topological_order(&revs->commits, revs->lifo); + sort_in_topological_order_fn(&revs->commits, revs->lifo, + revs->topo_setter, + revs->topo_getter); } static int rewrite_one(struct commit **pp) @@ -719,7 +731,7 @@ struct commit *get_revision(struct rev_info *revs) return NULL; if (revs->no_merges && commit->parents && commit->parents->next) goto next; - if (revs->paths && revs->dense) { + if (revs->prune_fn && revs->dense) { if (!(commit->object.flags & TREECHANGE)) goto next; rewrite_parents(commit); diff --git a/revision.h b/revision.h index 31e8f61567..6c2becad13 100644 --- a/revision.h +++ b/revision.h @@ -7,6 +7,10 @@ #define SHOWN (1u<<3) #define TMP_MARK (1u<<4) /* for isolated cases; clean after use */ +struct rev_info; + +typedef void (prune_fn_t)(struct rev_info *revs, struct commit *commit); + struct rev_info { /* Starting list */ struct commit_list *commits; @@ -14,7 +18,8 @@ struct rev_info { /* Basic information */ const char *prefix; - const char **paths; + void *prune_data; + prune_fn_t *prune_fn; /* Traversal flags */ unsigned int dense:1, @@ -33,9 +38,20 @@ struct rev_info { int max_count; unsigned long max_age; unsigned long min_age; + + topo_sort_set_fn_t topo_setter; + topo_sort_get_fn_t topo_getter; }; +#define REV_TREE_SAME 0 +#define REV_TREE_NEW 1 +#define REV_TREE_DIFFERENT 2 + /* revision.c */ +extern int rev_same_tree_as_empty(struct tree *t1); +extern int rev_compare_tree(struct tree *t1, struct tree *t2); + +extern void init_revisions(struct rev_info *revs); extern int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def); extern void prepare_revision_walk(struct rev_info *revs); extern struct commit *get_revision(struct rev_info *revs);