#include "string-list.h"
/*
- * Make sure "ref" is something reasonable to have under ".git/refs/";
- * We do not like it if:
+ * How to handle various characters in refnames:
+ * This table is used by both the SIMD and non-SIMD code. It has
+ * some cases that are only useful for the SIMD; these are handled
+ * equivalently to the listed disposition in the non-SIMD code.
+ * 0: An acceptable character for refs
+ * 1: @, look for a following { to reject @{ in refs (SIMD or = 0)
+ * 2: \0: End-of-component and string
+ * 3: /: End-of-component (SIMD or = 2)
+ * 4: ., look for a preceding . to reject .. in refs
+ * 5: {, look for a preceding @ to reject @{ in refs
+ * 6: *, usually a bad character except, once as a wildcard (SIMD or = 7)
+ * 7: A bad character except * (see check_refname_component below)
+ */
+static unsigned char refname_disposition[256] = {
+ 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 4, 3,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 7, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 7
+};
+
+/*
+ * Try to read one refname component from the front of refname.
+ * Return the length of the component found, or -1 if the component is
+ * not legal. It is legal if it is something reasonable to have under
+ * ".git/refs/"; We do not like it if:
*
* - any path component of it begins with ".", or
* - it has double dots "..", or
* - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
- * - it ends with a "/".
- * - it ends with ".lock"
+ * - it has pattern-matching notation "*", "?", "[", anywhere, or
+ * - it ends with a "/", or
+ * - it ends with ".lock", or
* - it contains a "\" (backslash)
*/
-
-/* Return true iff ch is not allowed in reference names. */
-static inline int bad_ref_char(int ch)
-{
- if (((unsigned) ch) <= ' ' || ch == 0x7f ||
- ch == '~' || ch == '^' || ch == ':' || ch == '\\')
- return 1;
- /* 2.13 Pattern Matching Notation */
- if (ch == '*' || ch == '?' || ch == '[') /* Unsupported */
- return 1;
- return 0;
-}
-
-/*
- * Try to read one refname component from the front of refname. Return
- * the length of the component found, or -1 if the component is not
- * legal.
- */
static int check_refname_component(const char *refname, int flags)
{
const char *cp;
char last = '\0';
for (cp = refname; ; cp++) {
- char ch = *cp;
- if (ch == '\0' || ch == '/')
+ int ch = *cp & 255;
+ unsigned char disp = refname_disposition[ch];
+ switch (disp) {
+ case 2: /* fall-through */
+ case 3:
+ goto out;
+ case 4:
+ if (last == '.')
+ return -1; /* Refname contains "..". */
+ break;
+ case 5:
+ if (last == '@')
+ return -1; /* Refname contains "@{". */
break;
- if (bad_ref_char(ch))
- return -1; /* Illegal character in refname. */
- if (last == '.' && ch == '.')
- return -1; /* Refname contains "..". */
- if (last == '@' && ch == '{')
- return -1; /* Refname contains "@{". */
+ case 6: /* fall-through */
+ case 7:
+ return -1;
+ }
last = ch;
}
+out:
if (cp == refname)
return 0; /* Component has zero length. */
if (refname[0] == '.') {
return cp - refname;
}
-int check_refname_format(const char *refname, int flags)
+static int check_refname_format_bytewise(const char *refname, int flags)
{
int component_len, component_count = 0;
return 0;
}
+#if defined(__GNUC__) && defined(__x86_64__)
+#define SSE_VECTOR_BYTES 16
+
+/* Vectorized version of check_refname_format. */
+int check_refname_format(const char *refname, int flags)
+{
+ const char *cp = refname;
+
+ const __m128i dot = _mm_set1_epi8('.');
+ const __m128i at = _mm_set1_epi8('@');
+ const __m128i curly = _mm_set1_epi8('{');
+ const __m128i slash = _mm_set1_epi8('/');
+ const __m128i zero = _mm_set1_epi8('\000');
+ const __m128i el = _mm_set1_epi8('l');
+
+ /* below '*', all characters are forbidden or rare */
+ const __m128i star_ub = _mm_set1_epi8('*' + 1);
+
+ const __m128i colon = _mm_set1_epi8(':');
+ const __m128i question = _mm_set1_epi8('?');
+
+ /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
+ const __m128i bracket_lb = _mm_set1_epi8('[' - 1);
+ const __m128i caret_ub = _mm_set1_epi8('^' + 1);
+
+ /* '~' and above are forbidden */
+ const __m128i tilde_lb = _mm_set1_epi8('~' - 1);
+
+ int component_count = 0;
+
+ if (refname[0] == 0 || refname[0] == '/') {
+ /* entirely empty ref or initial ref component */
+ return -1;
+ }
+
+ /*
+ * Initial ref component of '.'; below we look for /. so we'll
+ * miss this.
+ */
+ if (refname[0] == '.') {
+ if (refname[1] == '/' || refname[1] == '\0')
+ return -1;
+ if (!(flags & REFNAME_DOT_COMPONENT))
+ return -1;
+ }
+ while(1) {
+ __m128i tmp, tmp1, result;
+ uint64_t mask;
+
+ if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1)
+ /*
+ * End-of-page; fall back to slow method for
+ * this entire ref.
+ */
+ return check_refname_format_bytewise(refname, flags);
+
+ tmp = _mm_loadu_si128((__m128i *)cp);
+ tmp1 = _mm_loadu_si128((__m128i *)(cp + 1));
+
+ /*
+ * This range (note the lt) contains some
+ * permissible-but-rare characters (including all
+ * characters >= 128), which we handle later. It also
+ * includes \000.
+ */
+ result = _mm_cmplt_epi8(tmp, star_ub);
+
+ result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question));
+ result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon));
+
+ /* This range contains the permissible ] as bycatch */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpgt_epi8(tmp, bracket_lb),
+ _mm_cmplt_epi8(tmp, caret_ub)));
+
+ result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb));
+
+ /* .. */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, dot),
+ _mm_cmpeq_epi8(tmp1, dot)));
+ /* @{ */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, at),
+ _mm_cmpeq_epi8(tmp1, curly)));
+ /* // */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, slash)));
+ /* trailing / */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, zero)));
+ /* .l, beginning of .lock */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, dot),
+ _mm_cmpeq_epi8(tmp1, el)));
+ /*
+ * Even though /. is not necessarily an error, we flag
+ * it anyway. If we find it, we'll check if it's valid
+ * and if so we'll advance just past it.
+ */
+ result = _mm_or_si128(result, _mm_and_si128(
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, dot)));
+
+ mask = _mm_movemask_epi8(result);
+ if (mask) {
+ /*
+ * We've found either end-of-string, or some
+ * probably-bad character or substring.
+ */
+ int i = __builtin_ctz(mask);
+ switch (refname_disposition[cp[i] & 255]) {
+ case 0: /* fall-through */
+ case 5:
+ /*
+ * bycatch: a good character that's in
+ * one of the ranges of mostly-forbidden
+ * characters
+ */
+ cp += i + 1;
+ break;
+ case 1:
+ if (cp[i + 1] == '{')
+ return -1;
+ cp += i + 1;
+ break;
+ case 2:
+ if (!(flags & REFNAME_ALLOW_ONELEVEL)
+ && !component_count && !strchr(refname, '/'))
+ /* Refname has only one component. */
+ return -1;
+ return 0;
+ case 3:
+ component_count ++;
+ /*
+ * Even if leading dots are allowed, don't
+ * allow "." as a component (".." is
+ * prevented by case 4 below).
+ */
+ if (cp[i + 1] == '.') {
+ if (cp[i + 2] == '\0')
+ return -1;
+ if (flags & REFNAME_DOT_COMPONENT) {
+ /* skip to just after the /. */
+ cp += i + 2;
+ break;
+ }
+ return -1;
+ } else if (cp[i + 1] == '/' || cp[i + 1] == '\0')
+ return -1;
+ break;
+ case 4:
+ if (cp[i + 1] == '.' || cp[i + 1] == '\0')
+ return -1;
+ /* .lock as end-of-component or end-of-string */
+ if ((!strncmp(cp + i, ".lock", 5))
+ && (cp[i + 5] == '/' || cp[i + 5] == 0))
+ return -1;
+ cp += 1;
+ break;
+ case 6:
+ if (((cp == refname + i) || cp[i - 1] == '/')
+ && (cp[i + 1] == '/' || cp[i + 1] == 0))
+ if (flags & REFNAME_REFSPEC_PATTERN) {
+ flags &= ~REFNAME_REFSPEC_PATTERN;
+ /* restart after the * */
+ cp += i + 1;
+ continue;
+ }
+ /* fall-through */
+ case 7:
+ return -1;
+ }
+ } else
+ cp += SSE_VECTOR_BYTES;
+ }
+}
+
+#else
+
+int check_refname_format (const char *refname, int flags)
+{
+ return check_refname_format_bytewise(refname, flags);
+}
+
+#endif
+
struct ref_entry;
/*
struct warn_if_dangling_data {
FILE *fp;
const char *refname;
+ const struct string_list *refnames;
const char *msg_fmt;
};
return 0;
resolves_to = resolve_ref_unsafe(refname, junk, 0, NULL);
- if (!resolves_to || strcmp(resolves_to, d->refname))
+ if (!resolves_to
+ || (d->refname
+ ? strcmp(resolves_to, d->refname)
+ : !string_list_has_string(d->refnames, resolves_to))) {
return 0;
+ }
fprintf(d->fp, d->msg_fmt, refname);
fputc('\n', d->fp);
data.fp = fp;
data.refname = refname;
+ data.refnames = NULL;
+ data.msg_fmt = msg_fmt;
+ for_each_rawref(warn_if_dangling_symref, &data);
+}
+
+void warn_dangling_symrefs(FILE *fp, const char *msg_fmt, const struct string_list *refnames)
+{
+ struct warn_if_dangling_data data;
+
+ data.fp = fp;
+ data.refname = NULL;
+ data.refnames = refnames;
data.msg_fmt = msg_fmt;
for_each_rawref(warn_if_dangling_symref, &data);
}
return 0;
}
-static int repack_without_refs(const char **refnames, int n)
+int repack_without_refs(const char **refnames, int n)
{
struct ref_dir *packed;
struct string_list refs_to_delete = STRING_LIST_INIT_DUP;
return 0;
}
-static char *ref_msg(const char *line, const char *endp)
-{
- const char *ep;
- line += 82;
- ep = memchr(line, '\n', endp - line);
- if (!ep)
- ep = endp;
- return xmemdupz(line, ep - line);
+struct read_ref_at_cb {
+ const char *refname;
+ unsigned long at_time;
+ int cnt;
+ int reccnt;
+ unsigned char *sha1;
+ int found_it;
+
+ unsigned char osha1[20];
+ unsigned char nsha1[20];
+ int tz;
+ unsigned long date;
+ char **msg;
+ unsigned long *cutoff_time;
+ int *cutoff_tz;
+ int *cutoff_cnt;
+};
+
+static int read_ref_at_ent(unsigned char *osha1, unsigned char *nsha1,
+ const char *email, unsigned long timestamp, int tz,
+ const char *message, void *cb_data)
+{
+ struct read_ref_at_cb *cb = cb_data;
+
+ cb->reccnt++;
+ cb->tz = tz;
+ cb->date = timestamp;
+
+ if (timestamp <= cb->at_time || cb->cnt == 0) {
+ if (cb->msg)
+ *cb->msg = xstrdup(message);
+ if (cb->cutoff_time)
+ *cb->cutoff_time = timestamp;
+ if (cb->cutoff_tz)
+ *cb->cutoff_tz = tz;
+ if (cb->cutoff_cnt)
+ *cb->cutoff_cnt = cb->reccnt - 1;
+ /*
+ * we have not yet updated cb->[n|o]sha1 so they still
+ * hold the values for the previous record.
+ */
+ if (!is_null_sha1(cb->osha1)) {
+ hashcpy(cb->sha1, nsha1);
+ if (hashcmp(cb->osha1, nsha1))
+ warning("Log for ref %s has gap after %s.",
+ cb->refname, show_date(cb->date, cb->tz, DATE_RFC2822));
+ }
+ else if (cb->date == cb->at_time)
+ hashcpy(cb->sha1, nsha1);
+ else if (hashcmp(nsha1, cb->sha1))
+ warning("Log for ref %s unexpectedly ended on %s.",
+ cb->refname, show_date(cb->date, cb->tz,
+ DATE_RFC2822));
+ hashcpy(cb->osha1, osha1);
+ hashcpy(cb->nsha1, nsha1);
+ cb->found_it = 1;
+ return 1;
+ }
+ hashcpy(cb->osha1, osha1);
+ hashcpy(cb->nsha1, nsha1);
+ if (cb->cnt > 0)
+ cb->cnt--;
+ return 0;
+}
+
+static int read_ref_at_ent_oldest(unsigned char *osha1, unsigned char *nsha1,
+ const char *email, unsigned long timestamp,
+ int tz, const char *message, void *cb_data)
+{
+ struct read_ref_at_cb *cb = cb_data;
+
+ if (cb->msg)
+ *cb->msg = xstrdup(message);
+ if (cb->cutoff_time)
+ *cb->cutoff_time = timestamp;
+ if (cb->cutoff_tz)
+ *cb->cutoff_tz = tz;
+ if (cb->cutoff_cnt)
+ *cb->cutoff_cnt = cb->reccnt;
+ hashcpy(cb->sha1, osha1);
+ if (is_null_sha1(cb->sha1))
+ hashcpy(cb->sha1, nsha1);
+ /* We just want the first entry */
+ return 1;
}
int read_ref_at(const char *refname, unsigned long at_time, int cnt,
unsigned char *sha1, char **msg,
unsigned long *cutoff_time, int *cutoff_tz, int *cutoff_cnt)
{
- const char *logfile, *logdata, *logend, *rec, *lastgt, *lastrec;
- char *tz_c;
- int logfd, tz, reccnt = 0;
- struct stat st;
- unsigned long date;
- unsigned char logged_sha1[20];
- void *log_mapped;
- size_t mapsz;
+ struct read_ref_at_cb cb;
- logfile = git_path("logs/%s", refname);
- logfd = open(logfile, O_RDONLY, 0);
- if (logfd < 0)
- die_errno("Unable to read log '%s'", logfile);
- fstat(logfd, &st);
- if (!st.st_size)
- die("Log %s is empty.", logfile);
- mapsz = xsize_t(st.st_size);
- log_mapped = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, logfd, 0);
- logdata = log_mapped;
- close(logfd);
+ memset(&cb, 0, sizeof(cb));
+ cb.refname = refname;
+ cb.at_time = at_time;
+ cb.cnt = cnt;
+ cb.msg = msg;
+ cb.cutoff_time = cutoff_time;
+ cb.cutoff_tz = cutoff_tz;
+ cb.cutoff_cnt = cutoff_cnt;
+ cb.sha1 = sha1;
+
+ for_each_reflog_ent_reverse(refname, read_ref_at_ent, &cb);
+
+ if (!cb.reccnt)
+ die("Log for %s is empty.", refname);
+ if (cb.found_it)
+ return 0;
+
+ for_each_reflog_ent(refname, read_ref_at_ent_oldest, &cb);
- lastrec = NULL;
- rec = logend = logdata + st.st_size;
- while (logdata < rec) {
- reccnt++;
- if (logdata < rec && *(rec-1) == '\n')
- rec--;
- lastgt = NULL;
- while (logdata < rec && *(rec-1) != '\n') {
- rec--;
- if (*rec == '>')
- lastgt = rec;
- }
- if (!lastgt)
- die("Log %s is corrupt.", logfile);
- date = strtoul(lastgt + 1, &tz_c, 10);
- if (date <= at_time || cnt == 0) {
- tz = strtoul(tz_c, NULL, 10);
- if (msg)
- *msg = ref_msg(rec, logend);
- if (cutoff_time)
- *cutoff_time = date;
- if (cutoff_tz)
- *cutoff_tz = tz;
- if (cutoff_cnt)
- *cutoff_cnt = reccnt - 1;
- if (lastrec) {
- if (get_sha1_hex(lastrec, logged_sha1))
- die("Log %s is corrupt.", logfile);
- if (get_sha1_hex(rec + 41, sha1))
- die("Log %s is corrupt.", logfile);
- if (hashcmp(logged_sha1, sha1)) {
- warning("Log %s has gap after %s.",
- logfile, show_date(date, tz, DATE_RFC2822));
- }
- }
- else if (date == at_time) {
- if (get_sha1_hex(rec + 41, sha1))
- die("Log %s is corrupt.", logfile);
- }
- else {
- if (get_sha1_hex(rec + 41, logged_sha1))
- die("Log %s is corrupt.", logfile);
- if (hashcmp(logged_sha1, sha1)) {
- warning("Log %s unexpectedly ended on %s.",
- logfile, show_date(date, tz, DATE_RFC2822));
- }
- }
- munmap(log_mapped, mapsz);
- return 0;
- }
- lastrec = rec;
- if (cnt > 0)
- cnt--;
- }
-
- rec = logdata;
- while (rec < logend && *rec != '>' && *rec != '\n')
- rec++;
- if (rec == logend || *rec == '\n')
- die("Log %s is corrupt.", logfile);
- date = strtoul(rec + 1, &tz_c, 10);
- tz = strtoul(tz_c, NULL, 10);
- if (get_sha1_hex(logdata, sha1))
- die("Log %s is corrupt.", logfile);
- if (is_null_sha1(sha1)) {
- if (get_sha1_hex(logdata + 41, sha1))
- die("Log %s is corrupt.", logfile);
- }
- if (msg)
- *msg = ref_msg(logdata, logend);
- munmap(log_mapped, mapsz);
-
- if (cutoff_time)
- *cutoff_time = date;
- if (cutoff_tz)
- *cutoff_tz = tz;
- if (cutoff_cnt)
- *cutoff_cnt = reccnt;
return 1;
}