INSTALL?=install
RM ?= rm -f
DOC_REF = origin/man
+HTML_REF = origin/html
infodir?=$(prefix)/share/info
MAKEINFO=makeinfo
quick-install:
sh ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+quick-install-html:
+ sh ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
+
.PHONY: .FORCE-GIT-VERSION-FILE
* Many commands did not use the correct working tree location when used
with GIT_WORK_TREE environment settings.
+* Some systems needs to use compatibility fnmach and regex libraries
+ independent from each other; the compat/ area has been reorganized to
+ allow this.
+
* "git apply --unidiff-zero" incorrectly applied a -U0 patch that inserts
a new line before the second line.
* "git blame -c" did not exactly work like "git annotate" when range
boundaries are involved.
+* "git checkout file" when file is still unmerged checked out contents from
+ a random high order stage, which was confusing.
+
* "git clone $there $here/" with extra trailing slashes after explicit
local directory name $here did not work as expected.
+* "git diff" on tracked contents with CRLF line endings did not drive "less"
+ intelligently when showing added or removed lines.
+
* "git diff --dirstat -M" did not add changes in subdirectories up
correctly for renamed paths.
* "git gui" translation updates and i18n fixes.
+* "git index-pack" is more careful against disk corruption while completing
+ a thin pack.
+
* "git log -i --grep=pattern" did not ignore case; neither "git log -E
--grep=pattern" triggered extended regexp.
* "git log --pretty="%ad" --date=short" did not use short format when
showing the timestamp.
+* "git log --author=author" match incorrectly matched with the
+ timestamp part of "author " line in commit objects.
+
+* "git log -F --author=author" did not work at all.
+
* Build procedure for "git shell" that used stub versions of some
functions and globals was not understood by linkers on some platforms.
* "git stash" was fooled by a stat-dirty but otherwise unmodified paths
and refused to work until the user refreshed the index.
+* "git svn" was broken on Perl before 5.8 with recent fixes to reduce
+ use of temporary files.
+
* "git verify-pack -v" did not work correctly when given more than one
packfile.
--
exec >/var/tmp/1
-O=v1.6.0.1-61-g1eff26c
+O=v1.6.0.1-78-g3632cfc
echo O=$(git describe maint)
git shortlog --no-merges $O..maint
-
--- /dev/null
+GIT v1.6.0.3 Release Notes
+==========================
+
+Fixes since v1.6.0.2
+--------------------
+
+* "git archive --format=zip" did not honor core.autocrlf while
+ --format=tar did.
+
+* Continuing "git rebase -i" was very confused when the user left modified
+ files in the working tree while resolving conflicts.
+
+* Continuing "git rebase -i" was also very confused when the user left
+ some staged changes in the index after "edit".
+
+* Behaviour of "git diff --quiet" was inconsistent with "diff --exit-code"
+ with the output redirected to /dev/null.
+
+* "git stash apply sash@{1}" was fixed to error out. Prior versions
+ would have applied stash@{0} incorrectly.
+
+* "git for-each-ref --format=%(subject)" fixed for commits with no
+ no newline in the message body.
+
+* "git remote" fixed to protect printf from user input.
+
+* "git checkout -q" once again suppresses the locally modified file list.
+
+* Cross-directory renames are no longer used when creating packs. This
+ allows more graceful behavior on filesystems like sshfs.
+
+* Stale temporary files under $GIT_DIR/objects/pack are now cleaned up
+ automatically by "git prune".
+
+* "Git.pm" tests relied on unnecessarily more recent version of Perl.
+
+* "gitweb" triggered undef warning on commits without log messages.
+
+Many other documentation updates.
+
+--
+exec >/var/tmp/1
+O=v1.6.0.2-41-g7fe4a72
+echo O=$(git describe maint)
+git shortlog --no-merges $O..maint
* Most of the test scripts (but not the ones that try to run servers)
can be run in parallel.
+* Bash completion of refnames in a repository with massive number of
+ refs has been optimized.
+
(usability, bells and whistles)
+* When you mistype a command name, git helpfully suggests what it guesses
+ you might have meant to say. help.autocorrect configuration can be set
+ to a non-zero value to accept the suggestion when git can uniquely
+ guess.
+
+* "git bisect" is careful about a user mistake and suggests testing of
+ merge base first when good is not a strict ancestor of bad.
+
* "git checkout --track origin/hack" used to be a syntax error. It now
DWIMs to create a corresponding local branch "hack", i.e. acts as if you
said "git checkout --track -b hack origin/hack".
* "git diff" learned to put more sensible hunk headers for Python and
HTML contents.
+* "git diff" learned to vary the a/ vs b/ prefix depending on what are
+ being compared, controlled by diff.mnemonicprefix configuration.
+
+* "git for-each-ref" learned "refname:short" token that gives an
+ unambiguously abbreviated refname.
+
* "git help" learned to use GIT_MAN_VIEWER environment variable before
using "man" program.
* "git log" learned --simplify-merges, a milder variant of --full-history;
"gitk --simplify-merges" is easier to view than with --full-history.
+* "git log --pretty=format:" learned "%d" format element that inserts
+ names of tags that point at the commit.
+
* "git merge --squash" and "git merge --no-ff" into an unborn branch are
noticed as user errors.
--
exec >/var/tmp/1
-O=v1.6.0.1-266-gaf9552f
+O=v1.6.0.2-295-g34a5d35
echo O=$(git describe master)
git shortlog --no-merges $O..master ^maint
you want to use an external diff program only on a subset of
your files, you might want to use linkgit:gitattributes[5] instead.
+diff.mnemonicprefix::
+ If set, 'git-diff' uses a prefix pair that is different from the
+ standard "a/" and "b/" depending on what is being compared. When
+ this configuration is in effect, reverse diff output also swaps
+ the order of the prefixes:
+'git-diff';;
+ compares the (i)ndex and the (w)ork tree;
+'git-diff HEAD';;
+ compares a (c)ommit and the (w)ork tree;
+'git diff --cached';;
+ compares a (c)ommit and the (i)ndex;
+'git-diff HEAD:file1 file2';;
+ compares an (o)bject and a (w)ork tree entity;
+'git diff --no-index a b';;
+ compares two non-git things (1) and (2).
+
diff.renameLimit::
The number of files to consider when performing the copy/rename
detection; equivalent to the 'git-diff' option '-l'.
can be set with "--dirstat=limit". Changes in a child directory is not
counted for the parent directory, unless "--cumulative" is used.
+--dirstat-by-file[=limit]::
+ Same as --dirstat, but counts changed files instead of lines.
+
--summary::
Output a condensed summary of extended header information
such as creations, renames and mode changes.
[--allow-binary-replacement | --binary] [--reject] [-z]
[-pNUM] [-CNUM] [--inaccurate-eof] [--recount] [--cached]
[--whitespace=<nowarn|warn|fix|error|error-all>]
- [--exclude=PATH] [--directory=<root>] [--verbose] [<patch>...]
+ [--exclude=PATH] [--include=PATH] [--directory=<root>]
+ [--verbose] [<patch>...]
DESCRIPTION
-----------
be useful when importing patchsets, where you want to exclude certain
files or directories.
+--include=<path-pattern>::
+ Apply changes to files matching the given path pattern. This can
+ be useful when importing patchsets, where you want to include certain
+ files or directories.
++
+When --exclude and --include patterns are used, they are examined in the
+order they appear on the command line, and the first match determines if a
+patch to each path is used. A patch to a path that does not match any
+include/exclude pattern is used by default if there is no include pattern
+on the command line, and ignored if there is any include pattern.
+
--whitespace=<action>::
When applying a patch, detect a new or modified line that has
whitespace errors. What are considered whitespace errors is
refname::
The name of the ref (the part after $GIT_DIR/).
+ For a non-ambiguous short name of the ref append `:short`.
objecttype::
The type of the object (`blob`, `tree`, `commit`, `tag`).
0 nothing nothing nothing (does not happen)
1 nothing nothing exists use M
2 nothing exists nothing remove path from index
- 3 nothing exists exists use M
+ 3 nothing exists exists, use M if "initial checkout"
+ H == M keep index otherwise
+ exists fail
+ H != M
clean I==H I==M
------------------
merge, but it would not show in `git diff-index --cached $M`
output after two-tree merge.
+Case #3 is slightly tricky and needs explanation. The result from this
+rule logically should be to remove the path if the user staged the removal
+of the path and then swiching to a new branch. That however will prevent
+the initial checkout from happening, so the rule is modified to use M (new
+tree) only when the contents of the index is empty. Otherwise the removal
+of the path is kept as long as $H and $M are the same.
3-Way Merge
~~~~~~~~~~~
from the latter branch, using `rebase --onto`.
First let's assume your 'topic' is based on branch 'next'.
-For example feature developed in 'topic' depends on some
+For example, a feature developed in 'topic' depends on some
functionality which is found in 'next'.
------------
o---o---o topic
------------
-We would want to make 'topic' forked from branch 'master',
-for example because the functionality 'topic' branch depend on
-got merged into more stable 'master' branch, like this:
+We want to make 'topic' forked from branch 'master'; for example,
+because the functionality on which 'topic' depends was merged into the
+more stable 'master' branch. We want our tree to look like this:
------------
o---o---o---o---o master
linkgit:git-pack-objects[1].
-f::
- Pass the `--no-reuse-delta` option to 'git-pack-objects'. See
+ Pass the `--no-reuse-object` option to `git-pack-objects`, see
linkgit:git-pack-objects[1].
-q::
* lynx
* dillo
* open (this is the default under Mac OS X GUI)
+* start (this is the default under MinGW)
Custom commands may also be specified.
Note about konqueror
--------------------
-When 'konqueror' is specified by the a command line option or a
+When 'konqueror' is specified by a command line option or a
configuration variable, we launch 'kfmclient' to try to open the HTML
man page on an already opened konqueror in a new tab if possible.
branch of the `git.git` repository.
Documentation for older releases are available here:
-* link:v1.6.1/git.html[documentation for release 1.6.1]
+* link:v1.6.0.2/git.html[documentation for release 1.6.0.2]
* release notes for
- link:RelNotes-1.6.1.txt[1.6.1],
+ link:RelNotes-1.6.0.2.txt[1.6.0.2],
+ link:RelNotes-1.6.0.1.txt[1.6.0.1],
link:RelNotes-1.6.0.txt[1.6.0].
* link:v1.5.6.5/git.html[documentation for release 1.5.6.5]
Defining a custom hunk-header
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Each group of changes (called "hunk") in the textual diff output
+Each group of changes (called a "hunk") in the textual diff output
is prefixed with a line of the form:
@@ -k,l +n,m @@ TEXT
-The text is called 'hunk header', and by default a line that
-begins with an alphabet, an underscore or a dollar sign is used,
-which matches what GNU 'diff -p' output uses. This default
-selection however is not suited for some contents, and you can
-use customized pattern to make a selection.
+This is called a 'hunk header'. The "TEXT" portion is by default a line
+that begins with an alphabet, an underscore or a dollar sign; this
+matches what GNU 'diff -p' output uses. This default selection however
+is not suited for some contents, and you can use a customized pattern
+to make a selection.
-First in .gitattributes, you would assign the `diff` attribute
+First, in .gitattributes, you would assign the `diff` attribute
for paths.
------------------------
*.tex diff=tex
------------------------
-Then, you would define "diff.tex.funcname" configuration to
+Then, you would define a "diff.tex.funcname" configuration to
specify a regular expression that matches a line that you would
-want to appear as the hunk header, like this:
+want to appear as the hunk header "TEXT", like this:
------------------------
[diff "tex"]
- `html` suitable for HTML/XHTML documents.
-- `java` suitable for source code in the Java lanugage.
+- `java` suitable for source code in the Java language.
- `pascal` suitable for source code in the Pascal/Delphi language.
- 'git-diff-tree' compares contents of two "tree" objects;
-In all of these cases, the commands themselves compare
-corresponding paths in the two sets of files. The result of
-comparison is passed from these commands to what is internally
-called "diffcore", in a format similar to what is output when
-the -p option is not used. E.g.
+In all of these cases, the commands themselves first optionally limit
+the two sets of files by any pathspecs given on their command-lines,
+and compare corresponding paths in the two resulting sets of files.
+
+The pathspecs are used to limit the world diff operates in. They remove
+the filepairs outside the specified sets of pathnames. E.g. If the
+input set of filepairs included:
+
+------------------------------------------------
+:100644 100644 bcd1234... 0123456... M junkfile
+------------------------------------------------
+
+but the command invocation was `git diff-files myfile`, then the
+junkfile entry would be removed from the list because only "myfile"
+is under consideration.
+
+The result of comparison is passed from these commands to what is
+internally called "diffcore", in a format similar to what is output
+when the -p option is not used. E.g.
------------------------------------------------
in-place edit :100644 100644 bcd1234... 0123456... M file0
The diffcore mechanism is fed a list of such comparison results
(each of which is called "filepair", although at this point each
of them talks about a single file), and transforms such a list
-into another list. There are currently 6 such transformations:
+into another list. There are currently 5 such transformations:
-- diffcore-pathspec
- diffcore-break
- diffcore-rename
- diffcore-merge-broken
- diffcore-order
These are applied in sequence. The set of filepairs 'git-diff-{asterisk}'
-commands find are used as the input to diffcore-pathspec, and
-the output from diffcore-pathspec is used as the input to the
+commands find are used as the input to diffcore-break, and
+the output from diffcore-break is used as the input to the
next transformation. The final result is then passed to the
output routine and generates either diff-raw format (see Output
format sections of the manual for 'git-diff-{asterisk}' commands) or
diff-patch format.
-diffcore-pathspec: For Ignoring Files Outside Our Consideration
----------------------------------------------------------------
-
-The first transformation in the chain is diffcore-pathspec, and
-is controlled by giving the pathname parameters to the
-'git-diff-{asterisk}' commands on the command line. The pathspec is used
-to limit the world diff operates in. It removes the filepairs
-outside the specified set of pathnames. E.g. If the input set
-of filepairs included:
-
-------------------------------------------------
-:100644 100644 bcd1234... 0123456... M junkfile
-------------------------------------------------
-
-but the command invocation was `git diff-files myfile`, then the
-junkfile entry would be removed from the list because only "myfile"
-is under consideration.
-
-Implementation note. For performance reasons, 'git-diff-tree'
-uses the pathname parameters on the command line to cull set of
-filepairs it feeds the diffcore mechanism itself, and does not
-use diffcore-pathspec, but the end result is the same.
-
-
diffcore-break: For Splitting Up "Complete Rewrites"
----------------------------------------------------
merge.stat::
- Whether to print the diffstat between ORIG_HEAD and merge result
+ Whether to print the diffstat between ORIG_HEAD and the merge result
at the end of the merge. True by default.
merge.log::
- '%cr': committer date, relative
- '%ct': committer date, UNIX timestamp
- '%ci': committer date, ISO 8601 format
+- '%d': ref names, like the --decorate option of linkgit:git-log[1]
- '%e': encoding
- '%s': subject
- '%b': body
#!/bin/sh
GVF=GIT-VERSION-FILE
-DEF_VER=v1.6.0.GIT
+DEF_VER=v1.6.0.2.GIT
LF='
'
to do a global install, you can do
$ make prefix=/usr all doc info ;# as yourself
- # make prefix=/usr install install-doc install-info ;# as root
+ # make prefix=/usr install install-doc install-html install-info ;# as root
(or prefix=/usr/local, of course). Just like any program suite
that uses $prefix, the built results have some paths encoded,
$ make configure ;# as yourself
$ ./configure --prefix=/usr ;# as yourself
$ make all doc ;# as yourself
- # make install install-doc ;# as root
+ # make install install-doc install-html;# as root
Issues of note:
inclined to install the tools, the default build target
("make all") does _not_ build them.
+ "make doc" builds documentation in man and html formats; there are
+ also "make man", "make html" and "make info". Note that "make html"
+ requires asciidoc, but not xmlto. "make man" (and thus make doc)
+ requires both.
+
+ "make install-doc" installs documentation in man format only; there
+ are also "make install-man", "make install-html" and "make
+ install-info".
+
Building and installing the info file additionally requires
makeinfo and docbook2X. Version 0.8.3 is known to work.
The documentation is written for AsciiDoc 7, but "make
ASCIIDOC8=YesPlease doc" will let you format with AsciiDoc 8.
- Alternatively, pre-formatted documentation are available in
+ Alternatively, pre-formatted documentation is available in
"html" and "man" branches of the git repository itself. For
example, you could:
http://www.kernel.org/pub/software/scm/git/docs/
+ There are also "make quick-install-doc" and "make quick-install-html"
+ which install preformatted man pages and html documentation.
+ This does not require asciidoc/xmlto, but it only works from within
+ a cloned checkout of git.git with these two extra branches, and will
+ not work for the maintainer for obvious chicken-and-egg reasons.
+
It has been reported that docbook-xsl version 1.72 and 1.73 are
buggy; 1.72 misformats manual pages for callouts, and 1.73 needs
the patch in contrib/patches/docbook-xsl-manpages-charmap.patch
endif
NO_STRLCPY = YesPlease
NO_MEMMEM = YesPlease
+ COMPAT_CFLAGS += -Icompat/regex
+ COMPAT_OBJS += compat/regex/regex.o
endif
ifeq ($(uname_S),SunOS)
NEEDS_SOCKET = YesPlease
BASIC_LDFLAGS += -L/usr/local/lib
DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease
THREADED_DELTA_SEARCH = YesPlease
+ COMPAT_CFLAGS += -Icompat/regex
+ COMPAT_OBJS += compat/regex/regex.o
endif
ifeq ($(uname_S),OpenBSD)
NO_STRCASESTR = YesPlease
INTERNAL_QSORT = UnfortunatelyYes
NEEDS_LIBICONV=YesPlease
BASIC_CFLAGS += -D_LARGE_FILES
+ COMPAT_CFLAGS += -Icompat/regex
+ COMPAT_OBJS += compat/regex/regex.o
endif
ifeq ($(uname_S),GNU)
# GNU/Hurd
NO_PERL_MAKEMAKER = YesPlease
NO_POSIX_ONLY_PROGRAMS = YesPlease
NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
- COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat
+ COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/regex -Icompat/fnmatch
COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1
COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
- COMPAT_OBJS += compat/mingw.o compat/fnmatch.o compat/regex.o compat/winansi.o
+ COMPAT_OBJS += compat/mingw.o compat/fnmatch/fnmatch.o compat/regex/regex.o compat/winansi.o
EXTLIBS += -lws2_32
X = .exe
gitexecdir = ../libexec/git-core
doc:
$(MAKE) -C Documentation all
+man:
+ $(MAKE) -C Documentation man
+
+html:
+ $(MAKE) -C Documentation html
+
info:
$(MAKE) -C Documentation info
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(gitexec_instdir_SQ)'
$(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)'
- $(INSTALL) git$X git-upload-pack$X git-receive-pack$X git-upload-archive$X git-shell$X '$(DESTDIR_SQ)$(bindir_SQ)'
+ $(INSTALL) git$X git-upload-pack$X git-receive-pack$X git-upload-archive$X git-shell$X git-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)'
$(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install
$(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install
ifndef NO_TCLTK
quick-install-doc:
$(MAKE) -C Documentation quick-install
+quick-install-html:
+ $(MAKE) -C Documentation quick-install-html
+
### Maintainer's dist rules
static struct lock_file lock_file;
-static struct excludes {
- struct excludes *next;
- const char *path;
-} *excludes;
+static struct string_list limit_by_name;
+static int has_include;
+static void add_name_limit(const char *name, int exclude)
+{
+ struct string_list_item *it;
+
+ it = string_list_append(name, &limit_by_name);
+ it->util = exclude ? NULL : (void *) 1;
+}
static int use_patch(struct patch *p)
{
const char *pathname = p->new_name ? p->new_name : p->old_name;
- struct excludes *x = excludes;
- while (x) {
- if (fnmatch(x->path, pathname, 0) == 0)
- return 0;
- x = x->next;
- }
+ int i;
+
+ /* Paths outside are not touched regardless of "--include" */
if (0 < prefix_length) {
int pathlen = strlen(pathname);
if (pathlen <= prefix_length ||
memcmp(prefix, pathname, prefix_length))
return 0;
}
- return 1;
+
+ /* See if it matches any of exclude/include rule */
+ for (i = 0; i < limit_by_name.nr; i++) {
+ struct string_list_item *it = &limit_by_name.items[i];
+ if (!fnmatch(it->string, pathname, 0))
+ return (it->util != NULL);
+ }
+
+ /*
+ * If we had any include, a path that does not match any rule is
+ * not used. Otherwise, we saw bunch of exclude rules (or none)
+ * and such a path is used.
+ */
+ return !has_include;
}
+
static void prefix_one(char **name)
{
char *old_name = *name;
continue;
}
if (!prefixcmp(arg, "--exclude=")) {
- struct excludes *x = xmalloc(sizeof(*x));
- x->path = arg + 10;
- x->next = excludes;
- excludes = x;
+ add_name_limit(arg + 10, 1);
+ continue;
+ }
+ if (!prefixcmp(arg, "--include=")) {
+ add_name_limit(arg + 10, 0);
+ has_include = 1;
continue;
}
if (!prefixcmp(arg, "-p")) {
{
const char *remote = NULL;
+ git_config(git_default_config, NULL);
+
remote = extract_remote_arg(&argc, argv);
if (remote)
return run_remote_archiver(remote, argc, argv);
}
/* 2-way merge to the new branch */
+ topts.initial_checkout = (!active_nr &&
+ (old->commit == new->commit));
topts.update = 1;
topts.merge = 1;
topts.gently = opts->merge;
commit_locked_index(lock_file))
die("unable to write new index file");
- if (!opts->force)
+ if (!opts->force && !opts->quiet)
show_local_changes(&new->commit->object);
return 0;
return checkout_paths(source_tree, pathspec);
}
+ if (opts.new_branch) {
+ struct strbuf buf;
+ strbuf_init(&buf, 0);
+ strbuf_addstr(&buf, "refs/heads/");
+ strbuf_addstr(&buf, opts.new_branch);
+ if (!get_sha1(buf.buf, rev))
+ die("git checkout: branch %s already exists", opts.new_branch);
+ if (check_ref_format(buf.buf))
+ die("git checkout: we do not like '%s' as a branch name.", opts.new_branch);
+ strbuf_release(&buf);
+ }
+
if (new.name && !new.commit) {
die("Cannot switch branch to a non-commit.");
}
OPT_STRING(0, "reference", &option_reference, "repo",
"reference repository"),
OPT_STRING('o', "origin", &option_origin, "branch",
- "use <branch> instead or 'origin' to track upstream"),
+ "use <branch> instead of 'origin' to track upstream"),
OPT_STRING('u', "upload-pack", &option_upload_pack, "path",
"path to git-upload-pack on the remote"),
OPT_STRING(0, "depth", &option_depth, "depth",
"variable i18n.commitencoding to the encoding your project uses.\n";
int commit_tree(const char *msg, unsigned char *tree,
- struct commit_list *parents, unsigned char *ret)
+ struct commit_list *parents, unsigned char *ret,
+ const char *author)
{
int result;
int encoding_is_utf8;
}
/* Person/date information */
- strbuf_addf(&buffer, "author %s\n", git_author_info(IDENT_ERROR_ON_NO_NAME));
+ if (!author)
+ author = git_author_info(IDENT_ERROR_ON_NO_NAME);
+ strbuf_addf(&buffer, "author %s\n", author);
strbuf_addf(&buffer, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
if (!encoding_is_utf8)
strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding);
if (strbuf_read(&buffer, 0, 0) < 0)
die("git commit-tree: read returned %s", strerror(errno));
- if (!commit_tree(buffer.buf, tree_sha1, parents, commit_sha1)) {
+ if (!commit_tree(buffer.buf, tree_sha1, parents, commit_sha1, NULL)) {
printf("%s\n", sha1_to_hex(commit_sha1));
return 0;
}
}
/*
- * Find out if the message starting at position 'start' in the strbuf
- * contains only whitespace and Signed-off-by lines.
+ * Find out if the message in the strbuf contains only whitespace and
+ * Signed-off-by lines.
*/
-static int message_is_empty(struct strbuf *sb, int start)
+static int message_is_empty(struct strbuf *sb)
{
struct strbuf tmpl;
const char *nl;
- int eol, i;
+ int eol, i, start = 0;
if (cleanup_mode == CLEANUP_NONE && sb->len)
return 0;
"You may want to amend it after fixing the message, or set the config\n"
"variable i18n.commitencoding to the encoding your project uses.\n";
-static void add_parent(struct strbuf *sb, const unsigned char *sha1)
-{
- struct object *obj = parse_object(sha1);
- const char *parent = sha1_to_hex(sha1);
- const char *cp;
-
- if (!obj)
- die("Unable to find commit parent %s", parent);
- if (obj->type != OBJ_COMMIT)
- die("Parent %s isn't a proper commit", parent);
-
- for (cp = sb->buf; cp && (cp = strstr(cp, "\nparent ")); cp += 8) {
- if (!memcmp(cp + 8, parent, 40) && cp[48] == '\n') {
- error("duplicate parent %s ignored", parent);
- return;
- }
- }
- strbuf_addf(sb, "parent %s\n", parent);
-}
-
int cmd_commit(int argc, const char **argv, const char *prefix)
{
- int header_len;
struct strbuf sb;
const char *index_file, *reflog_msg;
char *nl, *p;
unsigned char commit_sha1[20];
struct ref_lock *ref_lock;
+ struct commit_list *parents = NULL, **pptr = &parents;
git_config(git_commit_config, NULL);
return 1;
}
- /*
- * The commit object
- */
- strbuf_init(&sb, 0);
- strbuf_addf(&sb, "tree %s\n",
- sha1_to_hex(active_cache_tree->sha1));
-
/* Determine parents */
if (initial_commit) {
reflog_msg = "commit (initial)";
die("could not parse HEAD commit");
for (c = commit->parents; c; c = c->next)
- add_parent(&sb, c->item->object.sha1);
+ pptr = &commit_list_insert(c->item, pptr)->next;
} else if (in_merge) {
struct strbuf m;
FILE *fp;
reflog_msg = "commit (merge)";
- add_parent(&sb, head_sha1);
+ pptr = &commit_list_insert(lookup_commit(head_sha1), pptr)->next;
strbuf_init(&m, 0);
fp = fopen(git_path("MERGE_HEAD"), "r");
if (fp == NULL)
unsigned char sha1[20];
if (get_sha1_hex(m.buf, sha1) < 0)
die("Corrupt MERGE_HEAD file (%s)", m.buf);
- add_parent(&sb, sha1);
+ pptr = &commit_list_insert(lookup_commit(sha1), pptr)->next;
}
fclose(fp);
strbuf_release(&m);
} else {
reflog_msg = "commit";
- strbuf_addf(&sb, "parent %s\n", sha1_to_hex(head_sha1));
+ pptr = &commit_list_insert(lookup_commit(head_sha1), pptr)->next;
}
-
- strbuf_addf(&sb, "author %s\n",
- fmt_ident(author_name, author_email, author_date, IDENT_ERROR_ON_NO_NAME));
- strbuf_addf(&sb, "committer %s\n", git_committer_info(IDENT_ERROR_ON_NO_NAME));
- if (!is_encoding_utf8(git_commit_encoding))
- strbuf_addf(&sb, "encoding %s\n", git_commit_encoding);
- strbuf_addch(&sb, '\n');
+ parents = reduce_heads(parents);
/* Finally, get the commit message */
- header_len = sb.len;
+ strbuf_init(&sb, 0);
if (strbuf_read_file(&sb, git_path(commit_editmsg), 0) < 0) {
rollback_index_files();
die("could not read commit message");
if (cleanup_mode != CLEANUP_NONE)
stripspace(&sb, cleanup_mode == CLEANUP_ALL);
- if (sb.len < header_len || message_is_empty(&sb, header_len)) {
+ if (message_is_empty(&sb)) {
rollback_index_files();
fprintf(stderr, "Aborting commit due to empty commit message.\n");
exit(1);
}
- strbuf_addch(&sb, '\0');
- if (is_encoding_utf8(git_commit_encoding) && !is_utf8(sb.buf))
- fprintf(stderr, commit_utf8_warn);
- if (write_sha1_file(sb.buf, sb.len - 1, commit_type, commit_sha1)) {
+ if (commit_tree(sb.buf, active_cache_tree->sha1, parents, commit_sha1,
+ fmt_ident(author_name, author_email, author_date,
+ IDENT_ERROR_ON_NO_NAME))) {
rollback_index_files();
die("failed to write commit object");
}
initial_commit ? NULL : head_sha1,
0);
- nl = strchr(sb.buf + header_len, '\n');
+ nl = strchr(sb.buf, '\n');
if (nl)
strbuf_setlen(&sb, nl + 1 - sb.buf);
else
strbuf_addch(&sb, '\n');
- strbuf_remove(&sb, 0, header_len);
strbuf_insert(&sb, 0, reflog_msg, strlen(reflog_msg));
strbuf_insert(&sb, strlen(reflog_msg), ": ", 2);
3 < rev.max_count)
usage(diff_files_usage);
- if (rev.max_count == -1 &&
+ /*
+ * "diff-files --base -p" should not combine merges because it
+ * was not asked to. "diff-files -c -p" should not densify
+ * (the user should ask with "diff-files --cc" explicitly).
+ */
+ if (rev.max_count == -1 && !rev.combine_merges &&
(rev.diffopt.output_format & DIFF_FORMAT_PATCH))
rev.combine_merges = rev.dense_combined_merges = 1;
line[len-1] = 0;
if (get_sha1_hex(line, sha1))
return -1;
- obj = lookup_object(sha1);
- obj = obj ? obj : parse_object(sha1);
+ obj = lookup_unknown_object(sha1);
+ if (!obj || !obj->parsed)
+ obj = parse_object(sha1);
if (!obj)
return -1;
if (obj->type == OBJ_COMMIT)
if (!(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)))
die("'%s': not a regular file or symlink", path);
+ diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/");
+
if (blob[0].mode == S_IFINVALID)
blob[0].mode = canon_mode(st.st_mode);
argv++; argc--;
}
- if (revs->max_count == -1 &&
+ /*
+ * "diff --base" should not combine merges because it was not
+ * asked to. "diff -c" should not densify (if the user wants
+ * dense one, --cc can be explicitly asked for, or just rely
+ * on the default).
+ */
+ if (revs->max_count == -1 && !revs->combine_merges &&
(revs->diffopt.output_format & DIFF_FORMAT_PATCH))
revs->combine_merges = revs->dense_combined_merges = 1;
static const char *copy_line(const char *buf)
{
const char *eol = strchr(buf, '\n');
- if (!eol)
- return "";
+ if (!eol) // simulate strchrnul()
+ eol = buf + strlen(buf);
return xmemdupz(buf, eol - buf);
}
}
}
+/*
+ * generate a format suitable for scanf from a ref_rev_parse_rules
+ * rule, that is replace the "%.*s" spec with a "%s" spec
+ */
+static void gen_scanf_fmt(char *scanf_fmt, const char *rule)
+{
+ char *spec;
+
+ spec = strstr(rule, "%.*s");
+ if (!spec || strstr(spec + 4, "%.*s"))
+ die("invalid rule in ref_rev_parse_rules: %s", rule);
+
+ /* copy all until spec */
+ strncpy(scanf_fmt, rule, spec - rule);
+ scanf_fmt[spec - rule] = '\0';
+ /* copy new spec */
+ strcat(scanf_fmt, "%s");
+ /* copy remaining rule */
+ strcat(scanf_fmt, spec + 4);
+
+ return;
+}
+
+/*
+ * Shorten the refname to an non-ambiguous form
+ */
+static char *get_short_ref(struct refinfo *ref)
+{
+ int i;
+ static char **scanf_fmts;
+ static int nr_rules;
+ char *short_name;
+
+ /* pre generate scanf formats from ref_rev_parse_rules[] */
+ if (!nr_rules) {
+ size_t total_len = 0;
+
+ /* the rule list is NULL terminated, count them first */
+ for (; ref_rev_parse_rules[nr_rules]; nr_rules++)
+ /* no +1 because strlen("%s") < strlen("%.*s") */
+ total_len += strlen(ref_rev_parse_rules[nr_rules]);
+
+ scanf_fmts = xmalloc(nr_rules * sizeof(char *) + total_len);
+
+ total_len = 0;
+ for (i = 0; i < nr_rules; i++) {
+ scanf_fmts[i] = (char *)&scanf_fmts[nr_rules]
+ + total_len;
+ gen_scanf_fmt(scanf_fmts[i], ref_rev_parse_rules[i]);
+ total_len += strlen(ref_rev_parse_rules[i]);
+ }
+ }
+
+ /* bail out if there are no rules */
+ if (!nr_rules)
+ return ref->refname;
+
+ /* buffer for scanf result, at most ref->refname must fit */
+ short_name = xstrdup(ref->refname);
+
+ /* skip first rule, it will always match */
+ for (i = nr_rules - 1; i > 0 ; --i) {
+ int j;
+ int short_name_len;
+
+ if (1 != sscanf(ref->refname, scanf_fmts[i], short_name))
+ continue;
+
+ short_name_len = strlen(short_name);
+
+ /*
+ * check if the short name resolves to a valid ref,
+ * but use only rules prior to the matched one
+ */
+ for (j = 0; j < i; j++) {
+ const char *rule = ref_rev_parse_rules[j];
+ unsigned char short_objectname[20];
+
+ /*
+ * the short name is ambiguous, if it resolves
+ * (with this previous rule) to a valid ref
+ * read_ref() returns 0 on success
+ */
+ if (!read_ref(mkpath(rule, short_name_len, short_name),
+ short_objectname))
+ break;
+ }
+
+ /*
+ * short name is non-ambiguous if all previous rules
+ * haven't resolved to a valid ref
+ */
+ if (j == i)
+ return short_name;
+ }
+
+ free(short_name);
+ return ref->refname;
+}
+
+
/*
* Parse the object referred by ref, and grab needed value.
*/
for (i = 0; i < used_atom_cnt; i++) {
const char *name = used_atom[i];
struct atom_value *v = &ref->value[i];
- if (!strcmp(name, "refname"))
- v->s = ref->refname;
- else if (!strcmp(name, "*refname")) {
- int len = strlen(ref->refname);
- char *s = xmalloc(len + 4);
- sprintf(s, "%s^{}", ref->refname);
- v->s = s;
+ int deref = 0;
+ if (*name == '*') {
+ deref = 1;
+ name++;
+ }
+ if (!prefixcmp(name, "refname")) {
+ const char *formatp = strchr(name, ':');
+ const char *refname = ref->refname;
+
+ /* look for "short" refname format */
+ if (formatp) {
+ formatp++;
+ if (!strcmp(formatp, "short"))
+ refname = get_short_ref(ref);
+ else
+ die("unknown refname format %s",
+ formatp);
+ }
+
+ if (!deref)
+ v->s = refname;
+ else {
+ int len = strlen(refname);
+ char *s = xmalloc(len + 4);
+ sprintf(s, "%s^{}", refname);
+ v->s = s;
+ }
}
}
/* Note: if ".git/hooks" file exists in the repository being
* re-initialized, /etc/core-git/templates/hooks/update would
- * cause git-init to fail here. I think this is sane but
+ * cause "git init" to fail here. I think this is sane but
* it means that the set of templates we ship by default, along
* with the way the namespace under .git/ is organized, should
* be really carefully chosen.
#include "tag.h"
#include "reflog-walk.h"
#include "patch-ids.h"
-#include "refs.h"
#include "run-command.h"
#include "shortlog.h"
static const char *fmt_patch_subject_prefix = "PATCH";
static const char *fmt_pretty;
-static void add_name_decoration(const char *prefix, const char *name, struct object *obj)
-{
- int plen = strlen(prefix);
- int nlen = strlen(name);
- struct name_decoration *res = xmalloc(sizeof(struct name_decoration) + plen + nlen);
- memcpy(res->name, prefix, plen);
- memcpy(res->name + plen, name, nlen + 1);
- res->next = add_decoration(&name_decoration, obj, res);
-}
-
-static int add_ref_decoration(const char *refname, const unsigned char *sha1, int flags, void *cb_data)
-{
- struct object *obj = parse_object(sha1);
- if (!obj)
- return 0;
- add_name_decoration("", refname, obj);
- while (obj->type == OBJ_TAG) {
- obj = ((struct tag *)obj)->tagged;
- if (!obj)
- break;
- add_name_decoration("tag: ", refname, obj);
- }
- return 0;
-}
-
static void cmd_log_init(int argc, const char **argv, const char *prefix,
struct rev_info *rev)
{
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--decorate")) {
- if (!decorate)
- for_each_ref(add_ref_decoration, NULL);
+ load_ref_decorations();
decorate = 1;
} else
die("unrecognized argument: %s", arg);
committer = git_committer_info(IDENT_ERROR_ON_NO_NAME);
endpos = strchr(committer, '>');
if (!endpos)
- die("bogos committer info %s\n", committer);
+ die("bogus committer info %s\n", committer);
add_signoff = xmemdupz(committer, endpos - committer + 1);
}
else if (!strcmp(argv[i], "--attach")) {
buf = xstrdup(v);
argc = split_cmdline(buf, &argv);
+ if (argc < 0)
+ die("Bad branch.%s.mergeoptions string", branch);
argv = xrealloc(argv, sizeof(*argv) * (argc + 2));
memmove(argv + 1, argv, sizeof(*argv) * (argc + 1));
argc++;
parent->next = xmalloc(sizeof(struct commit_list *));
parent->next->item = remoteheads->item;
parent->next->next = NULL;
- commit_tree(merge_msg.buf, result_tree, parent, result_commit);
+ commit_tree(merge_msg.buf, result_tree, parent, result_commit, NULL);
finish(result_commit, "In-index merge");
drop_save();
return 0;
}
free_commit_list(remoteheads);
strbuf_addch(&merge_msg, '\n');
- commit_tree(merge_msg.buf, result_tree, parents, result_commit);
+ commit_tree(merge_msg.buf, result_tree, parents, result_commit, NULL);
strbuf_addf(&buf, "Merge made by %s.", wt_strategy);
finish(result_commit, buf.buf);
strbuf_release(&buf);
#endif
static const char pack_usage[] = "\
-git-pack-objects [{ -q | --progress | --all-progress }] \n\
+git pack-objects [{ -q | --progress | --all-progress }] \n\
[--max-pack-size=N] [--local] [--incremental] \n\
[--window=N] [--window-memory=N] [--depth=N] \n\
[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
char tmpname[PATH_MAX];
int fd;
snprintf(tmpname, sizeof(tmpname),
- "%s/tmp_pack_XXXXXX", get_object_directory());
+ "%s/pack/tmp_pack_XXXXXX", get_object_directory());
fd = xmkstemp(tmpname);
pack_tmp_name = xstrdup(tmpname);
f = sha1fd(fd, pack_tmp_name);
if (entry->type < 0)
die("unable to get type of object %s",
sha1_to_hex(entry->idx.sha1));
+ } else {
+ if (entry->type < 0) {
+ /*
+ * This object is not found, but we
+ * don't have to include it anyway.
+ */
+ continue;
+ }
}
delta_list[n++] = entry;
/*
* Compare the objects in the offset order, in order to emulate the
- * "git-rev-list --objects" output that produced the pack originally.
+ * "git rev-list --objects" output that produced the pack originally.
*/
static int ofscmp(const void *a_, const void *b_)
{
static int show_only;
static unsigned long expire;
-static int prune_tmp_object(char *path, const char *filename)
+static int prune_tmp_object(const char *path, const char *filename)
{
const char *fullpath = mkpath("%s/%s", path, filename);
if (expire) {
/*
* Write errors (particularly out of space) can result in
* failed temporary packs (and more rarely indexes and other
- * files begining with "tmp_") accumulating in the
- * object directory.
+ * files begining with "tmp_") accumulating in the object
+ * and the pack directories.
*/
-static void remove_temporary_files(void)
+static void remove_temporary_files(const char *path)
{
DIR *dir;
struct dirent *de;
- char* dirname=get_object_directory();
- dir = opendir(dirname);
+ dir = opendir(path);
if (!dir) {
- fprintf(stderr, "Unable to open object directory %s\n",
- dirname);
+ fprintf(stderr, "Unable to open directory %s\n", path);
return;
}
while ((de = readdir(dir)) != NULL)
if (!prefixcmp(de->d_name, "tmp_"))
- prune_tmp_object(dirname, de->d_name);
+ prune_tmp_object(path, de->d_name);
closedir(dir);
}
"expire objects older than <time>"),
OPT_END()
};
+ char *s;
save_commit_buffer = 0;
init_revisions(&revs, prefix);
prune_object_dir(get_object_directory());
prune_packed_objects(show_only);
- remove_temporary_files();
+ remove_temporary_files(get_object_directory());
+ s = xstrdup(mkpath("%s/pack", get_object_directory()));
+ remove_temporary_files(s);
+ free(s);
return 0;
}
}
-static const char read_tree_usage[] = "git-read-tree (<sha> | [[-m [--trivial] [--aggressive] | --reset | --prefix=<prefix>] [-u | -i]] [--exclude-per-directory=<gitignore>] [--index-output=<file>] <sha1> [<sha2> [<sha3>]])";
+static const char read_tree_usage[] = "git read-tree (<sha> | [[-m [--trivial] [--aggressive] | --reset | --prefix=<prefix>] [-u | -i]] [--exclude-per-directory=<gitignore>] [--index-output=<file>] <sha1> [<sha2> [<sha3>]])";
static struct lock_file lock_file;
break;
case 2:
opts.fn = twoway_merge;
+ opts.initial_checkout = !active_nr;
break;
case 3:
default:
return i;
}
-static void show_list(const char *title, struct string_list *list)
+static void show_list(const char *title, struct string_list *list,
+ const char *extra_arg)
{
int i;
if (!list->nr)
return;
- printf(title, list->nr > 1 ? "es" : "");
+ printf(title, list->nr > 1 ? "es" : "", extra_arg);
printf("\n ");
for (i = 0; i < list->nr; i++)
printf("%s%s", i ? " " : "", list->items[i].string);
memset(&states, 0, sizeof(states));
for (; argc; argc--, argv++) {
- struct strbuf buf;
int i;
get_remote_ref_states(*argv, &states, !no_query);
}
if (!no_query) {
- strbuf_init(&buf, 0);
- strbuf_addf(&buf, " New remote branch%%s (next fetch "
- "will store in remotes/%s)", states.remote->name);
- show_list(buf.buf, &states.new);
- strbuf_release(&buf);
+ show_list(" New remote branch%s (next fetch "
+ "will store in remotes/%s)",
+ &states.new, states.remote->name);
show_list(" Stale tracking branch%s (use 'git remote "
- "prune')", &states.stale);
+ "prune')", &states.stale, "");
}
if (no_query)
for_each_ref(append_ref_to_tracked_list, &states);
- show_list(" Tracked remote branch%s", &states.tracked);
+ show_list(" Tracked remote branch%s", &states.tracked, "");
if (states.remote->push_refspec_nr) {
printf(" Local branch%s pushed with 'git push'\n ",
static void show_object(struct object_array_entry *p)
{
/* An object with name "foo\n0000000..." can be used to
- * confuse downstream git-pack-objects very badly.
+ * confuse downstream "git pack-objects" very badly.
*/
const char *ep = strchr(p->name, '\n');
"from both the file and the HEAD\n"
"(use -f to force removal)", name);
else if (!index_only) {
- /* It's not dangerous to git-rm --cached a
+ /* It's not dangerous to "git rm --cached" a
* file if the index matches the file or the
* HEAD, since it means the deleted content is
* still available somewhere.
po.out = fd;
po.git_cmd = 1;
if (start_command(&po))
- die("git-pack-objects failed (%s)", strerror(errno));
+ die("git pack-objects failed (%s)", strerror(errno));
/*
* We feed the pack-objects we just spawned with revision
static const char tar_tree_usage[] =
"git tar-tree [--remote=<repo>] <tree-ish> [basedir]\n"
-"*** Note that this command is now deprecated; use git-archive instead.";
+"*** Note that this command is now deprecated; use \"git archive\" instead.";
int cmd_tar_tree(int argc, const char **argv, const char *prefix)
{
/*
- * git-tar-tree is now a wrapper around git-archive --format=tar
+ * "git tar-tree" is now a wrapper around "git archive --format=tar"
*
* $0 --remote=<repo> arg... ==>
- * git-archive --format=tar --remote=<repo> arg...
+ * git archive --format=tar --remote=<repo> arg...
* $0 tree-ish ==>
- * git-archive --format=tar tree-ish
+ * git archive --format=tar tree-ish
* $0 tree-ish basedir ==>
- * git-archive --format-tar --prefix=basedir tree-ish
+ * git archive --format-tar --prefix=basedir tree-ish
*/
int i;
const char **nargv = xcalloc(sizeof(*nargv), argc + 2);
char *basedir_arg;
int nargc = 0;
- nargv[nargc++] = "git-archive";
+ nargv[nargc++] = "archive";
nargv[nargc++] = "--format=tar";
if (2 <= argc && !prefixcmp(argv[1], "--remote=")) {
nargv[nargc] = NULL;
fprintf(stderr,
- "*** git-tar-tree is now deprecated.\n"
- "*** Running git-archive instead.\n***");
+ "*** \"git tar-tree\" is now deprecated.\n"
+ "*** Running \"git archive\" instead.\n***");
for (i = 0; i < nargc; i++) {
fputc(' ', stderr);
sq_quote_print(stderr, nargv[i]);
#include "fsck.h"
static int dry_run, quiet, recover, has_errors, strict;
-static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
+static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
* Default to not allowing changes to the list of files. The
* tool doesn't actually care, but this makes it harder to add
* files to the revision control by mistake by doing something
- * like "git-update-index *" and suddenly having all the object
+ * like "git update-index *" and suddenly having all the object
* files be revision controlled.
*/
static int allow_add;
/* This reads lines formatted in one of three formats:
*
* (1) mode SP sha1 TAB path
- * The first format is what "git-apply --index-info"
+ * The first format is what "git apply --index-info"
* reports, and used to reconstruct a partial tree
* that is used for phony merge base tree when falling
* back on 3-way merge.
*
* (2) mode SP type SP sha1 TAB path
- * The second format is to stuff git-ls-tree output
+ * The second format is to stuff "git ls-tree" output
* into the index file.
*
* (3) mode SP sha1 SP stage TAB path
* This format is to put higher order stages into the
- * index file and matches git-ls-files --stage output.
+ * index file and matches "git ls-files --stage" output.
*/
errno = 0;
ul = strtoul(buf.buf, &ptr, 8);
extern int fmt_merge_msg(int merge_summary, struct strbuf *in,
struct strbuf *out);
extern int commit_tree(const char *msg, unsigned char *tree,
- struct commit_list *parents, unsigned char *ret);
+ struct commit_list *parents, unsigned char *ret,
+ const char *author);
extern int check_pager_config(const char *cmd);
extern int cmd_add(int argc, const char **argv, const char *prefix);
int i, show_hunks;
int working_tree_file = is_null_sha1(elem->sha1);
int abbrev = DIFF_OPT_TST(opt, FULL_INDEX) ? 40 : DEFAULT_ABBREV;
+ const char *a_prefix, *b_prefix;
mmfile_t result_file;
context = opt->context;
+ a_prefix = opt->a_prefix ? opt->a_prefix : "a/";
+ b_prefix = opt->b_prefix ? opt->b_prefix : "b/";
+
/* Read the result of merge first */
if (!working_tree_file)
result = grab_blob(elem->sha1, &result_size);
dump_quoted_path("--- ", "", "/dev/null",
c_meta, c_reset);
else
- dump_quoted_path("--- ", opt->a_prefix, elem->path,
+ dump_quoted_path("--- ", a_prefix, elem->path,
c_meta, c_reset);
if (deleted)
dump_quoted_path("+++ ", "", "/dev/null",
c_meta, c_reset);
else
- dump_quoted_path("+++ ", opt->b_prefix, elem->path,
+ dump_quoted_path("+++ ", b_prefix, elem->path,
c_meta, c_reset);
dump_sline(sline, cnt, num_parent,
DIFF_OPT_TST(opt, COLOR_DIFF));
+++ /dev/null
-/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-/* Enable GNU extensions in fnmatch.h. */
-#ifndef _GNU_SOURCE
-# define _GNU_SOURCE 1
-#endif
-
-#include <errno.h>
-#include <fnmatch.h>
-#include <ctype.h>
-
-#if HAVE_STRING_H || defined _LIBC
-# include <string.h>
-#else
-# include <strings.h>
-#endif
-
-#if defined STDC_HEADERS || defined _LIBC
-# include <stdlib.h>
-#endif
-
-/* For platform which support the ISO C amendement 1 functionality we
- support user defined character classes. */
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
-# include <wchar.h>
-# include <wctype.h>
-#endif
-
-/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
- and linking in this code is a waste when using the GNU C library
- (especially if it is a shared library). Rather than having every GNU
- program understand `configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
-
-#if defined _LIBC || !defined __GNU_LIBRARY__
-
-
-# if defined STDC_HEADERS || !defined isascii
-# define ISASCII(c) 1
-# else
-# define ISASCII(c) isascii(c)
-# endif
-
-# ifdef isblank
-# define ISBLANK(c) (ISASCII (c) && isblank (c))
-# else
-# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-# endif
-# ifdef isgraph
-# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
-# else
-# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
-# endif
-
-# define ISPRINT(c) (ISASCII (c) && isprint (c))
-# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-# define ISALNUM(c) (ISASCII (c) && isalnum (c))
-# define ISALPHA(c) (ISASCII (c) && isalpha (c))
-# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-# define ISLOWER(c) (ISASCII (c) && islower (c))
-# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-# define ISSPACE(c) (ISASCII (c) && isspace (c))
-# define ISUPPER(c) (ISASCII (c) && isupper (c))
-# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
-
-# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
-
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-/* The GNU C library provides support for user-defined character classes
- and the functions from ISO C amendement 1. */
-# ifdef CHARCLASS_NAME_MAX
-# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
-# else
-/* This shouldn't happen but some implementation might still have this
- problem. Use a reasonable default value. */
-# define CHAR_CLASS_MAX_LENGTH 256
-# endif
-
-# ifdef _LIBC
-# define IS_CHAR_CLASS(string) __wctype (string)
-# else
-# define IS_CHAR_CLASS(string) wctype (string)
-# endif
-# else
-# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
-
-# define IS_CHAR_CLASS(string) \
- (STREQ (string, "alpha") || STREQ (string, "upper") \
- || STREQ (string, "lower") || STREQ (string, "digit") \
- || STREQ (string, "alnum") || STREQ (string, "xdigit") \
- || STREQ (string, "space") || STREQ (string, "print") \
- || STREQ (string, "punct") || STREQ (string, "graph") \
- || STREQ (string, "cntrl") || STREQ (string, "blank"))
-# endif
-
-/* Avoid depending on library functions or files
- whose names are inconsistent. */
-
-# if !defined _LIBC && !defined getenv
-extern char *getenv ();
-# endif
-
-# ifndef errno
-extern int errno;
-# endif
-
-/* This function doesn't exist on most systems. */
-
-# if !defined HAVE___STRCHRNUL && !defined _LIBC
-static char *
-__strchrnul (s, c)
- const char *s;
- int c;
-{
- char *result = strchr (s, c);
- if (result == NULL)
- result = strchr (s, '\0');
- return result;
-}
-# endif
-
-# ifndef internal_function
-/* Inside GNU libc we mark some function in a special way. In other
- environments simply ignore the marking. */
-# define internal_function
-# endif
-
-/* Match STRING against the filename pattern PATTERN, returning zero if
- it matches, nonzero if not. */
-static int internal_fnmatch __P ((const char *pattern, const char *string,
- int no_leading_period, int flags))
- internal_function;
-static int
-internal_function
-internal_fnmatch (pattern, string, no_leading_period, flags)
- const char *pattern;
- const char *string;
- int no_leading_period;
- int flags;
-{
- register const char *p = pattern, *n = string;
- register unsigned char c;
-
-/* Note that this evaluates C many times. */
-# ifdef _LIBC
-# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
-# else
-# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
-# endif
-
- while ((c = *p++) != '\0')
- {
- c = FOLD (c);
-
- switch (c)
- {
- case '?':
- if (*n == '\0')
- return FNM_NOMATCH;
- else if (*n == '/' && (flags & FNM_FILE_NAME))
- return FNM_NOMATCH;
- else if (*n == '.' && no_leading_period
- && (n == string
- || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
- return FNM_NOMATCH;
- break;
-
- case '\\':
- if (!(flags & FNM_NOESCAPE))
- {
- c = *p++;
- if (c == '\0')
- /* Trailing \ loses. */
- return FNM_NOMATCH;
- c = FOLD (c);
- }
- if (FOLD ((unsigned char) *n) != c)
- return FNM_NOMATCH;
- break;
-
- case '*':
- if (*n == '.' && no_leading_period
- && (n == string
- || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
- return FNM_NOMATCH;
-
- for (c = *p++; c == '?' || c == '*'; c = *p++)
- {
- if (*n == '/' && (flags & FNM_FILE_NAME))
- /* A slash does not match a wildcard under FNM_FILE_NAME. */
- return FNM_NOMATCH;
- else if (c == '?')
- {
- /* A ? needs to match one character. */
- if (*n == '\0')
- /* There isn't another character; no match. */
- return FNM_NOMATCH;
- else
- /* One character of the string is consumed in matching
- this ? wildcard, so *??? won't match if there are
- less than three characters. */
- ++n;
- }
- }
-
- if (c == '\0')
- /* The wildcard(s) is/are the last element of the pattern.
- If the name is a file name and contains another slash
- this does mean it cannot match. */
- return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL
- ? FNM_NOMATCH : 0);
- else
- {
- const char *endp;
-
- endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0');
-
- if (c == '[')
- {
- int flags2 = ((flags & FNM_FILE_NAME)
- ? flags : (flags & ~FNM_PERIOD));
-
- for (--p; n < endp; ++n)
- if (internal_fnmatch (p, n,
- (no_leading_period
- && (n == string
- || (n[-1] == '/'
- && (flags
- & FNM_FILE_NAME)))),
- flags2)
- == 0)
- return 0;
- }
- else if (c == '/' && (flags & FNM_FILE_NAME))
- {
- while (*n != '\0' && *n != '/')
- ++n;
- if (*n == '/'
- && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD,
- flags) == 0))
- return 0;
- }
- else
- {
- int flags2 = ((flags & FNM_FILE_NAME)
- ? flags : (flags & ~FNM_PERIOD));
-
- if (c == '\\' && !(flags & FNM_NOESCAPE))
- c = *p;
- c = FOLD (c);
- for (--p; n < endp; ++n)
- if (FOLD ((unsigned char) *n) == c
- && (internal_fnmatch (p, n,
- (no_leading_period
- && (n == string
- || (n[-1] == '/'
- && (flags
- & FNM_FILE_NAME)))),
- flags2) == 0))
- return 0;
- }
- }
-
- /* If we come here no match is possible with the wildcard. */
- return FNM_NOMATCH;
-
- case '[':
- {
- /* Nonzero if the sense of the character class is inverted. */
- static int posixly_correct;
- register int not;
- char cold;
-
- if (posixly_correct == 0)
- posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
-
- if (*n == '\0')
- return FNM_NOMATCH;
-
- if (*n == '.' && no_leading_period && (n == string
- || (n[-1] == '/'
- && (flags
- & FNM_FILE_NAME))))
- return FNM_NOMATCH;
-
- if (*n == '/' && (flags & FNM_FILE_NAME))
- /* `/' cannot be matched. */
- return FNM_NOMATCH;
-
- not = (*p == '!' || (posixly_correct < 0 && *p == '^'));
- if (not)
- ++p;
-
- c = *p++;
- for (;;)
- {
- unsigned char fn = FOLD ((unsigned char) *n);
-
- if (!(flags & FNM_NOESCAPE) && c == '\\')
- {
- if (*p == '\0')
- return FNM_NOMATCH;
- c = FOLD ((unsigned char) *p);
- ++p;
-
- if (c == fn)
- goto matched;
- }
- else if (c == '[' && *p == ':')
- {
- /* Leave room for the null. */
- char str[CHAR_CLASS_MAX_LENGTH + 1];
- size_t c1 = 0;
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
- wctype_t wt;
-# endif
- const char *startp = p;
-
- for (;;)
- {
- if (c1 == CHAR_CLASS_MAX_LENGTH)
- /* The name is too long and therefore the pattern
- is ill-formed. */
- return FNM_NOMATCH;
-
- c = *++p;
- if (c == ':' && p[1] == ']')
- {
- p += 2;
- break;
- }
- if (c < 'a' || c >= 'z')
- {
- /* This cannot possibly be a character class name.
- Match it as a normal range. */
- p = startp;
- c = '[';
- goto normal_bracket;
- }
- str[c1++] = c;
- }
- str[c1] = '\0';
-
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
- wt = IS_CHAR_CLASS (str);
- if (wt == 0)
- /* Invalid character class name. */
- return FNM_NOMATCH;
-
- if (__iswctype (__btowc ((unsigned char) *n), wt))
- goto matched;
-# else
- if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n))
- || (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n))
- || (STREQ (str, "blank") && ISBLANK ((unsigned char) *n))
- || (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n))
- || (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n))
- || (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n))
- || (STREQ (str, "lower") && ISLOWER ((unsigned char) *n))
- || (STREQ (str, "print") && ISPRINT ((unsigned char) *n))
- || (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n))
- || (STREQ (str, "space") && ISSPACE ((unsigned char) *n))
- || (STREQ (str, "upper") && ISUPPER ((unsigned char) *n))
- || (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n)))
- goto matched;
-# endif
- }
- else if (c == '\0')
- /* [ (unterminated) loses. */
- return FNM_NOMATCH;
- else
- {
- normal_bracket:
- if (FOLD (c) == fn)
- goto matched;
-
- cold = c;
- c = *p++;
-
- if (c == '-' && *p != ']')
- {
- /* It is a range. */
- unsigned char cend = *p++;
- if (!(flags & FNM_NOESCAPE) && cend == '\\')
- cend = *p++;
- if (cend == '\0')
- return FNM_NOMATCH;
-
- if (cold <= fn && fn <= FOLD (cend))
- goto matched;
-
- c = *p++;
- }
- }
-
- if (c == ']')
- break;
- }
-
- if (!not)
- return FNM_NOMATCH;
- break;
-
- matched:
- /* Skip the rest of the [...] that already matched. */
- while (c != ']')
- {
- if (c == '\0')
- /* [... (unterminated) loses. */
- return FNM_NOMATCH;
-
- c = *p++;
- if (!(flags & FNM_NOESCAPE) && c == '\\')
- {
- if (*p == '\0')
- return FNM_NOMATCH;
- /* XXX 1003.2d11 is unclear if this is right. */
- ++p;
- }
- else if (c == '[' && *p == ':')
- {
- do
- if (*++p == '\0')
- return FNM_NOMATCH;
- while (*p != ':' || p[1] == ']');
- p += 2;
- c = *p;
- }
- }
- if (not)
- return FNM_NOMATCH;
- }
- break;
-
- default:
- if (c != FOLD ((unsigned char) *n))
- return FNM_NOMATCH;
- }
-
- ++n;
- }
-
- if (*n == '\0')
- return 0;
-
- if ((flags & FNM_LEADING_DIR) && *n == '/')
- /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
- return 0;
-
- return FNM_NOMATCH;
-
-# undef FOLD
-}
-
-
-int
-fnmatch (pattern, string, flags)
- const char *pattern;
- const char *string;
- int flags;
-{
- return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags);
-}
-
-#endif /* _LIBC or not __GNU_LIBRARY__. */
+++ /dev/null
-/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _FNMATCH_H
-#define _FNMATCH_H 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined __cplusplus || (defined __STDC__ && __STDC__) || defined WINDOWS32
-# if !defined __GLIBC__ || !defined __P
-# undef __P
-# define __P(protos) protos
-# endif
-#else /* Not C++ or ANSI C. */
-# undef __P
-# define __P(protos) ()
-/* We can get away without defining `const' here only because in this file
- it is used only inside the prototype for `fnmatch', which is elided in
- non-ANSI C where `const' is problematical. */
-#endif /* C++ or ANSI C. */
-
-#ifndef const
-# if (defined __STDC__ && __STDC__) || defined __cplusplus
-# define __const const
-# else
-# define __const
-# endif
-#endif
-
-/* We #undef these before defining them because some losing systems
- (HP-UX A.08.07 for example) define these in <unistd.h>. */
-#undef FNM_PATHNAME
-#undef FNM_NOESCAPE
-#undef FNM_PERIOD
-
-/* Bits set in the FLAGS argument to `fnmatch'. */
-#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
-#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
-#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
-
-#if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE
-# define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */
-# define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */
-# define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */
-#endif
-
-/* Value returned by `fnmatch' if STRING does not match PATTERN. */
-#define FNM_NOMATCH 1
-
-/* This value is returned if the implementation does not support
- `fnmatch'. Since this is not the case here it will never be
- returned but the conformance test suites still require the symbol
- to be defined. */
-#ifdef _XOPEN_SOURCE
-# define FNM_NOSYS (-1)
-#endif
-
-/* Match NAME against the filename pattern PATTERN,
- returning zero if it matches, FNM_NOMATCH if not. */
-extern int fnmatch __P ((__const char *__pattern, __const char *__name,
- int __flags));
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* fnmatch.h */
--- /dev/null
+/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Enable GNU extensions in fnmatch.h. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <ctype.h>
+
+#if HAVE_STRING_H || defined _LIBC
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+#if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+#endif
+
+/* For platform which support the ISO C amendement 1 functionality we
+ support user defined character classes. */
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined _LIBC || !defined __GNU_LIBRARY__
+
+
+# if defined STDC_HEADERS || !defined isascii
+# define ISASCII(c) 1
+# else
+# define ISASCII(c) isascii(c)
+# endif
+
+# ifdef isblank
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# endif
+# ifdef isgraph
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# else
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# endif
+
+# define ISPRINT(c) (ISASCII (c) && isprint (c))
+# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+# define ISALNUM(c) (ISASCII (c) && isalnum (c))
+# define ISALPHA(c) (ISASCII (c) && isalpha (c))
+# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+# define ISLOWER(c) (ISASCII (c) && islower (c))
+# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+# define ISSPACE(c) (ISASCII (c) && isspace (c))
+# define ISUPPER(c) (ISASCII (c) && isupper (c))
+# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+/* The GNU C library provides support for user-defined character classes
+ and the functions from ISO C amendement 1. */
+# ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+ problem. Use a reasonable default value. */
+# define CHAR_CLASS_MAX_LENGTH 256
+# endif
+
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
+# else
+# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+# define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+# endif
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+# if !defined _LIBC && !defined getenv
+extern char *getenv ();
+# endif
+
+# ifndef errno
+extern int errno;
+# endif
+
+/* This function doesn't exist on most systems. */
+
+# if !defined HAVE___STRCHRNUL && !defined _LIBC
+static char *
+__strchrnul (s, c)
+ const char *s;
+ int c;
+{
+ char *result = strchr (s, c);
+ if (result == NULL)
+ result = strchr (s, '\0');
+ return result;
+}
+# endif
+
+# ifndef internal_function
+/* Inside GNU libc we mark some function in a special way. In other
+ environments simply ignore the marking. */
+# define internal_function
+# endif
+
+/* Match STRING against the filename pattern PATTERN, returning zero if
+ it matches, nonzero if not. */
+static int internal_fnmatch __P ((const char *pattern, const char *string,
+ int no_leading_period, int flags))
+ internal_function;
+static int
+internal_function
+internal_fnmatch (pattern, string, no_leading_period, flags)
+ const char *pattern;
+ const char *string;
+ int no_leading_period;
+ int flags;
+{
+ register const char *p = pattern, *n = string;
+ register unsigned char c;
+
+/* Note that this evaluates C many times. */
+# ifdef _LIBC
+# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
+# else
+# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
+# endif
+
+ while ((c = *p++) != '\0')
+ {
+ c = FOLD (c);
+
+ switch (c)
+ {
+ case '?':
+ if (*n == '\0')
+ return FNM_NOMATCH;
+ else if (*n == '/' && (flags & FNM_FILE_NAME))
+ return FNM_NOMATCH;
+ else if (*n == '.' && no_leading_period
+ && (n == string
+ || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
+ return FNM_NOMATCH;
+ break;
+
+ case '\\':
+ if (!(flags & FNM_NOESCAPE))
+ {
+ c = *p++;
+ if (c == '\0')
+ /* Trailing \ loses. */
+ return FNM_NOMATCH;
+ c = FOLD (c);
+ }
+ if (FOLD ((unsigned char) *n) != c)
+ return FNM_NOMATCH;
+ break;
+
+ case '*':
+ if (*n == '.' && no_leading_period
+ && (n == string
+ || (n[-1] == '/' && (flags & FNM_FILE_NAME))))
+ return FNM_NOMATCH;
+
+ for (c = *p++; c == '?' || c == '*'; c = *p++)
+ {
+ if (*n == '/' && (flags & FNM_FILE_NAME))
+ /* A slash does not match a wildcard under FNM_FILE_NAME. */
+ return FNM_NOMATCH;
+ else if (c == '?')
+ {
+ /* A ? needs to match one character. */
+ if (*n == '\0')
+ /* There isn't another character; no match. */
+ return FNM_NOMATCH;
+ else
+ /* One character of the string is consumed in matching
+ this ? wildcard, so *??? won't match if there are
+ less than three characters. */
+ ++n;
+ }
+ }
+
+ if (c == '\0')
+ /* The wildcard(s) is/are the last element of the pattern.
+ If the name is a file name and contains another slash
+ this does mean it cannot match. */
+ return ((flags & FNM_FILE_NAME) && strchr (n, '/') != NULL
+ ? FNM_NOMATCH : 0);
+ else
+ {
+ const char *endp;
+
+ endp = __strchrnul (n, (flags & FNM_FILE_NAME) ? '/' : '\0');
+
+ if (c == '[')
+ {
+ int flags2 = ((flags & FNM_FILE_NAME)
+ ? flags : (flags & ~FNM_PERIOD));
+
+ for (--p; n < endp; ++n)
+ if (internal_fnmatch (p, n,
+ (no_leading_period
+ && (n == string
+ || (n[-1] == '/'
+ && (flags
+ & FNM_FILE_NAME)))),
+ flags2)
+ == 0)
+ return 0;
+ }
+ else if (c == '/' && (flags & FNM_FILE_NAME))
+ {
+ while (*n != '\0' && *n != '/')
+ ++n;
+ if (*n == '/'
+ && (internal_fnmatch (p, n + 1, flags & FNM_PERIOD,
+ flags) == 0))
+ return 0;
+ }
+ else
+ {
+ int flags2 = ((flags & FNM_FILE_NAME)
+ ? flags : (flags & ~FNM_PERIOD));
+
+ if (c == '\\' && !(flags & FNM_NOESCAPE))
+ c = *p;
+ c = FOLD (c);
+ for (--p; n < endp; ++n)
+ if (FOLD ((unsigned char) *n) == c
+ && (internal_fnmatch (p, n,
+ (no_leading_period
+ && (n == string
+ || (n[-1] == '/'
+ && (flags
+ & FNM_FILE_NAME)))),
+ flags2) == 0))
+ return 0;
+ }
+ }
+
+ /* If we come here no match is possible with the wildcard. */
+ return FNM_NOMATCH;
+
+ case '[':
+ {
+ /* Nonzero if the sense of the character class is inverted. */
+ static int posixly_correct;
+ register int not;
+ char cold;
+
+ if (posixly_correct == 0)
+ posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
+
+ if (*n == '\0')
+ return FNM_NOMATCH;
+
+ if (*n == '.' && no_leading_period && (n == string
+ || (n[-1] == '/'
+ && (flags
+ & FNM_FILE_NAME))))
+ return FNM_NOMATCH;
+
+ if (*n == '/' && (flags & FNM_FILE_NAME))
+ /* `/' cannot be matched. */
+ return FNM_NOMATCH;
+
+ not = (*p == '!' || (posixly_correct < 0 && *p == '^'));
+ if (not)
+ ++p;
+
+ c = *p++;
+ for (;;)
+ {
+ unsigned char fn = FOLD ((unsigned char) *n);
+
+ if (!(flags & FNM_NOESCAPE) && c == '\\')
+ {
+ if (*p == '\0')
+ return FNM_NOMATCH;
+ c = FOLD ((unsigned char) *p);
+ ++p;
+
+ if (c == fn)
+ goto matched;
+ }
+ else if (c == '[' && *p == ':')
+ {
+ /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+ size_t c1 = 0;
+# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+ wctype_t wt;
+# endif
+ const char *startp = p;
+
+ for (;;)
+ {
+ if (c1 == CHAR_CLASS_MAX_LENGTH)
+ /* The name is too long and therefore the pattern
+ is ill-formed. */
+ return FNM_NOMATCH;
+
+ c = *++p;
+ if (c == ':' && p[1] == ']')
+ {
+ p += 2;
+ break;
+ }
+ if (c < 'a' || c >= 'z')
+ {
+ /* This cannot possibly be a character class name.
+ Match it as a normal range. */
+ p = startp;
+ c = '[';
+ goto normal_bracket;
+ }
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+ wt = IS_CHAR_CLASS (str);
+ if (wt == 0)
+ /* Invalid character class name. */
+ return FNM_NOMATCH;
+
+ if (__iswctype (__btowc ((unsigned char) *n), wt))
+ goto matched;
+# else
+ if ((STREQ (str, "alnum") && ISALNUM ((unsigned char) *n))
+ || (STREQ (str, "alpha") && ISALPHA ((unsigned char) *n))
+ || (STREQ (str, "blank") && ISBLANK ((unsigned char) *n))
+ || (STREQ (str, "cntrl") && ISCNTRL ((unsigned char) *n))
+ || (STREQ (str, "digit") && ISDIGIT ((unsigned char) *n))
+ || (STREQ (str, "graph") && ISGRAPH ((unsigned char) *n))
+ || (STREQ (str, "lower") && ISLOWER ((unsigned char) *n))
+ || (STREQ (str, "print") && ISPRINT ((unsigned char) *n))
+ || (STREQ (str, "punct") && ISPUNCT ((unsigned char) *n))
+ || (STREQ (str, "space") && ISSPACE ((unsigned char) *n))
+ || (STREQ (str, "upper") && ISUPPER ((unsigned char) *n))
+ || (STREQ (str, "xdigit") && ISXDIGIT ((unsigned char) *n)))
+ goto matched;
+# endif
+ }
+ else if (c == '\0')
+ /* [ (unterminated) loses. */
+ return FNM_NOMATCH;
+ else
+ {
+ normal_bracket:
+ if (FOLD (c) == fn)
+ goto matched;
+
+ cold = c;
+ c = *p++;
+
+ if (c == '-' && *p != ']')
+ {
+ /* It is a range. */
+ unsigned char cend = *p++;
+ if (!(flags & FNM_NOESCAPE) && cend == '\\')
+ cend = *p++;
+ if (cend == '\0')
+ return FNM_NOMATCH;
+
+ if (cold <= fn && fn <= FOLD (cend))
+ goto matched;
+
+ c = *p++;
+ }
+ }
+
+ if (c == ']')
+ break;
+ }
+
+ if (!not)
+ return FNM_NOMATCH;
+ break;
+
+ matched:
+ /* Skip the rest of the [...] that already matched. */
+ while (c != ']')
+ {
+ if (c == '\0')
+ /* [... (unterminated) loses. */
+ return FNM_NOMATCH;
+
+ c = *p++;
+ if (!(flags & FNM_NOESCAPE) && c == '\\')
+ {
+ if (*p == '\0')
+ return FNM_NOMATCH;
+ /* XXX 1003.2d11 is unclear if this is right. */
+ ++p;
+ }
+ else if (c == '[' && *p == ':')
+ {
+ do
+ if (*++p == '\0')
+ return FNM_NOMATCH;
+ while (*p != ':' || p[1] == ']');
+ p += 2;
+ c = *p;
+ }
+ }
+ if (not)
+ return FNM_NOMATCH;
+ }
+ break;
+
+ default:
+ if (c != FOLD ((unsigned char) *n))
+ return FNM_NOMATCH;
+ }
+
+ ++n;
+ }
+
+ if (*n == '\0')
+ return 0;
+
+ if ((flags & FNM_LEADING_DIR) && *n == '/')
+ /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
+ return 0;
+
+ return FNM_NOMATCH;
+
+# undef FOLD
+}
+
+
+int
+fnmatch (pattern, string, flags)
+ const char *pattern;
+ const char *string;
+ int flags;
+{
+ return internal_fnmatch (pattern, string, flags & FNM_PERIOD, flags);
+}
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
--- /dev/null
+/* Copyright (C) 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _FNMATCH_H
+#define _FNMATCH_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined __cplusplus || (defined __STDC__ && __STDC__) || defined WINDOWS32
+# if !defined __GLIBC__ || !defined __P
+# undef __P
+# define __P(protos) protos
+# endif
+#else /* Not C++ or ANSI C. */
+# undef __P
+# define __P(protos) ()
+/* We can get away without defining `const' here only because in this file
+ it is used only inside the prototype for `fnmatch', which is elided in
+ non-ANSI C where `const' is problematical. */
+#endif /* C++ or ANSI C. */
+
+#ifndef const
+# if (defined __STDC__ && __STDC__) || defined __cplusplus
+# define __const const
+# else
+# define __const
+# endif
+#endif
+
+/* We #undef these before defining them because some losing systems
+ (HP-UX A.08.07 for example) define these in <unistd.h>. */
+#undef FNM_PATHNAME
+#undef FNM_NOESCAPE
+#undef FNM_PERIOD
+
+/* Bits set in the FLAGS argument to `fnmatch'. */
+#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
+#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
+#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
+
+#if !defined _POSIX_C_SOURCE || _POSIX_C_SOURCE < 2 || defined _GNU_SOURCE
+# define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */
+# define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */
+# define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */
+#endif
+
+/* Value returned by `fnmatch' if STRING does not match PATTERN. */
+#define FNM_NOMATCH 1
+
+/* This value is returned if the implementation does not support
+ `fnmatch'. Since this is not the case here it will never be
+ returned but the conformance test suites still require the symbol
+ to be defined. */
+#ifdef _XOPEN_SOURCE
+# define FNM_NOSYS (-1)
+#endif
+
+/* Match NAME against the filename pattern PATTERN,
+ returning zero if it matches, FNM_NOMATCH if not. */
+extern int fnmatch __P ((__const char *__pattern, __const char *__name,
+ int __flags));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* fnmatch.h */
+++ /dev/null
-/* Extended regular expression matching and search library,
- version 0.12.
- (Implements POSIX draft P10003.2/D11.2, except for
- internationalization features.)
-
- Copyright (C) 1993 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-
-/* AIX requires this to be the first thing in the file. */
-#if defined (_AIX) && !defined (REGEX_MALLOC)
- #pragma alloca
-#endif
-
-#define _GNU_SOURCE
-
-/* We need this for `regex.h', and perhaps for the Emacs include files. */
-#include <sys/types.h>
-
-/* We used to test for `BSTRING' here, but only GCC and Emacs define
- `BSTRING', as far as I know, and neither of them use this code. */
-#include <string.h>
-#ifndef bcmp
-#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
-#endif
-#ifndef bcopy
-#define bcopy(s, d, n) memcpy ((d), (s), (n))
-#endif
-#ifndef bzero
-#define bzero(s, n) memset ((s), 0, (n))
-#endif
-
-#include <stdlib.h>
-
-
-/* Define the syntax stuff for \<, \>, etc. */
-
-/* This must be nonzero for the wordchar and notwordchar pattern
- commands in re_match_2. */
-#ifndef Sword
-#define Sword 1
-#endif
-
-#ifdef SYNTAX_TABLE
-
-extern char *re_syntax_table;
-
-#else /* not SYNTAX_TABLE */
-
-/* How many characters in the character set. */
-#define CHAR_SET_SIZE 256
-
-static char re_syntax_table[CHAR_SET_SIZE];
-
-static void
-init_syntax_once ()
-{
- register int c;
- static int done = 0;
-
- if (done)
- return;
-
- bzero (re_syntax_table, sizeof re_syntax_table);
-
- for (c = 'a'; c <= 'z'; c++)
- re_syntax_table[c] = Sword;
-
- for (c = 'A'; c <= 'Z'; c++)
- re_syntax_table[c] = Sword;
-
- for (c = '0'; c <= '9'; c++)
- re_syntax_table[c] = Sword;
-
- re_syntax_table['_'] = Sword;
-
- done = 1;
-}
-
-#endif /* not SYNTAX_TABLE */
-
-#define SYNTAX(c) re_syntax_table[c]
-
-\f
-/* Get the interface, including the syntax bits. */
-#include "regex.h"
-
-/* isalpha etc. are used for the character classes. */
-#include <ctype.h>
-
-#ifndef isascii
-#define isascii(c) 1
-#endif
-
-#ifdef isblank
-#define ISBLANK(c) (isascii (c) && isblank (c))
-#else
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-#endif
-#ifdef isgraph
-#define ISGRAPH(c) (isascii (c) && isgraph (c))
-#else
-#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
-#endif
-
-#define ISPRINT(c) (isascii (c) && isprint (c))
-#define ISDIGIT(c) (isascii (c) && isdigit (c))
-#define ISALNUM(c) (isascii (c) && isalnum (c))
-#define ISALPHA(c) (isascii (c) && isalpha (c))
-#define ISCNTRL(c) (isascii (c) && iscntrl (c))
-#define ISLOWER(c) (isascii (c) && islower (c))
-#define ISPUNCT(c) (isascii (c) && ispunct (c))
-#define ISSPACE(c) (isascii (c) && isspace (c))
-#define ISUPPER(c) (isascii (c) && isupper (c))
-#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-/* We remove any previous definition of `SIGN_EXTEND_CHAR',
- since ours (we hope) works properly with all combinations of
- machines, compilers, `char' and `unsigned char' argument types.
- (Per Bothner suggested the basic approach.) */
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-#else /* not __STDC__ */
-/* As in Harbison and Steele. */
-#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
-#endif
-\f
-/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
- use `alloca' instead of `malloc'. This is because using malloc in
- re_search* or re_match* could cause memory leaks when C-g is used in
- Emacs; also, malloc is slower and causes storage fragmentation. On
- the other hand, malloc is more portable, and easier to debug.
-
- Because we sometimes use alloca, some routines have to be macros,
- not functions -- `alloca'-allocated space disappears at the end of the
- function it is called in. */
-
-#ifdef REGEX_MALLOC
-
-#define REGEX_ALLOCATE malloc
-#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-
-#else /* not REGEX_MALLOC */
-
-/* Emacs already defines alloca, sometimes. */
-#ifndef alloca
-
-/* Make alloca work the best possible way. */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else /* not __GNUC__ */
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else /* not __GNUC__ or HAVE_ALLOCA_H */
-#ifndef _AIX /* Already did AIX, up at the top. */
-char *alloca ();
-#endif /* not _AIX */
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
-
-#endif /* not alloca */
-
-#define REGEX_ALLOCATE alloca
-
-/* Assumes a `char *destination' variable. */
-#define REGEX_REALLOCATE(source, osize, nsize) \
- (destination = (char *) alloca (nsize), \
- bcopy (source, destination, osize), \
- destination)
-
-#endif /* not REGEX_MALLOC */
-
-
-/* True if `size1' is non-NULL and PTR is pointing anywhere inside
- `string1' or just past its end. This works if PTR is NULL, which is
- a good thing. */
-#define FIRST_STRING_P(ptr) \
- (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
-
-/* (Re)Allocate N items of type T using malloc, or fail. */
-#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
-#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
-
-#define BYTEWIDTH 8 /* In bits. */
-
-#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
-
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-typedef char boolean;
-#define false 0
-#define true 1
-\f
-/* These are the command codes that appear in compiled regular
- expressions. Some opcodes are followed by argument bytes. A
- command code can specify any interpretation whatsoever for its
- arguments. Zero bytes may appear in the compiled regular expression.
-
- The value of `exactn' is needed in search.c (search_buffer) in Emacs.
- So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
- `exactn' we use here must also be 1. */
-
-typedef enum
-{
- no_op = 0,
-
- /* Followed by one byte giving n, then by n literal bytes. */
- exactn = 1,
-
- /* Matches any (more or less) character. */
- anychar,
-
- /* Matches any one char belonging to specified set. First
- following byte is number of bitmap bytes. Then come bytes
- for a bitmap saying which chars are in. Bits in each byte
- are ordered low-bit-first. A character is in the set if its
- bit is 1. A character too large to have a bit in the map is
- automatically not in the set. */
- charset,
-
- /* Same parameters as charset, but match any character that is
- not one of those specified. */
- charset_not,
-
- /* Start remembering the text that is matched, for storing in a
- register. Followed by one byte with the register number, in
- the range 0 to one less than the pattern buffer's re_nsub
- field. Then followed by one byte with the number of groups
- inner to this one. (This last has to be part of the
- start_memory only because we need it in the on_failure_jump
- of re_match_2.) */
- start_memory,
-
- /* Stop remembering the text that is matched and store it in a
- memory register. Followed by one byte with the register
- number, in the range 0 to one less than `re_nsub' in the
- pattern buffer, and one byte with the number of inner groups,
- just like `start_memory'. (We need the number of inner
- groups here because we don't have any easy way of finding the
- corresponding start_memory when we're at a stop_memory.) */
- stop_memory,
-
- /* Match a duplicate of something remembered. Followed by one
- byte containing the register number. */
- duplicate,
-
- /* Fail unless at beginning of line. */
- begline,
-
- /* Fail unless at end of line. */
- endline,
-
- /* Succeeds if at beginning of buffer (if emacs) or at beginning
- of string to be matched (if not). */
- begbuf,
-
- /* Analogously, for end of buffer/string. */
- endbuf,
-
- /* Followed by two byte relative address to which to jump. */
- jump,
-
- /* Same as jump, but marks the end of an alternative. */
- jump_past_alt,
-
- /* Followed by two-byte relative address of place to resume at
- in case of failure. */
- on_failure_jump,
-
- /* Like on_failure_jump, but pushes a placeholder instead of the
- current string position when executed. */
- on_failure_keep_string_jump,
-
- /* Throw away latest failure point and then jump to following
- two-byte relative address. */
- pop_failure_jump,
-
- /* Change to pop_failure_jump if know won't have to backtrack to
- match; otherwise change to jump. This is used to jump
- back to the beginning of a repeat. If what follows this jump
- clearly won't match what the repeat does, such that we can be
- sure that there is no use backtracking out of repetitions
- already matched, then we change it to a pop_failure_jump.
- Followed by two-byte address. */
- maybe_pop_jump,
-
- /* Jump to following two-byte address, and push a dummy failure
- point. This failure point will be thrown away if an attempt
- is made to use it for a failure. A `+' construct makes this
- before the first repeat. Also used as an intermediary kind
- of jump when compiling an alternative. */
- dummy_failure_jump,
-
- /* Push a dummy failure point and continue. Used at the end of
- alternatives. */
- push_dummy_failure,
-
- /* Followed by two-byte relative address and two-byte number n.
- After matching N times, jump to the address upon failure. */
- succeed_n,
-
- /* Followed by two-byte relative address, and two-byte number n.
- Jump to the address N times, then fail. */
- jump_n,
-
- /* Set the following two-byte relative address to the
- subsequent two-byte number. The address *includes* the two
- bytes of number. */
- set_number_at,
-
- wordchar, /* Matches any word-constituent character. */
- notwordchar, /* Matches any char that is not a word-constituent. */
-
- wordbeg, /* Succeeds if at word beginning. */
- wordend, /* Succeeds if at word end. */
-
- wordbound, /* Succeeds if at a word boundary. */
- notwordbound /* Succeeds if not at a word boundary. */
-
-#ifdef emacs
- ,before_dot, /* Succeeds if before point. */
- at_dot, /* Succeeds if at point. */
- after_dot, /* Succeeds if after point. */
-
- /* Matches any character whose syntax is specified. Followed by
- a byte which contains a syntax code, e.g., Sword. */
- syntaxspec,
-
- /* Matches any character whose syntax is not that specified. */
- notsyntaxspec
-#endif /* emacs */
-} re_opcode_t;
-\f
-/* Common operations on the compiled pattern. */
-
-/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
-
-#define STORE_NUMBER(destination, number) \
- do { \
- (destination)[0] = (number) & 0377; \
- (destination)[1] = (number) >> 8; \
- } while (0)
-
-/* Same as STORE_NUMBER, except increment DESTINATION to
- the byte after where the number is stored. Therefore, DESTINATION
- must be an lvalue. */
-
-#define STORE_NUMBER_AND_INCR(destination, number) \
- do { \
- STORE_NUMBER (destination, number); \
- (destination) += 2; \
- } while (0)
-
-/* Put into DESTINATION a number stored in two contiguous bytes starting
- at SOURCE. */
-
-#define EXTRACT_NUMBER(destination, source) \
- do { \
- (destination) = *(source) & 0377; \
- (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
- } while (0)
-
-#ifdef DEBUG
-static void
-extract_number (dest, source)
- int *dest;
- unsigned char *source;
-{
- int temp = SIGN_EXTEND_CHAR (*(source + 1));
- *dest = *source & 0377;
- *dest += temp << 8;
-}
-
-#ifndef EXTRACT_MACROS /* To debug the macros. */
-#undef EXTRACT_NUMBER
-#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
-#endif /* not EXTRACT_MACROS */
-
-#endif /* DEBUG */
-
-/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
- SOURCE must be an lvalue. */
-
-#define EXTRACT_NUMBER_AND_INCR(destination, source) \
- do { \
- EXTRACT_NUMBER (destination, source); \
- (source) += 2; \
- } while (0)
-
-#ifdef DEBUG
-static void
-extract_number_and_incr (destination, source)
- int *destination;
- unsigned char **source;
-{
- extract_number (destination, *source);
- *source += 2;
-}
-
-#ifndef EXTRACT_MACROS
-#undef EXTRACT_NUMBER_AND_INCR
-#define EXTRACT_NUMBER_AND_INCR(dest, src) \
- extract_number_and_incr (&dest, &src)
-#endif /* not EXTRACT_MACROS */
-
-#endif /* DEBUG */
-\f
-/* If DEBUG is defined, Regex prints many voluminous messages about what
- it is doing (if the variable `debug' is nonzero). If linked with the
- main program in `iregex.c', you can enter patterns and strings
- interactively. And if linked with the main program in `main.c' and
- the other test files, you can run the already-written tests. */
-
-#ifdef DEBUG
-
-/* We use standard I/O for debugging. */
-#include <stdio.h>
-
-/* It is useful to test things that ``must'' be true when debugging. */
-#include <assert.h>
-
-static int debug = 0;
-
-#define DEBUG_STATEMENT(e) e
-#define DEBUG_PRINT1(x) if (debug) printf (x)
-#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
- if (debug) print_partial_compiled_pattern (s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
- if (debug) print_double_string (w, s1, sz1, s2, sz2)
-
-
-extern void printchar ();
-
-/* Print the fastmap in human-readable form. */
-
-void
-print_fastmap (fastmap)
- char *fastmap;
-{
- unsigned was_a_range = 0;
- unsigned i = 0;
-
- while (i < (1 << BYTEWIDTH))
- {
- if (fastmap[i++])
- {
- was_a_range = 0;
- printchar (i - 1);
- while (i < (1 << BYTEWIDTH) && fastmap[i])
- {
- was_a_range = 1;
- i++;
- }
- if (was_a_range)
- {
- printf ("-");
- printchar (i - 1);
- }
- }
- }
- putchar ('\n');
-}
-
-
-/* Print a compiled pattern string in human-readable form, starting at
- the START pointer into it and ending just before the pointer END. */
-
-void
-print_partial_compiled_pattern (start, end)
- unsigned char *start;
- unsigned char *end;
-{
- int mcnt, mcnt2;
- unsigned char *p = start;
- unsigned char *pend = end;
-
- if (start == NULL)
- {
- printf ("(null)\n");
- return;
- }
-
- /* Loop over pattern commands. */
- while (p < pend)
- {
- switch ((re_opcode_t) *p++)
- {
- case no_op:
- printf ("/no_op");
- break;
-
- case exactn:
- mcnt = *p++;
- printf ("/exactn/%d", mcnt);
- do
- {
- putchar ('/');
- printchar (*p++);
- }
- while (--mcnt);
- break;
-
- case start_memory:
- mcnt = *p++;
- printf ("/start_memory/%d/%d", mcnt, *p++);
- break;
-
- case stop_memory:
- mcnt = *p++;
- printf ("/stop_memory/%d/%d", mcnt, *p++);
- break;
-
- case duplicate:
- printf ("/duplicate/%d", *p++);
- break;
-
- case anychar:
- printf ("/anychar");
- break;
-
- case charset:
- case charset_not:
- {
- register int c;
-
- printf ("/charset%s",
- (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
-
- assert (p + *p < pend);
-
- for (c = 0; c < *p; c++)
- {
- unsigned bit;
- unsigned char map_byte = p[1 + c];
-
- putchar ('/');
-
- for (bit = 0; bit < BYTEWIDTH; bit++)
- if (map_byte & (1 << bit))
- printchar (c * BYTEWIDTH + bit);
- }
- p += 1 + *p;
- break;
- }
-
- case begline:
- printf ("/begline");
- break;
-
- case endline:
- printf ("/endline");
- break;
-
- case on_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_jump/0/%d", mcnt);
- break;
-
- case on_failure_keep_string_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_keep_string_jump/0/%d", mcnt);
- break;
-
- case dummy_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/dummy_failure_jump/0/%d", mcnt);
- break;
-
- case push_dummy_failure:
- printf ("/push_dummy_failure");
- break;
-
- case maybe_pop_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/maybe_pop_jump/0/%d", mcnt);
- break;
-
- case pop_failure_jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/pop_failure_jump/0/%d", mcnt);
- break;
-
- case jump_past_alt:
- extract_number_and_incr (&mcnt, &p);
- printf ("/jump_past_alt/0/%d", mcnt);
- break;
-
- case jump:
- extract_number_and_incr (&mcnt, &p);
- printf ("/jump/0/%d", mcnt);
- break;
-
- case succeed_n:
- extract_number_and_incr (&mcnt, &p);
- extract_number_and_incr (&mcnt2, &p);
- printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
- break;
-
- case jump_n:
- extract_number_and_incr (&mcnt, &p);
- extract_number_and_incr (&mcnt2, &p);
- printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
- break;
-
- case set_number_at:
- extract_number_and_incr (&mcnt, &p);
- extract_number_and_incr (&mcnt2, &p);
- printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
- break;
-
- case wordbound:
- printf ("/wordbound");
- break;
-
- case notwordbound:
- printf ("/notwordbound");
- break;
-
- case wordbeg:
- printf ("/wordbeg");
- break;
-
- case wordend:
- printf ("/wordend");
-
-#ifdef emacs
- case before_dot:
- printf ("/before_dot");
- break;
-
- case at_dot:
- printf ("/at_dot");
- break;
-
- case after_dot:
- printf ("/after_dot");
- break;
-
- case syntaxspec:
- printf ("/syntaxspec");
- mcnt = *p++;
- printf ("/%d", mcnt);
- break;
-
- case notsyntaxspec:
- printf ("/notsyntaxspec");
- mcnt = *p++;
- printf ("/%d", mcnt);
- break;
-#endif /* emacs */
-
- case wordchar:
- printf ("/wordchar");
- break;
-
- case notwordchar:
- printf ("/notwordchar");
- break;
-
- case begbuf:
- printf ("/begbuf");
- break;
-
- case endbuf:
- printf ("/endbuf");
- break;
-
- default:
- printf ("?%d", *(p-1));
- }
- }
- printf ("/\n");
-}
-
-
-void
-print_compiled_pattern (bufp)
- struct re_pattern_buffer *bufp;
-{
- unsigned char *buffer = bufp->buffer;
-
- print_partial_compiled_pattern (buffer, buffer + bufp->used);
- printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
-
- if (bufp->fastmap_accurate && bufp->fastmap)
- {
- printf ("fastmap: ");
- print_fastmap (bufp->fastmap);
- }
-
- printf ("re_nsub: %d\t", bufp->re_nsub);
- printf ("regs_alloc: %d\t", bufp->regs_allocated);
- printf ("can_be_null: %d\t", bufp->can_be_null);
- printf ("newline_anchor: %d\n", bufp->newline_anchor);
- printf ("no_sub: %d\t", bufp->no_sub);
- printf ("not_bol: %d\t", bufp->not_bol);
- printf ("not_eol: %d\t", bufp->not_eol);
- printf ("syntax: %d\n", bufp->syntax);
- /* Perhaps we should print the translate table? */
-}
-
-
-void
-print_double_string (where, string1, size1, string2, size2)
- const char *where;
- const char *string1;
- const char *string2;
- int size1;
- int size2;
-{
- unsigned this_char;
-
- if (where == NULL)
- printf ("(null)");
- else
- {
- if (FIRST_STRING_P (where))
- {
- for (this_char = where - string1; this_char < size1; this_char++)
- printchar (string1[this_char]);
-
- where = string2;
- }
-
- for (this_char = where - string2; this_char < size2; this_char++)
- printchar (string2[this_char]);
- }
-}
-
-#else /* not DEBUG */
-
-#undef assert
-#define assert(e)
-
-#define DEBUG_STATEMENT(e)
-#define DEBUG_PRINT1(x)
-#define DEBUG_PRINT2(x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
-
-#endif /* not DEBUG */
-\f
-/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- also be assigned to arbitrarily: each pattern buffer stores its own
- syntax, so it can be changed between regex compilations. */
-reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
-
-
-/* Specify the precise syntax of regexps for compilation. This provides
- for compatibility for various utilities which historically have
- different, incompatible syntaxes.
-
- The argument SYNTAX is a bit mask comprised of the various bits
- defined in regex.h. We return the old syntax. */
-
-reg_syntax_t
-re_set_syntax (syntax)
- reg_syntax_t syntax;
-{
- reg_syntax_t ret = re_syntax_options;
-
- re_syntax_options = syntax;
- return ret;
-}
-\f
-/* This table gives an error message for each of the error codes listed
- in regex.h. Obviously the order here has to be same as there. */
-
-static const char *re_error_msg[] =
- { NULL, /* REG_NOERROR */
- "No match", /* REG_NOMATCH */
- "Invalid regular expression", /* REG_BADPAT */
- "Invalid collation character", /* REG_ECOLLATE */
- "Invalid character class name", /* REG_ECTYPE */
- "Trailing backslash", /* REG_EESCAPE */
- "Invalid back reference", /* REG_ESUBREG */
- "Unmatched [ or [^", /* REG_EBRACK */
- "Unmatched ( or \\(", /* REG_EPAREN */
- "Unmatched \\{", /* REG_EBRACE */
- "Invalid content of \\{\\}", /* REG_BADBR */
- "Invalid range end", /* REG_ERANGE */
- "Memory exhausted", /* REG_ESPACE */
- "Invalid preceding regular expression", /* REG_BADRPT */
- "Premature end of regular expression", /* REG_EEND */
- "Regular expression too big", /* REG_ESIZE */
- "Unmatched ) or \\)", /* REG_ERPAREN */
- };
-\f
-/* Subroutine declarations and macros for regex_compile. */
-
-static void store_op1 (), store_op2 ();
-static void insert_op1 (), insert_op2 ();
-static boolean at_begline_loc_p (), at_endline_loc_p ();
-static boolean group_in_compile_stack ();
-static reg_errcode_t compile_range ();
-
-/* Fetch the next character in the uncompiled pattern---translating it
- if necessary. Also cast from a signed character in the constant
- string passed to us by the user to an unsigned char that we can use
- as an array index (in, e.g., `translate'). */
-#define PATFETCH(c) \
- do {if (p == pend) return REG_EEND; \
- c = (unsigned char) *p++; \
- if (translate) c = translate[c]; \
- } while (0)
-
-/* Fetch the next character in the uncompiled pattern, with no
- translation. */
-#define PATFETCH_RAW(c) \
- do {if (p == pend) return REG_EEND; \
- c = (unsigned char) *p++; \
- } while (0)
-
-/* Go backwards one character in the pattern. */
-#define PATUNFETCH p--
-
-
-/* If `translate' is non-null, return translate[D], else just D. We
- cast the subscript to translate because some data is declared as
- `char *', to avoid warnings when a string constant is passed. But
- when we use a character as a subscript we must make it unsigned. */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
-
-
-/* Macros for outputting the compiled pattern into `buffer'. */
-
-/* If the buffer isn't allocated when it comes in, use this. */
-#define INIT_BUF_SIZE 32
-
-/* Make sure we have at least N more bytes of space in buffer. */
-#define GET_BUFFER_SPACE(n) \
- while (b - bufp->buffer + (n) > bufp->allocated) \
- EXTEND_BUFFER ()
-
-/* Make sure we have one more byte of buffer space and then add C to it. */
-#define BUF_PUSH(c) \
- do { \
- GET_BUFFER_SPACE (1); \
- *b++ = (unsigned char) (c); \
- } while (0)
-
-
-/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
-#define BUF_PUSH_2(c1, c2) \
- do { \
- GET_BUFFER_SPACE (2); \
- *b++ = (unsigned char) (c1); \
- *b++ = (unsigned char) (c2); \
- } while (0)
-
-
-/* As with BUF_PUSH_2, except for three bytes. */
-#define BUF_PUSH_3(c1, c2, c3) \
- do { \
- GET_BUFFER_SPACE (3); \
- *b++ = (unsigned char) (c1); \
- *b++ = (unsigned char) (c2); \
- *b++ = (unsigned char) (c3); \
- } while (0)
-
-
-/* Store a jump with opcode OP at LOC to location TO. We store a
- relative address offset by the three bytes the jump itself occupies. */
-#define STORE_JUMP(op, loc, to) \
- store_op1 (op, loc, (to) - (loc) - 3)
-
-/* Likewise, for a two-argument jump. */
-#define STORE_JUMP2(op, loc, to, arg) \
- store_op2 (op, loc, (to) - (loc) - 3, arg)
-
-/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
-#define INSERT_JUMP(op, loc, to) \
- insert_op1 (op, loc, (to) - (loc) - 3, b)
-
-/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
-#define INSERT_JUMP2(op, loc, to, arg) \
- insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
-
-
-/* This is not an arbitrary limit: the arguments which represent offsets
- into the pattern are two bytes long. So if 2^16 bytes turns out to
- be too small, many things would have to change. */
-#define MAX_BUF_SIZE (1L << 16)
-
-
-/* Extend the buffer by twice its current size via realloc and
- reset the pointers that pointed into the old block to point to the
- correct places in the new one. If extending the buffer results in it
- being larger than MAX_BUF_SIZE, then flag memory exhausted. */
-#define EXTEND_BUFFER() \
- do { \
- unsigned char *old_buffer = bufp->buffer; \
- if (bufp->allocated == MAX_BUF_SIZE) \
- return REG_ESIZE; \
- bufp->allocated <<= 1; \
- if (bufp->allocated > MAX_BUF_SIZE) \
- bufp->allocated = MAX_BUF_SIZE; \
- bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
- if (bufp->buffer == NULL) \
- return REG_ESPACE; \
- /* If the buffer moved, move all the pointers into it. */ \
- if (old_buffer != bufp->buffer) \
- { \
- b = (b - old_buffer) + bufp->buffer; \
- begalt = (begalt - old_buffer) + bufp->buffer; \
- if (fixup_alt_jump) \
- fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
- if (laststart) \
- laststart = (laststart - old_buffer) + bufp->buffer; \
- if (pending_exact) \
- pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
- } \
- } while (0)
-
-
-/* Since we have one byte reserved for the register number argument to
- {start,stop}_memory, the maximum number of groups we can report
- things about is what fits in that byte. */
-#define MAX_REGNUM 255
-
-/* But patterns can have more than `MAX_REGNUM' registers. We just
- ignore the excess. */
-typedef unsigned regnum_t;
-
-
-/* Macros for the compile stack. */
-
-/* Since offsets can go either forwards or backwards, this type needs to
- be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
-typedef int pattern_offset_t;
-
-typedef struct
-{
- pattern_offset_t begalt_offset;
- pattern_offset_t fixup_alt_jump;
- pattern_offset_t inner_group_offset;
- pattern_offset_t laststart_offset;
- regnum_t regnum;
-} compile_stack_elt_t;
-
-
-typedef struct
-{
- compile_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
-} compile_stack_type;
-
-
-#define INIT_COMPILE_STACK_SIZE 32
-
-#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
-#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
-
-/* The next available element. */
-#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
-
-
-/* Set the bit for character C in a list. */
-#define SET_LIST_BIT(c) \
- (b[((unsigned char) (c)) / BYTEWIDTH] \
- |= 1 << (((unsigned char) c) % BYTEWIDTH))
-
-
-/* Get the next unsigned number in the uncompiled pattern. */
-#define GET_UNSIGNED_NUMBER(num) \
- { if (p != pend) \
- { \
- PATFETCH (c); \
- while (ISDIGIT (c)) \
- { \
- if (num < 0) \
- num = 0; \
- num = num * 10 + c - '0'; \
- if (p == pend) \
- break; \
- PATFETCH (c); \
- } \
- } \
- }
-
-#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
-
-#define IS_CHAR_CLASS(string) \
- (STREQ (string, "alpha") || STREQ (string, "upper") \
- || STREQ (string, "lower") || STREQ (string, "digit") \
- || STREQ (string, "alnum") || STREQ (string, "xdigit") \
- || STREQ (string, "space") || STREQ (string, "print") \
- || STREQ (string, "punct") || STREQ (string, "graph") \
- || STREQ (string, "cntrl") || STREQ (string, "blank"))
-\f
-/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
- Returns one of error codes defined in `regex.h', or zero for success.
-
- Assumes the `allocated' (and perhaps `buffer') and `translate'
- fields are set in BUFP on entry.
-
- If it succeeds, results are put in BUFP (if it returns an error, the
- contents of BUFP are undefined):
- `buffer' is the compiled pattern;
- `syntax' is set to SYNTAX;
- `used' is set to the length of the compiled pattern;
- `fastmap_accurate' is zero;
- `re_nsub' is the number of subexpressions in PATTERN;
- `not_bol' and `not_eol' are zero;
-
- The `fastmap' and `newline_anchor' fields are neither
- examined nor set. */
-
-static reg_errcode_t
-regex_compile (pattern, size, syntax, bufp)
- const char *pattern;
- int size;
- reg_syntax_t syntax;
- struct re_pattern_buffer *bufp;
-{
- /* We fetch characters from PATTERN here. Even though PATTERN is
- `char *' (i.e., signed), we declare these variables as unsigned, so
- they can be reliably used as array indices. */
- register unsigned char c, c1;
-
- /* A random tempory spot in PATTERN. */
- const char *p1;
-
- /* Points to the end of the buffer, where we should append. */
- register unsigned char *b;
-
- /* Keeps track of unclosed groups. */
- compile_stack_type compile_stack;
-
- /* Points to the current (ending) position in the pattern. */
- const char *p = pattern;
- const char *pend = pattern + size;
-
- /* How to translate the characters in the pattern. */
- char *translate = bufp->translate;
-
- /* Address of the count-byte of the most recently inserted `exactn'
- command. This makes it possible to tell if a new exact-match
- character can be added to that command or if the character requires
- a new `exactn' command. */
- unsigned char *pending_exact = 0;
-
- /* Address of start of the most recently finished expression.
- This tells, e.g., postfix * where to find the start of its
- operand. Reset at the beginning of groups and alternatives. */
- unsigned char *laststart = 0;
-
- /* Address of beginning of regexp, or inside of last group. */
- unsigned char *begalt;
-
- /* Place in the uncompiled pattern (i.e., the {) to
- which to go back if the interval is invalid. */
- const char *beg_interval;
-
- /* Address of the place where a forward jump should go to the end of
- the containing expression. Each alternative of an `or' -- except the
- last -- ends with a forward jump of this sort. */
- unsigned char *fixup_alt_jump = 0;
-
- /* Counts open-groups as they are encountered. Remembered for the
- matching close-group on the compile stack, so the same register
- number is put in the stop_memory as the start_memory. */
- regnum_t regnum = 0;
-
-#ifdef DEBUG
- DEBUG_PRINT1 ("\nCompiling pattern: ");
- if (debug)
- {
- unsigned debug_count;
-
- for (debug_count = 0; debug_count < size; debug_count++)
- printchar (pattern[debug_count]);
- putchar ('\n');
- }
-#endif /* DEBUG */
-
- /* Initialize the compile stack. */
- compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
-
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
- compile_stack.avail = 0;
-
- /* Initialize the pattern buffer. */
- bufp->syntax = syntax;
- bufp->fastmap_accurate = 0;
- bufp->not_bol = bufp->not_eol = 0;
-
- /* Set `used' to zero, so that if we return an error, the pattern
- printer (for debugging) will think there's no pattern. We reset it
- at the end. */
- bufp->used = 0;
-
- /* Always count groups, whether or not bufp->no_sub is set. */
- bufp->re_nsub = 0;
-
-#if !defined (emacs) && !defined (SYNTAX_TABLE)
- /* Initialize the syntax table. */
- init_syntax_once ();
-#endif
-
- if (bufp->allocated == 0)
- {
- if (bufp->buffer)
- { /* If zero allocated, but buffer is non-null, try to realloc
- enough space. This loses if buffer's address is bogus, but
- that is the user's responsibility. */
- RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
- }
- else
- { /* Caller did not allocate a buffer. Do it for them. */
- bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
- }
- if (!bufp->buffer) return REG_ESPACE;
-
- bufp->allocated = INIT_BUF_SIZE;
- }
-
- begalt = b = bufp->buffer;
-
- /* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend)
- {
- PATFETCH (c);
-
- switch (c)
- {
- case '^':
- {
- if ( /* If at start of pattern, it's an operator. */
- p == pattern + 1
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's come before. */
- || at_begline_loc_p (pattern, p, syntax))
- BUF_PUSH (begline);
- else
- goto normal_char;
- }
- break;
-
-
- case '$':
- {
- if ( /* If at end of pattern, it's an operator. */
- p == pend
- /* If context independent, it's an operator. */
- || syntax & RE_CONTEXT_INDEP_ANCHORS
- /* Otherwise, depends on what's next. */
- || at_endline_loc_p (p, pend, syntax))
- BUF_PUSH (endline);
- else
- goto normal_char;
- }
- break;
-
-
- case '+':
- case '?':
- if ((syntax & RE_BK_PLUS_QM)
- || (syntax & RE_LIMITED_OPS))
- goto normal_char;
- handle_plus:
- case '*':
- /* If there is no previous pattern... */
- if (!laststart)
- {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
- }
-
- {
- /* Are we optimizing this jump? */
- boolean keep_string_p = false;
-
- /* 1 means zero (many) matches is allowed. */
- char zero_times_ok = 0, many_times_ok = 0;
-
- /* If there is a sequence of repetition chars, collapse it
- down to just one (the right one). We can't combine
- interval operators with these because of, e.g., `a{2}*',
- which should only match an even number of `a's. */
-
- for (;;)
- {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
-
- if (p == pend)
- break;
-
- PATFETCH (c);
-
- if (c == '*'
- || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
- ;
-
- else if (syntax & RE_BK_PLUS_QM && c == '\\')
- {
- if (p == pend) return REG_EESCAPE;
-
- PATFETCH (c1);
- if (!(c1 == '+' || c1 == '?'))
- {
- PATUNFETCH;
- PATUNFETCH;
- break;
- }
-
- c = c1;
- }
- else
- {
- PATUNFETCH;
- break;
- }
-
- /* If we get here, we found another repeat character. */
- }
-
- /* Star, etc. applied to an empty pattern is equivalent
- to an empty pattern. */
- if (!laststart)
- break;
-
- /* Now we know whether or not zero matches is allowed
- and also whether or not two or more matches is allowed. */
- if (many_times_ok)
- { /* More than one repetition is allowed, so put in at the
- end a backward relative jump from `b' to before the next
- jump we're going to put in below (which jumps from
- laststart to after this jump).
-
- But if we are at the `*' in the exact sequence `.*\n',
- insert an unconditional jump backwards to the .,
- instead of the beginning of the loop. This way we only
- push a failure point once, instead of every time
- through the loop. */
- assert (p - 1 > pattern);
-
- /* Allocate the space for the jump. */
- GET_BUFFER_SPACE (3);
-
- /* We know we are not at the first character of the pattern,
- because laststart was nonzero. And we've already
- incremented `p', by the way, to be the character after
- the `*'. Do we have to do something analogous here
- for null bytes, because of RE_DOT_NOT_NULL? */
- if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
- && zero_times_ok
- && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
- && !(syntax & RE_DOT_NEWLINE))
- { /* We have .*\n. */
- STORE_JUMP (jump, b, laststart);
- keep_string_p = true;
- }
- else
- /* Anything else. */
- STORE_JUMP (maybe_pop_jump, b, laststart - 3);
-
- /* We've added more stuff to the buffer. */
- b += 3;
- }
-
- /* On failure, jump from laststart to b + 3, which will be the
- end of the buffer after this jump is inserted. */
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
- : on_failure_jump,
- laststart, b + 3);
- pending_exact = 0;
- b += 3;
-
- if (!zero_times_ok)
- {
- /* At least one repetition is required, so insert a
- `dummy_failure_jump' before the initial
- `on_failure_jump' instruction of the loop. This
- effects a skip over that instruction the first time
- we hit that loop. */
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
- b += 3;
- }
- }
- break;
-
-
- case '.':
- laststart = b;
- BUF_PUSH (anychar);
- break;
-
-
- case '[':
- {
- boolean had_char_class = false;
-
- if (p == pend) return REG_EBRACK;
-
- /* Ensure that we have enough space to push a charset: the
- opcode, the length count, and the bitset; 34 bytes in all. */
- GET_BUFFER_SPACE (34);
-
- laststart = b;
-
- /* We test `*p == '^' twice, instead of using an if
- statement, so we only need one BUF_PUSH. */
- BUF_PUSH (*p == '^' ? charset_not : charset);
- if (*p == '^')
- p++;
-
- /* Remember the first position in the bracket expression. */
- p1 = p;
-
- /* Push the number of bytes in the bitmap. */
- BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
-
- /* Clear the whole map. */
- bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
-
- /* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) b[-2] == charset_not
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
- SET_LIST_BIT ('\n');
-
- /* Read in characters and ranges, setting map bits. */
- for (;;)
- {
- if (p == pend) return REG_EBRACK;
-
- PATFETCH (c);
-
- /* \ might escape characters inside [...] and [^...]. */
- if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
- {
- if (p == pend) return REG_EESCAPE;
-
- PATFETCH (c1);
- SET_LIST_BIT (c1);
- continue;
- }
-
- /* Could be the end of the bracket expression. If it's
- not (i.e., when the bracket expression is `[]' so
- far), the ']' character bit gets set way below. */
- if (c == ']' && p != p1 + 1)
- break;
-
- /* Look ahead to see if it's a range when the last thing
- was a character class. */
- if (had_char_class && c == '-' && *p != ']')
- return REG_ERANGE;
-
- /* Look ahead to see if it's a range when the last thing
- was a character: if this is a hyphen not at the
- beginning or the end of a list, then it's the range
- operator. */
- if (c == '-'
- && !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
- && *p != ']')
- {
- reg_errcode_t ret
- = compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
- }
-
- else if (p[0] == '-' && p[1] != ']')
- { /* This handles ranges made up of characters only. */
- reg_errcode_t ret;
-
- /* Move past the `-'. */
- PATFETCH (c1);
-
- ret = compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
- }
-
- /* See if we're at the beginning of a possible character
- class. */
-
- else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
- { /* Leave room for the null. */
- char str[CHAR_CLASS_MAX_LENGTH + 1];
-
- PATFETCH (c);
- c1 = 0;
-
- /* If pattern is `[[:'. */
- if (p == pend) return REG_EBRACK;
-
- for (;;)
- {
- PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
- break;
- str[c1++] = c;
- }
- str[c1] = '\0';
-
- /* If isn't a word bracketed by `[:' and:`]':
- undo the ending character, the letters, and leave
- the leading `:' and `[' (but set bits for them). */
- if (c == ':' && *p == ']')
- {
- int ch;
- boolean is_alnum = STREQ (str, "alnum");
- boolean is_alpha = STREQ (str, "alpha");
- boolean is_blank = STREQ (str, "blank");
- boolean is_cntrl = STREQ (str, "cntrl");
- boolean is_digit = STREQ (str, "digit");
- boolean is_graph = STREQ (str, "graph");
- boolean is_lower = STREQ (str, "lower");
- boolean is_print = STREQ (str, "print");
- boolean is_punct = STREQ (str, "punct");
- boolean is_space = STREQ (str, "space");
- boolean is_upper = STREQ (str, "upper");
- boolean is_xdigit = STREQ (str, "xdigit");
-
- if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
-
- /* Throw away the ] at the end of the character
- class. */
- PATFETCH (c);
-
- if (p == pend) return REG_EBRACK;
-
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
- {
- if ( (is_alnum && ISALNUM (ch))
- || (is_alpha && ISALPHA (ch))
- || (is_blank && ISBLANK (ch))
- || (is_cntrl && ISCNTRL (ch))
- || (is_digit && ISDIGIT (ch))
- || (is_graph && ISGRAPH (ch))
- || (is_lower && ISLOWER (ch))
- || (is_print && ISPRINT (ch))
- || (is_punct && ISPUNCT (ch))
- || (is_space && ISSPACE (ch))
- || (is_upper && ISUPPER (ch))
- || (is_xdigit && ISXDIGIT (ch)))
- SET_LIST_BIT (ch);
- }
- had_char_class = true;
- }
- else
- {
- c1++;
- while (c1--)
- PATUNFETCH;
- SET_LIST_BIT ('[');
- SET_LIST_BIT (':');
- had_char_class = false;
- }
- }
- else
- {
- had_char_class = false;
- SET_LIST_BIT (c);
- }
- }
-
- /* Discard any (non)matching list bytes that are all 0 at the
- end of the map. Decrease the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
- }
- break;
-
-
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_open;
- else
- goto normal_char;
-
-
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_close;
- else
- goto normal_char;
-
-
- case '\n':
- if (syntax & RE_NEWLINE_ALT)
- goto handle_alt;
- else
- goto normal_char;
-
-
- case '|':
- if (syntax & RE_NO_BK_VBAR)
- goto handle_alt;
- else
- goto normal_char;
-
-
- case '{':
- if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
- goto handle_interval;
- else
- goto normal_char;
-
-
- case '\\':
- if (p == pend) return REG_EESCAPE;
-
- /* Do not translate the character after the \, so that we can
- distinguish, e.g., \B from \b, even if we normally would
- translate, e.g., B to b. */
- PATFETCH_RAW (c);
-
- switch (c)
- {
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
-
- handle_open:
- bufp->re_nsub++;
- regnum++;
-
- if (COMPILE_STACK_FULL)
- {
- RETALLOC (compile_stack.stack, compile_stack.size << 1,
- compile_stack_elt_t);
- if (compile_stack.stack == NULL) return REG_ESPACE;
-
- compile_stack.size <<= 1;
- }
-
- /* These are the values to restore when we hit end of this
- group. They are all relative offsets, so that if the
- whole pattern moves because of realloc, they will still
- be valid. */
- COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
- COMPILE_STACK_TOP.fixup_alt_jump
- = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
- COMPILE_STACK_TOP.regnum = regnum;
-
- /* We will eventually replace the 0 with the number of
- groups inner to this one. But do not push a
- start_memory for groups beyond the last one we can
- represent in the compiled pattern. */
- if (regnum <= MAX_REGNUM)
- {
- COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
- BUF_PUSH_3 (start_memory, regnum, 0);
- }
-
- compile_stack.avail++;
-
- fixup_alt_jump = 0;
- laststart = 0;
- begalt = b;
- /* If we've reached MAX_REGNUM groups, then this open
- won't actually generate any code, so we'll have to
- clear pending_exact explicitly. */
- pending_exact = 0;
- break;
-
-
- case ')':
- if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
-
- if (COMPILE_STACK_EMPTY)
- {
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_backslash;
- else
- return REG_ERPAREN;
- }
-
- handle_close:
- if (fixup_alt_jump)
- { /* Push a dummy failure point at the end of the
- alternative for a possible future
- `pop_failure_jump' to pop. See comments at
- `push_dummy_failure' in `re_match_2'. */
- BUF_PUSH (push_dummy_failure);
-
- /* We allocated space for this jump when we assigned
- to `fixup_alt_jump', in the `handle_alt' case below. */
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
- }
-
- /* See similar code for backslashed left paren above. */
- if (COMPILE_STACK_EMPTY)
- {
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_char;
- else
- return REG_ERPAREN;
- }
-
- /* Since we just checked for an empty stack above, this
- ``can't happen''. */
- assert (compile_stack.avail != 0);
- {
- /* We don't just want to restore into `regnum', because
- later groups should continue to be numbered higher,
- as in `(ab)c(de)' -- the second group is #2. */
- regnum_t this_group_regnum;
-
- compile_stack.avail--;
- begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
- fixup_alt_jump
- = COMPILE_STACK_TOP.fixup_alt_jump
- ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
- : 0;
- laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
- this_group_regnum = COMPILE_STACK_TOP.regnum;
- /* If we've reached MAX_REGNUM groups, then this open
- won't actually generate any code, so we'll have to
- clear pending_exact explicitly. */
- pending_exact = 0;
-
- /* We're at the end of the group, so now we know how many
- groups were inside this one. */
- if (this_group_regnum <= MAX_REGNUM)
- {
- unsigned char *inner_group_loc
- = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
-
- *inner_group_loc = regnum - this_group_regnum;
- BUF_PUSH_3 (stop_memory, this_group_regnum,
- regnum - this_group_regnum);
- }
- }
- break;
-
-
- case '|': /* `\|'. */
- if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
- goto normal_backslash;
- handle_alt:
- if (syntax & RE_LIMITED_OPS)
- goto normal_char;
-
- /* Insert before the previous alternative a jump which
- jumps to this alternative if the former fails. */
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (on_failure_jump, begalt, b + 6);
- pending_exact = 0;
- b += 3;
-
- /* The alternative before this one has a jump after it
- which gets executed if it gets matched. Adjust that
- jump so it will jump to this alternative's analogous
- jump (put in below, which in turn will jump to the next
- (if any) alternative's such jump, etc.). The last such
- jump jumps to the correct final destination. A picture:
- _____ _____
- | | | |
- | v | v
- a | b | c
-
- If we are at `b', then fixup_alt_jump right now points to a
- three-byte space after `a'. We'll put in the jump, set
- fixup_alt_jump to right after `b', and leave behind three
- bytes which we'll fill in when we get to after `c'. */
-
- if (fixup_alt_jump)
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
-
- /* Mark and leave space for a jump after this alternative,
- to be filled in later either by next alternative or
- when know we're at the end of a series of alternatives. */
- fixup_alt_jump = b;
- GET_BUFFER_SPACE (3);
- b += 3;
-
- laststart = 0;
- begalt = b;
- break;
-
-
- case '{':
- /* If \{ is a literal. */
- if (!(syntax & RE_INTERVALS)
- /* If we're at `\{' and it's not the open-interval
- operator. */
- || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- || (p - 2 == pattern && p == pend))
- goto normal_backslash;
-
- handle_interval:
- {
- /* If got here, then the syntax allows intervals. */
-
- /* At least (most) this many matches must be made. */
- int lower_bound = -1, upper_bound = -1;
-
- beg_interval = p - 1;
-
- if (p == pend)
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_EBRACE;
- }
-
- GET_UNSIGNED_NUMBER (lower_bound);
-
- if (c == ',')
- {
- GET_UNSIGNED_NUMBER (upper_bound);
- if (upper_bound < 0) upper_bound = RE_DUP_MAX;
- }
- else
- /* Interval such as `{1}' => match exactly once. */
- upper_bound = lower_bound;
-
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound)
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
-
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (c != '\\') return REG_EBRACE;
-
- PATFETCH (c);
- }
-
- if (c != '}')
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- return REG_BADBR;
- }
-
- /* We just parsed a valid interval. */
-
- /* If it's invalid to have no preceding re. */
- if (!laststart)
- {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
- else if (syntax & RE_CONTEXT_INDEP_OPS)
- laststart = b;
- else
- goto unfetch_interval;
- }
-
- /* If the upper bound is zero, don't want to succeed at
- all; jump from `laststart' to `b + 3', which will be
- the end of the buffer after we insert the jump. */
- if (upper_bound == 0)
- {
- GET_BUFFER_SPACE (3);
- INSERT_JUMP (jump, laststart, b + 3);
- b += 3;
- }
-
- /* Otherwise, we have a nontrivial interval. When
- we're all done, the pattern will look like:
- set_number_at <jump count> <upper bound>
- set_number_at <succeed_n count> <lower bound>
- succeed_n <after jump addr> <succed_n count>
- <body of loop>
- jump_n <succeed_n addr> <jump count>
- (The upper bound and `jump_n' are omitted if
- `upper_bound' is 1, though.) */
- else
- { /* If the upper bound is > 1, we need to insert
- more at the end of the loop. */
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
-
- GET_BUFFER_SPACE (nbytes);
-
- /* Initialize lower bound of the `succeed_n', even
- though it will be set during matching by its
- attendant `set_number_at' (inserted next),
- because `re_compile_fastmap' needs to know.
- Jump to the `jump_n' we might insert below. */
- INSERT_JUMP2 (succeed_n, laststart,
- b + 5 + (upper_bound > 1) * 5,
- lower_bound);
- b += 5;
-
- /* Code to initialize the lower bound. Insert
- before the `succeed_n'. The `5' is the last two
- bytes of this `set_number_at', plus 3 bytes of
- the following `succeed_n'. */
- insert_op2 (set_number_at, laststart, 5, lower_bound, b);
- b += 5;
-
- if (upper_bound > 1)
- { /* More than one repetition is allowed, so
- append a backward jump to the `succeed_n'
- that starts this interval.
-
- When we've reached this during matching,
- we'll have matched the interval once, so
- jump back only `upper_bound - 1' times. */
- STORE_JUMP2 (jump_n, b, laststart + 5,
- upper_bound - 1);
- b += 5;
-
- /* The location we want to set is the second
- parameter of the `jump_n'; that is `b-2' as
- an absolute address. `laststart' will be
- the `set_number_at' we're about to insert;
- `laststart+3' the number to set, the source
- for the relative address. But we are
- inserting into the middle of the pattern --
- so everything is getting moved up by 5.
- Conclusion: (b - 2) - (laststart + 3) + 5,
- i.e., b - laststart.
-
- We insert this at the beginning of the loop
- so that if we fail during matching, we'll
- reinitialize the bounds. */
- insert_op2 (set_number_at, laststart, b - laststart,
- upper_bound - 1, b);
- b += 5;
- }
- }
- pending_exact = 0;
- beg_interval = NULL;
- }
- break;
-
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- assert (beg_interval);
- p = beg_interval;
- beg_interval = NULL;
-
- /* normal_char and normal_backslash need `c'. */
- PATFETCH (c);
-
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (p > pattern && p[-1] == '\\')
- goto normal_backslash;
- }
- goto normal_char;
-
-#ifdef emacs
- /* There is no way to specify the before_dot and after_dot
- operators. rms says this is ok. --karl */
- case '=':
- BUF_PUSH (at_dot);
- break;
-
- case 's':
- laststart = b;
- PATFETCH (c);
- BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
- break;
-
- case 'S':
- laststart = b;
- PATFETCH (c);
- BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
- break;
-#endif /* emacs */
-
-
- case 'w':
- laststart = b;
- BUF_PUSH (wordchar);
- break;
-
-
- case 'W':
- laststart = b;
- BUF_PUSH (notwordchar);
- break;
-
-
- case '<':
- BUF_PUSH (wordbeg);
- break;
-
- case '>':
- BUF_PUSH (wordend);
- break;
-
- case 'b':
- BUF_PUSH (wordbound);
- break;
-
- case 'B':
- BUF_PUSH (notwordbound);
- break;
-
- case '`':
- BUF_PUSH (begbuf);
- break;
-
- case '\'':
- BUF_PUSH (endbuf);
- break;
-
- case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
-
- c1 = c - '0';
-
- if (c1 > regnum)
- return REG_ESUBREG;
-
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack (compile_stack, c1))
- goto normal_char;
-
- laststart = b;
- BUF_PUSH_2 (duplicate, c1);
- break;
-
-
- case '+':
- case '?':
- if (syntax & RE_BK_PLUS_QM)
- goto handle_plus;
- else
- goto normal_backslash;
-
- default:
- normal_backslash:
- /* You might think it would be useful for \ to mean
- not to translate; but if we don't translate it
- it will never match anything. */
- c = TRANSLATE (c);
- goto normal_char;
- }
- break;
-
-
- default:
- /* Expects the character in `c'. */
- normal_char:
- /* If no exactn currently being built. */
- if (!pending_exact
-
- /* If last exactn not at current position. */
- || pending_exact + *pending_exact + 1 != b
-
- /* We have only one byte following the exactn for the count. */
- || *pending_exact == (1 << BYTEWIDTH) - 1
-
- /* If followed by a repetition operator. */
- || *p == '*' || *p == '^'
- || ((syntax & RE_BK_PLUS_QM)
- ? *p == '\\' && (p[1] == '+' || p[1] == '?')
- : (*p == '+' || *p == '?'))
- || ((syntax & RE_INTERVALS)
- && ((syntax & RE_NO_BK_BRACES)
- ? *p == '{'
- : (p[0] == '\\' && p[1] == '{'))))
- {
- /* Start building a new exactn. */
-
- laststart = b;
-
- BUF_PUSH_2 (exactn, 0);
- pending_exact = b - 1;
- }
-
- BUF_PUSH (c);
- (*pending_exact)++;
- break;
- } /* switch (c) */
- } /* while p != pend */
-
-
- /* Through the pattern now. */
-
- if (fixup_alt_jump)
- STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
-
- if (!COMPILE_STACK_EMPTY)
- return REG_EPAREN;
-
- free (compile_stack.stack);
-
- /* We have succeeded; set the length of the buffer. */
- bufp->used = b - bufp->buffer;
-
-#ifdef DEBUG
- if (debug)
- {
- DEBUG_PRINT1 ("\nCompiled pattern: ");
- print_compiled_pattern (bufp);
- }
-#endif /* DEBUG */
-
- return REG_NOERROR;
-} /* regex_compile */
-\f
-/* Subroutines for `regex_compile'. */
-
-/* Store OP at LOC followed by two-byte integer parameter ARG. */
-
-static void
-store_op1 (op, loc, arg)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
-{
- *loc = (unsigned char) op;
- STORE_NUMBER (loc + 1, arg);
-}
-
-
-/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
-
-static void
-store_op2 (op, loc, arg1, arg2)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
-{
- *loc = (unsigned char) op;
- STORE_NUMBER (loc + 1, arg1);
- STORE_NUMBER (loc + 3, arg2);
-}
-
-
-/* Copy the bytes from LOC to END to open up three bytes of space at LOC
- for OP followed by two-byte integer parameter ARG. */
-
-static void
-insert_op1 (op, loc, arg, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
- unsigned char *end;
-{
- register unsigned char *pfrom = end;
- register unsigned char *pto = end + 3;
-
- while (pfrom != loc)
- *--pto = *--pfrom;
-
- store_op1 (op, loc, arg);
-}
-
-
-/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
-
-static void
-insert_op2 (op, loc, arg1, arg2, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
- unsigned char *end;
-{
- register unsigned char *pfrom = end;
- register unsigned char *pto = end + 5;
-
- while (pfrom != loc)
- *--pto = *--pfrom;
-
- store_op2 (op, loc, arg1, arg2);
-}
-
-
-/* P points to just after a ^ in PATTERN. Return true if that ^ comes
- after an alternative or a begin-subexpression. We assume there is at
- least one character before the ^. */
-
-static boolean
-at_begline_loc_p (pattern, p, syntax)
- const char *pattern, *p;
- reg_syntax_t syntax;
-{
- const char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
-
- return
- /* After a subexpression? */
- (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
- /* After an alternative? */
- || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
-}
-
-
-/* The dual of at_begline_loc_p. This one is for $. We assume there is
- at least one character after the $, i.e., `P < PEND'. */
-
-static boolean
-at_endline_loc_p (p, pend, syntax)
- const char *p, *pend;
- int syntax;
-{
- const char *next = p;
- boolean next_backslash = *next == '\\';
- const char *next_next = p + 1 < pend ? p + 1 : NULL;
-
- return
- /* Before a subexpression? */
- (syntax & RE_NO_BK_PARENS ? *next == ')'
- : next_backslash && next_next && *next_next == ')')
- /* Before an alternative? */
- || (syntax & RE_NO_BK_VBAR ? *next == '|'
- : next_backslash && next_next && *next_next == '|');
-}
-
-
-/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
- false if it's not. */
-
-static boolean
-group_in_compile_stack (compile_stack, regnum)
- compile_stack_type compile_stack;
- regnum_t regnum;
-{
- int this_element;
-
- for (this_element = compile_stack.avail - 1;
- this_element >= 0;
- this_element--)
- if (compile_stack.stack[this_element].regnum == regnum)
- return true;
-
- return false;
-}
-
-
-/* Read the ending character of a range (in a bracket expression) from the
- uncompiled pattern *P_PTR (which ends at PEND). We assume the
- starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
- Then we set the translation of all bits between the starting and
- ending characters (inclusive) in the compiled pattern B.
-
- Return an error code.
-
- We use these short variable names so we can use the same macros as
- `regex_compile' itself. */
-
-static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
- const char **p_ptr, *pend;
- char *translate;
- reg_syntax_t syntax;
- unsigned char *b;
-{
- unsigned this_char;
-
- const char *p = *p_ptr;
- int range_start, range_end;
-
- if (p == pend)
- return REG_ERANGE;
-
- /* Even though the pattern is a signed `char *', we need to fetch
- with unsigned char *'s; if the high bit of the pattern character
- is set, the range endpoints will be negative if we fetch using a
- signed char *.
-
- We also want to fetch the endpoints without translating them; the
- appropriate translation is done in the bit-setting loop below. */
- range_start = ((unsigned char *) p)[-2];
- range_end = ((unsigned char *) p)[0];
-
- /* Have to increment the pointer into the pattern string, so the
- caller isn't still at the ending character. */
- (*p_ptr)++;
-
- /* If the start is after the end, the range is empty. */
- if (range_start > range_end)
- return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
-
- /* Here we see why `this_char' has to be larger than an `unsigned
- char' -- the range is inclusive, so if `range_end' == 0xff
- (assuming 8-bit characters), we would otherwise go into an infinite
- loop, since all characters <= 0xff. */
- for (this_char = range_start; this_char <= range_end; this_char++)
- {
- SET_LIST_BIT (TRANSLATE (this_char));
- }
-
- return REG_NOERROR;
-}
-\f
-/* Failure stack declarations and macros; both re_compile_fastmap and
- re_match_2 use a failure stack. These have to be macros because of
- REGEX_ALLOCATE. */
-
-
-/* Number of failure points for which to initially allocate space
- when matching. If this number is exceeded, we allocate more
- space, so it is not a hard limit. */
-#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
-#endif
-
-/* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_SPACE each time we failed.
- This is a variable only so users of regex can assign to it; we never
- change it ourselves. */
-int re_max_failures = 2000;
-
-typedef const unsigned char *fail_stack_elt_t;
-
-typedef struct
-{
- fail_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
-} fail_stack_type;
-
-#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
-#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
-#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
-#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
-
-
-/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
-
-#define INIT_FAIL_STACK() \
- do { \
- fail_stack.stack = (fail_stack_elt_t *) \
- REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
- \
- if (fail_stack.stack == NULL) \
- return -2; \
- \
- fail_stack.size = INIT_FAILURE_ALLOC; \
- fail_stack.avail = 0; \
- } while (0)
-
-
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
-
- Return 1 if succeeds, and 0 if either ran out of memory
- allocating space for it or it was already too large.
-
- REGEX_REALLOCATE requires `destination' be declared. */
-
-#define DOUBLE_FAIL_STACK(fail_stack) \
- ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
- ? 0 \
- : ((fail_stack).stack = (fail_stack_elt_t *) \
- REGEX_REALLOCATE ((fail_stack).stack, \
- (fail_stack).size * sizeof (fail_stack_elt_t), \
- ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
- \
- (fail_stack).stack == NULL \
- ? 0 \
- : ((fail_stack).size <<= 1, \
- 1)))
-
-
-/* Push PATTERN_OP on FAIL_STACK.
-
- Return 1 if was able to do so and 0 if ran out of memory allocating
- space to do so. */
-#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
- ((FAIL_STACK_FULL () \
- && !DOUBLE_FAIL_STACK (fail_stack)) \
- ? 0 \
- : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
- 1))
-
-/* This pushes an item onto the failure stack. Must be a four-byte
- value. Assumes the variable `fail_stack'. Probably should only
- be called from within `PUSH_FAILURE_POINT'. */
-#define PUSH_FAILURE_ITEM(item) \
- fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
-
-/* The complement operation. Assumes `fail_stack' is nonempty. */
-#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
-
-/* Used to omit pushing failure point id's when we're not debugging. */
-#ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_ITEM
-#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
-#else
-#define DEBUG_PUSH(item)
-#define DEBUG_POP(item_addr)
-#endif
-
-
-/* Push the information about the state we will need
- if we ever fail back to it.
-
- Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
- declared.
-
- Does `return FAILURE_CODE' if runs out of memory. */
-
-#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
- do { \
- char *destination; \
- /* Must be int, so when we don't save any registers, the arithmetic \
- of 0 + -1 isn't done as unsigned. */ \
- int this_reg; \
- \
- DEBUG_STATEMENT (failure_id++); \
- DEBUG_STATEMENT (nfailure_points_pushed++); \
- DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
- DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
- DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
- \
- DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
- DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
- \
- /* Ensure we have enough space allocated for what we will push. */ \
- while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
- { \
- if (!DOUBLE_FAIL_STACK (fail_stack)) \
- return failure_code; \
- \
- DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
- (fail_stack).size); \
- DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
- } \
- \
- /* Push the info, starting with the registers. */ \
- DEBUG_PRINT1 ("\n"); \
- \
- for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
- this_reg++) \
- { \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
- DEBUG_STATEMENT (num_regs_pushed++); \
- \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
- PUSH_FAILURE_ITEM (regstart[this_reg]); \
- \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
- PUSH_FAILURE_ITEM (regend[this_reg]); \
- \
- DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
- DEBUG_PRINT2 (" match_null=%d", \
- REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
- DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
- DEBUG_PRINT2 (" matched_something=%d", \
- MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT2 (" ever_matched=%d", \
- EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT1 ("\n"); \
- PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
- } \
- \
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
- PUSH_FAILURE_ITEM (lowest_active_reg); \
- \
- DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
- PUSH_FAILURE_ITEM (highest_active_reg); \
- \
- DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
- PUSH_FAILURE_ITEM (pattern_place); \
- \
- DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
- DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
- size2); \
- DEBUG_PRINT1 ("'\n"); \
- PUSH_FAILURE_ITEM (string_place); \
- \
- DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
- DEBUG_PUSH (failure_id); \
- } while (0)
-
-/* This is the number of items that are pushed and popped on the stack
- for each register. */
-#define NUM_REG_ITEMS 3
-
-/* Individual items aside from the registers. */
-#ifdef DEBUG
-#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
-#else
-#define NUM_NONREG_ITEMS 4
-#endif
-
-/* We push at most this many items on the stack. */
-#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
-
-/* We actually push this many items. */
-#define NUM_FAILURE_ITEMS \
- ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
- + NUM_NONREG_ITEMS)
-
-/* How many items can still be added to the stack without overflowing it. */
-#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
-
-
-/* Pops what PUSH_FAIL_STACK pushes.
-
- We restore into the parameters, all of which should be lvalues:
- STR -- the saved data position.
- PAT -- the saved pattern position.
- LOW_REG, HIGH_REG -- the highest and lowest active registers.
- REGSTART, REGEND -- arrays of string positions.
- REG_INFO -- array of information about each subexpression.
-
- Also assumes the variables `fail_stack' and (if debugging), `bufp',
- `pend', `string1', `size1', `string2', and `size2'. */
-
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{ \
- DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
- int this_reg; \
- const unsigned char *string_temp; \
- \
- assert (!FAIL_STACK_EMPTY ()); \
- \
- /* Remove failure points and point to how many regs pushed. */ \
- DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
- DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
- DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
- \
- assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
- \
- DEBUG_POP (&failure_id); \
- DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
- \
- /* If the saved string location is NULL, it came from an \
- on_failure_keep_string_jump opcode, and we want to throw away the \
- saved NULL, thus retaining our current position in the string. */ \
- string_temp = POP_FAILURE_ITEM (); \
- if (string_temp != NULL) \
- str = (const char *) string_temp; \
- \
- DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
- DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
- DEBUG_PRINT1 ("'\n"); \
- \
- pat = (unsigned char *) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
- \
- /* Restore register info. */ \
- high_reg = (unsigned) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
- \
- low_reg = (unsigned) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
- \
- for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
- { \
- DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
- \
- reg_info[this_reg].word = POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
- \
- regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
- \
- regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
- } \
- \
- DEBUG_STATEMENT (nfailure_points_popped++); \
-} /* POP_FAILURE_POINT */
-\f
-/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
- BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
- characters can start a string that matches the pattern. This fastmap
- is used by re_search to skip quickly over impossible starting points.
-
- The caller must supply the address of a (1 << BYTEWIDTH)-byte data
- area as BUFP->fastmap.
-
- We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
- the pattern buffer.
-
- Returns 0 if we succeed, -2 if an internal error. */
-
-int
-re_compile_fastmap (bufp)
- struct re_pattern_buffer *bufp;
-{
- int j, k;
- fail_stack_type fail_stack;
-#ifndef REGEX_MALLOC
- char *destination;
-#endif
- /* We don't push any register information onto the failure stack. */
- unsigned num_regs = 0;
-
- register char *fastmap = bufp->fastmap;
- unsigned char *pattern = bufp->buffer;
- unsigned long size = bufp->used;
- const unsigned char *p = pattern;
- register unsigned char *pend = pattern + size;
-
- /* Assume that each path through the pattern can be null until
- proven otherwise. We set this false at the bottom of switch
- statement, to which we get only if a particular path doesn't
- match the empty string. */
- boolean path_can_be_null = true;
-
- /* We aren't doing a `succeed_n' to begin with. */
- boolean succeed_n_p = false;
-
- assert (fastmap != NULL && p != NULL);
-
- INIT_FAIL_STACK ();
- bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
- bufp->fastmap_accurate = 1; /* It will be when we're done. */
- bufp->can_be_null = 0;
-
- while (p != pend || !FAIL_STACK_EMPTY ())
- {
- if (p == pend)
- {
- bufp->can_be_null |= path_can_be_null;
-
- /* Reset for next path. */
- path_can_be_null = true;
-
- p = fail_stack.stack[--fail_stack.avail];
- }
-
- /* We should never be about to go beyond the end of the pattern. */
- assert (p < pend);
-
-#ifdef SWITCH_ENUM_BUG
- switch ((int) ((re_opcode_t) *p++))
-#else
- switch ((re_opcode_t) *p++)
-#endif
- {
-
- /* I guess the idea here is to simply not bother with a fastmap
- if a backreference is used, since it's too hard to figure out
- the fastmap for the corresponding group. Setting
- `can_be_null' stops `re_search_2' from using the fastmap, so
- that is all we do. */
- case duplicate:
- bufp->can_be_null = 1;
- return 0;
-
-
- /* Following are the cases which match a character. These end
- with `break'. */
-
- case exactn:
- fastmap[p[1]] = 1;
- break;
-
-
- case charset:
- for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
- fastmap[j] = 1;
- break;
-
-
- case charset_not:
- /* Chars beyond end of map must be allowed. */
- for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
-
- for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
- fastmap[j] = 1;
- break;
-
-
- case wordchar:
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) == Sword)
- fastmap[j] = 1;
- break;
-
-
- case notwordchar:
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) != Sword)
- fastmap[j] = 1;
- break;
-
-
- case anychar:
- /* `.' matches anything ... */
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
-
- /* ... except perhaps newline. */
- if (!(bufp->syntax & RE_DOT_NEWLINE))
- fastmap['\n'] = 0;
-
- /* Return if we have already set `can_be_null'; if we have,
- then the fastmap is irrelevant. Something's wrong here. */
- else if (bufp->can_be_null)
- return 0;
-
- /* Otherwise, have to check alternative paths. */
- break;
-
-
-#ifdef emacs
- case syntaxspec:
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) == (enum syntaxcode) k)
- fastmap[j] = 1;
- break;
-
-
- case notsyntaxspec:
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) != (enum syntaxcode) k)
- fastmap[j] = 1;
- break;
-
-
- /* All cases after this match the empty string. These end with
- `continue'. */
-
-
- case before_dot:
- case at_dot:
- case after_dot:
- continue;
-#endif /* not emacs */
-
-
- case no_op:
- case begline:
- case endline:
- case begbuf:
- case endbuf:
- case wordbound:
- case notwordbound:
- case wordbeg:
- case wordend:
- case push_dummy_failure:
- continue;
-
-
- case jump_n:
- case pop_failure_jump:
- case maybe_pop_jump:
- case jump:
- case jump_past_alt:
- case dummy_failure_jump:
- EXTRACT_NUMBER_AND_INCR (j, p);
- p += j;
- if (j > 0)
- continue;
-
- /* Jump backward implies we just went through the body of a
- loop and matched nothing. Opcode jumped to should be
- `on_failure_jump' or `succeed_n'. Just treat it like an
- ordinary jump. For a * loop, it has pushed its failure
- point already; if so, discard that as redundant. */
- if ((re_opcode_t) *p != on_failure_jump
- && (re_opcode_t) *p != succeed_n)
- continue;
-
- p++;
- EXTRACT_NUMBER_AND_INCR (j, p);
- p += j;
-
- /* If what's on the stack is where we are now, pop it. */
- if (!FAIL_STACK_EMPTY ()
- && fail_stack.stack[fail_stack.avail - 1] == p)
- fail_stack.avail--;
-
- continue;
-
-
- case on_failure_jump:
- case on_failure_keep_string_jump:
- handle_on_failure_jump:
- EXTRACT_NUMBER_AND_INCR (j, p);
-
- /* For some patterns, e.g., `(a?)?', `p+j' here points to the
- end of the pattern. We don't want to push such a point,
- since when we restore it above, entering the switch will
- increment `p' past the end of the pattern. We don't need
- to push such a point since we obviously won't find any more
- fastmap entries beyond `pend'. Such a pattern can match
- the null string, though. */
- if (p + j < pend)
- {
- if (!PUSH_PATTERN_OP (p + j, fail_stack))
- return -2;
- }
- else
- bufp->can_be_null = 1;
-
- if (succeed_n_p)
- {
- EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
- succeed_n_p = false;
- }
-
- continue;
-
-
- case succeed_n:
- /* Get to the number of times to succeed. */
- p += 2;
-
- /* Increment p past the n for when k != 0. */
- EXTRACT_NUMBER_AND_INCR (k, p);
- if (k == 0)
- {
- p -= 4;
- succeed_n_p = true; /* Spaghetti code alert. */
- goto handle_on_failure_jump;
- }
- continue;
-
-
- case set_number_at:
- p += 4;
- continue;
-
-
- case start_memory:
- case stop_memory:
- p += 2;
- continue;
-
-
- default:
- abort (); /* We have listed all the cases. */
- } /* switch *p++ */
-
- /* Getting here means we have found the possible starting
- characters for one path of the pattern -- and that the empty
- string does not match. We need not follow this path further.
- Instead, look at the next alternative (remembered on the
- stack), or quit if no more. The test at the top of the loop
- does these things. */
- path_can_be_null = false;
- p = pend;
- } /* while p */
-
- /* Set `can_be_null' for the last path (also the first path, if the
- pattern is empty). */
- bufp->can_be_null |= path_can_be_null;
- return 0;
-} /* re_compile_fastmap */
-\f
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
- this memory for recording register information. STARTS and ENDS
- must be allocated using the malloc library routine, and must each
- be at least NUM_REGS * sizeof (regoff_t) bytes long.
-
- If NUM_REGS == 0, then subsequent matches should allocate their own
- register data.
-
- Unless this function is called, the first search or match using
- PATTERN_BUFFER will allocate its own register data, without
- freeing the old data. */
-
-void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
-{
- if (num_regs)
- {
- bufp->regs_allocated = REGS_REALLOCATE;
- regs->num_regs = num_regs;
- regs->start = starts;
- regs->end = ends;
- }
- else
- {
- bufp->regs_allocated = REGS_UNALLOCATED;
- regs->num_regs = 0;
- regs->start = regs->end = (regoff_t) 0;
- }
-}
-\f
-/* Searching routines. */
-
-/* Like re_search_2, below, but only one string is specified, and
- doesn't let you say where to stop matching. */
-
-int
-re_search (bufp, string, size, startpos, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int size, startpos, range;
- struct re_registers *regs;
-{
- return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
- regs, size);
-}
-
-
-/* Using the compiled pattern in BUFP->buffer, first tries to match the
- virtual concatenation of STRING1 and STRING2, starting first at index
- STARTPOS, then at STARTPOS + 1, and so on.
-
- STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
-
- RANGE is how far to scan while trying to match. RANGE = 0 means try
- only at STARTPOS; in general, the last start tried is STARTPOS +
- RANGE.
-
- In REGS, return the indices of the virtual concatenation of STRING1
- and STRING2 that matched the entire BUFP->buffer and its contained
- subexpressions.
-
- Do not consider matching one past the index STOP in the virtual
- concatenation of STRING1 and STRING2.
-
- We return either the position in the strings at which the match was
- found, -1 if no match, or -2 if error (such as failure
- stack overflow). */
-
-int
-re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int size1, size2;
- int startpos;
- int range;
- struct re_registers *regs;
- int stop;
-{
- int val;
- register char *fastmap = bufp->fastmap;
- register char *translate = bufp->translate;
- int total_size = size1 + size2;
- int endpos = startpos + range;
-
- /* Check for out-of-range STARTPOS. */
- if (startpos < 0 || startpos > total_size)
- return -1;
-
- /* Fix up RANGE if it might eventually take us outside
- the virtual concatenation of STRING1 and STRING2. */
- if (endpos < -1)
- range = -1 - startpos;
- else if (endpos > total_size)
- range = total_size - startpos;
-
- /* If the search isn't to be a backwards one, don't waste time in a
- search for a pattern that must be anchored. */
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
- {
- if (startpos > 0)
- return -1;
- else
- range = 1;
- }
-
- /* Update the fastmap now if not correct already. */
- if (fastmap && !bufp->fastmap_accurate)
- if (re_compile_fastmap (bufp) == -2)
- return -2;
-
- /* Loop through the string, looking for a place to start matching. */
- for (;;)
- {
- /* If a fastmap is supplied, skip quickly over characters that
- cannot be the start of a match. If the pattern can match the
- null string, however, we don't need to skip characters; we want
- the first null string. */
- if (fastmap && startpos < total_size && !bufp->can_be_null)
- {
- if (range > 0) /* Searching forwards. */
- {
- register const char *d;
- register int lim = 0;
- int irange = range;
-
- if (startpos < size1 && startpos + range >= size1)
- lim = range - (size1 - startpos);
-
- d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
-
- /* Written out as an if-else to avoid testing `translate'
- inside the loop. */
- if (translate)
- while (range > lim
- && !fastmap[(unsigned char)
- translate[(unsigned char) *d++]])
- range--;
- else
- while (range > lim && !fastmap[(unsigned char) *d++])
- range--;
-
- startpos += irange - range;
- }
- else /* Searching backwards. */
- {
- register char c = (size1 == 0 || startpos >= size1
- ? string2[startpos - size1]
- : string1[startpos]);
-
- if (!fastmap[(unsigned char) TRANSLATE (c)])
- goto advance;
- }
- }
-
- /* If can't match the null string, and that's all we have left, fail. */
- if (range >= 0 && startpos == total_size && fastmap
- && !bufp->can_be_null)
- return -1;
-
- val = re_match_2 (bufp, string1, size1, string2, size2,
- startpos, regs, stop);
- if (val >= 0)
- return startpos;
-
- if (val == -2)
- return -2;
-
- advance:
- if (!range)
- break;
- else if (range > 0)
- {
- range--;
- startpos++;
- }
- else
- {
- range++;
- startpos--;
- }
- }
- return -1;
-} /* re_search_2 */
-\f
-/* Declarations and macros for re_match_2. */
-
-static int bcmp_translate ();
-static boolean alt_match_null_string_p (),
- common_op_match_null_string_p (),
- group_match_null_string_p ();
-
-/* Structure for per-register (a.k.a. per-group) information.
- This must not be longer than one word, because we push this value
- onto the failure stack. Other register information, such as the
- starting and ending positions (which are addresses), and the list of
- inner groups (which is a bits list) are maintained in separate
- variables.
-
- We are making a (strictly speaking) nonportable assumption here: that
- the compiler will pack our bit fields into something that fits into
- the type of `word', i.e., is something that fits into one item on the
- failure stack. */
-typedef union
-{
- fail_stack_elt_t word;
- struct
- {
- /* This field is one if this group can match the empty string,
- zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
-#define MATCH_NULL_UNSET_VALUE 3
- unsigned match_null_string_p : 2;
- unsigned is_active : 1;
- unsigned matched_something : 1;
- unsigned ever_matched_something : 1;
- } bits;
-} register_info_type;
-
-#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
-#define IS_ACTIVE(R) ((R).bits.is_active)
-#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
-#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
-
-
-/* Call this when have matched a real character; it sets `matched' flags
- for the subexpressions which we are currently inside. Also records
- that those subexprs have matched. */
-#define SET_REGS_MATCHED() \
- do \
- { \
- unsigned r; \
- for (r = lowest_active_reg; r <= highest_active_reg; r++) \
- { \
- MATCHED_SOMETHING (reg_info[r]) \
- = EVER_MATCHED_SOMETHING (reg_info[r]) \
- = 1; \
- } \
- } \
- while (0)
-
-
-/* This converts PTR, a pointer into one of the search strings `string1'
- and `string2' into an offset from the beginning of that string. */
-#define POINTER_TO_OFFSET(ptr) \
- (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
-
-/* Registers are set to a sentinel when they haven't yet matched. */
-#define REG_UNSET_VALUE ((char *) -1)
-#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
-
-
-/* Macros for dealing with the split strings in re_match_2. */
-
-#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
-
-/* Call before fetching a character with *d. This switches over to
- string2 if necessary. */
-#define PREFETCH() \
- while (d == dend) \
- { \
- /* End of string2 => fail. */ \
- if (dend == end_match_2) \
- goto fail; \
- /* End of string1 => advance to string2. */ \
- d = string2; \
- dend = end_match_2; \
- }
-
-
-/* Test if at very beginning or at very end of the virtual concatenation
- of `string1' and `string2'. If only one string, it's `string2'. */
-#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
-#define AT_STRINGS_END(d) ((d) == end2)
-
-
-/* Test if D points to a character which is word-constituent. We have
- two special cases to check for: if past the end of string1, look at
- the first character in string2; and if before the beginning of
- string2, look at the last character in string1. */
-#define WORDCHAR_P(d) \
- (SYNTAX ((d) == end1 ? *string2 \
- : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
- == Sword)
-
-/* Test if the character before D and the one at D differ with respect
- to being word-constituent. */
-#define AT_WORD_BOUNDARY(d) \
- (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
- || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
-
-
-/* Free everything we malloc. */
-#ifdef REGEX_MALLOC
-#define FREE_VAR(var) if (var) free (var); var = NULL
-#define FREE_VARIABLES() \
- do { \
- FREE_VAR (fail_stack.stack); \
- FREE_VAR (regstart); \
- FREE_VAR (regend); \
- FREE_VAR (old_regstart); \
- FREE_VAR (old_regend); \
- FREE_VAR (best_regstart); \
- FREE_VAR (best_regend); \
- FREE_VAR (reg_info); \
- FREE_VAR (reg_dummy); \
- FREE_VAR (reg_info_dummy); \
- } while (0)
-#else /* not REGEX_MALLOC */
-/* Some MIPS systems (at least) want this to free alloca'd storage. */
-#define FREE_VARIABLES() alloca (0)
-#endif /* not REGEX_MALLOC */
-
-
-/* These values must meet several constraints. They must not be valid
- register values; since we have a limit of 255 registers (because
- we use only one byte in the pattern for the register number), we can
- use numbers larger than 255. They must differ by 1, because of
- NUM_FAILURE_ITEMS above. And the value for the lowest register must
- be larger than the value for the highest register, so we do not try
- to actually save any registers when none are active. */
-#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
-#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
-\f
-/* Matching routines. */
-
-#ifndef emacs /* Emacs never uses this. */
-/* re_match is like re_match_2 except it takes only a single string. */
-
-int
-re_match (bufp, string, size, pos, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int size, pos;
- struct re_registers *regs;
- {
- return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
-}
-#endif /* not emacs */
-
-
-/* re_match_2 matches the compiled pattern in BUFP against the
- the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
- and SIZE2, respectively). We start matching at POS, and stop
- matching at STOP.
-
- If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
- store offsets for the substring each group matched in REGS. See the
- documentation for exactly how many groups we fill.
-
- We return -1 if no match, -2 if an internal error (such as the
- failure stack overflowing). Otherwise, we return the length of the
- matched substring. */
-
-int
-re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int size1, size2;
- int pos;
- struct re_registers *regs;
- int stop;
-{
- /* General temporaries. */
- int mcnt;
- unsigned char *p1;
-
- /* Just past the end of the corresponding string. */
- const char *end1, *end2;
-
- /* Pointers into string1 and string2, just past the last characters in
- each to consider matching. */
- const char *end_match_1, *end_match_2;
-
- /* Where we are in the data, and the end of the current string. */
- const char *d, *dend;
-
- /* Where we are in the pattern, and the end of the pattern. */
- unsigned char *p = bufp->buffer;
- register unsigned char *pend = p + bufp->used;
-
- /* We use this to map every character in the string. */
- char *translate = bufp->translate;
-
- /* Failure point stack. Each place that can handle a failure further
- down the line pushes a failure point on this stack. It consists of
- restart, regend, and reg_info for all registers corresponding to
- the subexpressions we're currently inside, plus the number of such
- registers, and, finally, two char *'s. The first char * is where
- to resume scanning the pattern; the second one is where to resume
- scanning the strings. If the latter is zero, the failure point is
- a ``dummy''; if a failure happens and the failure point is a dummy,
- it gets discarded and the next next one is tried. */
- fail_stack_type fail_stack;
-#ifdef DEBUG
- static unsigned failure_id = 0;
- unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
-#endif
-
- /* We fill all the registers internally, independent of what we
- return, for use in backreferences. The number here includes
- an element for register zero. */
- unsigned num_regs = bufp->re_nsub + 1;
-
- /* The currently active registers. */
- unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
- unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
-
- /* Information on the contents of registers. These are pointers into
- the input strings; they record just what was matched (on this
- attempt) by a subexpression part of the pattern, that is, the
- regnum-th regstart pointer points to where in the pattern we began
- matching and the regnum-th regend points to right after where we
- stopped matching the regnum-th subexpression. (The zeroth register
- keeps track of what the whole pattern matches.) */
- const char **regstart = NULL, **regend = NULL;
-
- /* If a group that's operated upon by a repetition operator fails to
- match anything, then the register for its start will need to be
- restored because it will have been set to wherever in the string we
- are when we last see its open-group operator. Similarly for a
- register's end. */
- const char **old_regstart = NULL, **old_regend = NULL;
-
- /* The is_active field of reg_info helps us keep track of which (possibly
- nested) subexpressions we are currently in. The matched_something
- field of reg_info[reg_num] helps us tell whether or not we have
- matched any of the pattern so far this time through the reg_num-th
- subexpression. These two fields get reset each time through any
- loop their register is in. */
- register_info_type *reg_info = NULL;
-
- /* The following record the register info as found in the above
- variables when we find a match better than any we've seen before.
- This happens as we backtrack through the failure points, which in
- turn happens only if we have not yet matched the entire string. */
- unsigned best_regs_set = false;
- const char **best_regstart = NULL, **best_regend = NULL;
-
- /* Logically, this is `best_regend[0]'. But we don't want to have to
- allocate space for that if we're not allocating space for anything
- else (see below). Also, we never need info about register 0 for
- any of the other register vectors, and it seems rather a kludge to
- treat `best_regend' differently than the rest. So we keep track of
- the end of the best match so far in a separate variable. We
- initialize this to NULL so that when we backtrack the first time
- and need to test it, it's not garbage. */
- const char *match_end = NULL;
-
- /* Used when we pop values we don't care about. */
- const char **reg_dummy = NULL;
- register_info_type *reg_info_dummy = NULL;
-
-#ifdef DEBUG
- /* Counts the total number of registers pushed. */
- unsigned num_regs_pushed = 0;
-#endif
-
- DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
-
- INIT_FAIL_STACK ();
-
- /* Do not bother to initialize all the register variables if there are
- no groups in the pattern, as it takes a fair amount of time. If
- there are groups, we include space for register 0 (the whole
- pattern), even though we never use it, since it simplifies the
- array indexing. We should fix this. */
- if (bufp->re_nsub)
- {
- regstart = REGEX_TALLOC (num_regs, const char *);
- regend = REGEX_TALLOC (num_regs, const char *);
- old_regstart = REGEX_TALLOC (num_regs, const char *);
- old_regend = REGEX_TALLOC (num_regs, const char *);
- best_regstart = REGEX_TALLOC (num_regs, const char *);
- best_regend = REGEX_TALLOC (num_regs, const char *);
- reg_info = REGEX_TALLOC (num_regs, register_info_type);
- reg_dummy = REGEX_TALLOC (num_regs, const char *);
- reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
-
- if (!(regstart && regend && old_regstart && old_regend && reg_info
- && best_regstart && best_regend && reg_dummy && reg_info_dummy))
- {
- FREE_VARIABLES ();
- return -2;
- }
- }
-#ifdef REGEX_MALLOC
- else
- {
- /* We must initialize all our variables to NULL, so that
- `FREE_VARIABLES' doesn't try to free them. */
- regstart = regend = old_regstart = old_regend = best_regstart
- = best_regend = reg_dummy = NULL;
- reg_info = reg_info_dummy = (register_info_type *) NULL;
- }
-#endif /* REGEX_MALLOC */
-
- /* The starting position is bogus. */
- if (pos < 0 || pos > size1 + size2)
- {
- FREE_VARIABLES ();
- return -1;
- }
-
- /* Initialize subexpression text positions to -1 to mark ones that no
- start_memory/stop_memory has been seen for. Also initialize the
- register information struct. */
- for (mcnt = 1; mcnt < num_regs; mcnt++)
- {
- regstart[mcnt] = regend[mcnt]
- = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
-
- REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
- IS_ACTIVE (reg_info[mcnt]) = 0;
- MATCHED_SOMETHING (reg_info[mcnt]) = 0;
- EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
- }
-
- /* We move `string1' into `string2' if the latter's empty -- but not if
- `string1' is null. */
- if (size2 == 0 && string1 != NULL)
- {
- string2 = string1;
- size2 = size1;
- string1 = 0;
- size1 = 0;
- }
- end1 = string1 + size1;
- end2 = string2 + size2;
-
- /* Compute where to stop matching, within the two strings. */
- if (stop <= size1)
- {
- end_match_1 = string1 + stop;
- end_match_2 = string2;
- }
- else
- {
- end_match_1 = end1;
- end_match_2 = string2 + stop - size1;
- }
-
- /* `p' scans through the pattern as `d' scans through the data.
- `dend' is the end of the input string that `d' points within. `d'
- is advanced into the following input string whenever necessary, but
- this happens before fetching; therefore, at the beginning of the
- loop, `d' can be pointing at the end of a string, but it cannot
- equal `string2'. */
- if (size1 > 0 && pos <= size1)
- {
- d = string1 + pos;
- dend = end_match_1;
- }
- else
- {
- d = string2 + pos - size1;
- dend = end_match_2;
- }
-
- DEBUG_PRINT1 ("The compiled pattern is: ");
- DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
- DEBUG_PRINT1 ("The string to match is: `");
- DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
- DEBUG_PRINT1 ("'\n");
-
- /* This loops over pattern commands. It exits by returning from the
- function if the match is complete, or it drops through if the match
- fails at this starting point in the input data. */
- for (;;)
- {
- DEBUG_PRINT2 ("\n0x%x: ", p);
-
- if (p == pend)
- { /* End of pattern means we might have succeeded. */
- DEBUG_PRINT1 ("end of pattern ... ");
-
- /* If we haven't matched the entire string, and we want the
- longest match, try backtracking. */
- if (d != end_match_2)
- {
- DEBUG_PRINT1 ("backtracking.\n");
-
- if (!FAIL_STACK_EMPTY ())
- { /* More failure points to try. */
- boolean same_str_p = (FIRST_STRING_P (match_end)
- == MATCHING_IN_FIRST_STRING);
-
- /* If exceeds best match so far, save it. */
- if (!best_regs_set
- || (same_str_p && d > match_end)
- || (!same_str_p && !MATCHING_IN_FIRST_STRING))
- {
- best_regs_set = true;
- match_end = d;
-
- DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
-
- for (mcnt = 1; mcnt < num_regs; mcnt++)
- {
- best_regstart[mcnt] = regstart[mcnt];
- best_regend[mcnt] = regend[mcnt];
- }
- }
- goto fail;
- }
-
- /* If no failure points, don't restore garbage. */
- else if (best_regs_set)
- {
- restore_best_regs:
- /* Restore best match. It may happen that `dend ==
- end_match_1' while the restored d is in string2.
- For example, the pattern `x.*y.*z' against the
- strings `x-' and `y-z-', if the two strings are
- not consecutive in memory. */
- DEBUG_PRINT1 ("Restoring best registers.\n");
-
- d = match_end;
- dend = ((d >= string1 && d <= end1)
- ? end_match_1 : end_match_2);
-
- for (mcnt = 1; mcnt < num_regs; mcnt++)
- {
- regstart[mcnt] = best_regstart[mcnt];
- regend[mcnt] = best_regend[mcnt];
- }
- }
- } /* d != end_match_2 */
-
- DEBUG_PRINT1 ("Accepting match.\n");
-
- /* If caller wants register contents data back, do it. */
- if (regs && !bufp->no_sub)
- {
- /* Have the register data arrays been allocated? */
- if (bufp->regs_allocated == REGS_UNALLOCATED)
- { /* No. So allocate them with malloc. We need one
- extra element beyond `num_regs' for the `-1' marker
- GNU code uses. */
- regs->num_regs = MAX (RE_NREGS, num_regs + 1);
- regs->start = TALLOC (regs->num_regs, regoff_t);
- regs->end = TALLOC (regs->num_regs, regoff_t);
- if (regs->start == NULL || regs->end == NULL)
- return -2;
- bufp->regs_allocated = REGS_REALLOCATE;
- }
- else if (bufp->regs_allocated == REGS_REALLOCATE)
- { /* Yes. If we need more elements than were already
- allocated, reallocate them. If we need fewer, just
- leave it alone. */
- if (regs->num_regs < num_regs + 1)
- {
- regs->num_regs = num_regs + 1;
- RETALLOC (regs->start, regs->num_regs, regoff_t);
- RETALLOC (regs->end, regs->num_regs, regoff_t);
- if (regs->start == NULL || regs->end == NULL)
- return -2;
- }
- }
- else
- assert (bufp->regs_allocated == REGS_FIXED);
-
- /* Convert the pointer data in `regstart' and `regend' to
- indices. Register zero has to be set differently,
- since we haven't kept track of any info for it. */
- if (regs->num_regs > 0)
- {
- regs->start[0] = pos;
- regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
- : d - string2 + size1);
- }
-
- /* Go through the first `min (num_regs, regs->num_regs)'
- registers, since that is all we initialized. */
- for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
- {
- if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
- regs->start[mcnt] = regs->end[mcnt] = -1;
- else
- {
- regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
- regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
- }
- }
-
- /* If the regs structure we return has more elements than
- were in the pattern, set the extra elements to -1. If
- we (re)allocated the registers, this is the case,
- because we always allocate enough to have at least one
- -1 at the end. */
- for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
- regs->start[mcnt] = regs->end[mcnt] = -1;
- } /* regs && !bufp->no_sub */
-
- FREE_VARIABLES ();
- DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
- nfailure_points_pushed, nfailure_points_popped,
- nfailure_points_pushed - nfailure_points_popped);
- DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
-
- mcnt = d - pos - (MATCHING_IN_FIRST_STRING
- ? string1
- : string2 - size1);
-
- DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
-
- return mcnt;
- }
-
- /* Otherwise match next pattern command. */
-#ifdef SWITCH_ENUM_BUG
- switch ((int) ((re_opcode_t) *p++))
-#else
- switch ((re_opcode_t) *p++)
-#endif
- {
- /* Ignore these. Used to ignore the n of succeed_n's which
- currently have n == 0. */
- case no_op:
- DEBUG_PRINT1 ("EXECUTING no_op.\n");
- break;
-
-
- /* Match the next n pattern characters exactly. The following
- byte in the pattern defines n, and the n bytes after that
- are the characters to match. */
- case exactn:
- mcnt = *p++;
- DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
-
- /* This is written out as an if-else so we don't waste time
- testing `translate' inside the loop. */
- if (translate)
- {
- do
- {
- PREFETCH ();
- if (translate[(unsigned char) *d++] != (char) *p++)
- goto fail;
- }
- while (--mcnt);
- }
- else
- {
- do
- {
- PREFETCH ();
- if (*d++ != (char) *p++) goto fail;
- }
- while (--mcnt);
- }
- SET_REGS_MATCHED ();
- break;
-
-
- /* Match any character except possibly a newline or a null. */
- case anychar:
- DEBUG_PRINT1 ("EXECUTING anychar.\n");
-
- PREFETCH ();
-
- if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
- || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
- goto fail;
-
- SET_REGS_MATCHED ();
- DEBUG_PRINT2 (" Matched `%d'.\n", *d);
- d++;
- break;
-
-
- case charset:
- case charset_not:
- {
- register unsigned char c;
- boolean not = (re_opcode_t) *(p - 1) == charset_not;
-
- DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
-
- PREFETCH ();
- c = TRANSLATE (*d); /* The character to match. */
-
- /* Cast to `unsigned' instead of `unsigned char' in case the
- bit list is a full 32 bytes long. */
- if (c < (unsigned) (*p * BYTEWIDTH)
- && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- not = !not;
-
- p += 1 + *p;
-
- if (!not) goto fail;
-
- SET_REGS_MATCHED ();
- d++;
- break;
- }
-
-
- /* The beginning of a group is represented by start_memory.
- The arguments are the register number in the next byte, and the
- number of groups inner to this one in the next. The text
- matched within the group is recorded (in the internal
- registers data structure) under the register number. */
- case start_memory:
- DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
-
- /* Find out if this group can match the empty string. */
- p1 = p; /* To send to group_match_null_string_p. */
-
- if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
- REG_MATCH_NULL_STRING_P (reg_info[*p])
- = group_match_null_string_p (&p1, pend, reg_info);
-
- /* Save the position in the string where we were the last time
- we were at this open-group operator in case the group is
- operated upon by a repetition operator, e.g., with `(a*)*b'
- against `ab'; then we want to ignore where we are now in
- the string in case this attempt to match fails. */
- old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
- ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
- : regstart[*p];
- DEBUG_PRINT2 (" old_regstart: %d\n",
- POINTER_TO_OFFSET (old_regstart[*p]));
-
- regstart[*p] = d;
- DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
-
- IS_ACTIVE (reg_info[*p]) = 1;
- MATCHED_SOMETHING (reg_info[*p]) = 0;
-
- /* This is the new highest active register. */
- highest_active_reg = *p;
-
- /* If nothing was active before, this is the new lowest active
- register. */
- if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
- lowest_active_reg = *p;
-
- /* Move past the register number and inner group count. */
- p += 2;
- break;
-
-
- /* The stop_memory opcode represents the end of a group. Its
- arguments are the same as start_memory's: the register
- number, and the number of inner groups. */
- case stop_memory:
- DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
-
- /* We need to save the string position the last time we were at
- this close-group operator in case the group is operated
- upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
- against `aba'; then we want to ignore where we are now in
- the string in case this attempt to match fails. */
- old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
- ? REG_UNSET (regend[*p]) ? d : regend[*p]
- : regend[*p];
- DEBUG_PRINT2 (" old_regend: %d\n",
- POINTER_TO_OFFSET (old_regend[*p]));
-
- regend[*p] = d;
- DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
-
- /* This register isn't active anymore. */
- IS_ACTIVE (reg_info[*p]) = 0;
-
- /* If this was the only register active, nothing is active
- anymore. */
- if (lowest_active_reg == highest_active_reg)
- {
- lowest_active_reg = NO_LOWEST_ACTIVE_REG;
- highest_active_reg = NO_HIGHEST_ACTIVE_REG;
- }
- else
- { /* We must scan for the new highest active register, since
- it isn't necessarily one less than now: consider
- (a(b)c(d(e)f)g). When group 3 ends, after the f), the
- new highest active register is 1. */
- unsigned char r = *p - 1;
- while (r > 0 && !IS_ACTIVE (reg_info[r]))
- r--;
-
- /* If we end up at register zero, that means that we saved
- the registers as the result of an `on_failure_jump', not
- a `start_memory', and we jumped to past the innermost
- `stop_memory'. For example, in ((.)*) we save
- registers 1 and 2 as a result of the *, but when we pop
- back to the second ), we are at the stop_memory 1.
- Thus, nothing is active. */
- if (r == 0)
- {
- lowest_active_reg = NO_LOWEST_ACTIVE_REG;
- highest_active_reg = NO_HIGHEST_ACTIVE_REG;
- }
- else
- highest_active_reg = r;
- }
-
- /* If just failed to match something this time around with a
- group that's operated on by a repetition operator, try to
- force exit from the ``loop'', and restore the register
- information for this group that we had before trying this
- last match. */
- if ((!MATCHED_SOMETHING (reg_info[*p])
- || (re_opcode_t) p[-3] == start_memory)
- && (p + 2) < pend)
- {
- boolean is_a_jump_n = false;
-
- p1 = p + 2;
- mcnt = 0;
- switch ((re_opcode_t) *p1++)
- {
- case jump_n:
- is_a_jump_n = true;
- case pop_failure_jump:
- case maybe_pop_jump:
- case jump:
- case dummy_failure_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- if (is_a_jump_n)
- p1 += 2;
- break;
-
- default:
- /* do nothing */ ;
- }
- p1 += mcnt;
-
- /* If the next operation is a jump backwards in the pattern
- to an on_failure_jump right before the start_memory
- corresponding to this stop_memory, exit from the loop
- by forcing a failure after pushing on the stack the
- on_failure_jump's jump in the pattern, and d. */
- if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
- && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
- {
- /* If this group ever matched anything, then restore
- what its registers were before trying this last
- failed match, e.g., with `(a*)*b' against `ab' for
- regstart[1], and, e.g., with `((a*)*(b*)*)*'
- against `aba' for regend[3].
-
- Also restore the registers for inner groups for,
- e.g., `((a*)(b*))*' against `aba' (register 3 would
- otherwise get trashed). */
-
- if (EVER_MATCHED_SOMETHING (reg_info[*p]))
- {
- unsigned r;
-
- EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
-
- /* Restore this and inner groups' (if any) registers. */
- for (r = *p; r < *p + *(p + 1); r++)
- {
- regstart[r] = old_regstart[r];
-
- /* xx why this test? */
- if ((int) old_regend[r] >= (int) regstart[r])
- regend[r] = old_regend[r];
- }
- }
- p1++;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
-
- goto fail;
- }
- }
-
- /* Move past the register number and the inner group count. */
- p += 2;
- break;
-
-
- /* \<digit> has been turned into a `duplicate' command which is
- followed by the numeric value of <digit> as the register number. */
- case duplicate:
- {
- register const char *d2, *dend2;
- int regno = *p++; /* Get which register to match against. */
- DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
-
- /* Can't back reference a group which we've never matched. */
- if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
- goto fail;
-
- /* Where in input to try to start matching. */
- d2 = regstart[regno];
-
- /* Where to stop matching; if both the place to start and
- the place to stop matching are in the same string, then
- set to the place to stop, otherwise, for now have to use
- the end of the first string. */
-
- dend2 = ((FIRST_STRING_P (regstart[regno])
- == FIRST_STRING_P (regend[regno]))
- ? regend[regno] : end_match_1);
- for (;;)
- {
- /* If necessary, advance to next segment in register
- contents. */
- while (d2 == dend2)
- {
- if (dend2 == end_match_2) break;
- if (dend2 == regend[regno]) break;
-
- /* End of string1 => advance to string2. */
- d2 = string2;
- dend2 = regend[regno];
- }
- /* At end of register contents => success */
- if (d2 == dend2) break;
-
- /* If necessary, advance to next segment in data. */
- PREFETCH ();
-
- /* How many characters left in this segment to match. */
- mcnt = dend - d;
-
- /* Want how many consecutive characters we can match in
- one shot, so, if necessary, adjust the count. */
- if (mcnt > dend2 - d2)
- mcnt = dend2 - d2;
-
- /* Compare that many; failure if mismatch, else move
- past them. */
- if (translate
- ? bcmp_translate (d, d2, mcnt, translate)
- : bcmp (d, d2, mcnt))
- goto fail;
- d += mcnt, d2 += mcnt;
- }
- }
- break;
-
-
- /* begline matches the empty string at the beginning of the string
- (unless `not_bol' is set in `bufp'), and, if
- `newline_anchor' is set, after newlines. */
- case begline:
- DEBUG_PRINT1 ("EXECUTING begline.\n");
-
- if (AT_STRINGS_BEG (d))
- {
- if (!bufp->not_bol) break;
- }
- else if (d[-1] == '\n' && bufp->newline_anchor)
- {
- break;
- }
- /* In all other cases, we fail. */
- goto fail;
-
-
- /* endline is the dual of begline. */
- case endline:
- DEBUG_PRINT1 ("EXECUTING endline.\n");
-
- if (AT_STRINGS_END (d))
- {
- if (!bufp->not_eol) break;
- }
-
- /* We have to ``prefetch'' the next character. */
- else if ((d == end1 ? *string2 : *d) == '\n'
- && bufp->newline_anchor)
- {
- break;
- }
- goto fail;
-
-
- /* Match at the very beginning of the data. */
- case begbuf:
- DEBUG_PRINT1 ("EXECUTING begbuf.\n");
- if (AT_STRINGS_BEG (d))
- break;
- goto fail;
-
-
- /* Match at the very end of the data. */
- case endbuf:
- DEBUG_PRINT1 ("EXECUTING endbuf.\n");
- if (AT_STRINGS_END (d))
- break;
- goto fail;
-
-
- /* on_failure_keep_string_jump is used to optimize `.*\n'. It
- pushes NULL as the value for the string on the stack. Then
- `pop_failure_point' will keep the current value for the
- string, instead of restoring it. To see why, consider
- matching `foo\nbar' against `.*\n'. The .* matches the foo;
- then the . fails against the \n. But the next thing we want
- to do is match the \n against the \n; if we restored the
- string value, we would be back at the foo.
-
- Because this is used only in specific cases, we don't need to
- check all the things that `on_failure_jump' does, to make
- sure the right things get saved on the stack. Hence we don't
- share its code. The only reason to push anything on the
- stack at all is that otherwise we would have to change
- `anychar's code to do something besides goto fail in this
- case; that seems worse than this. */
- case on_failure_keep_string_jump:
- DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
-
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
-
- PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
- break;
-
-
- /* Uses of on_failure_jump:
-
- Each alternative starts with an on_failure_jump that points
- to the beginning of the next alternative. Each alternative
- except the last ends with a jump that in effect jumps past
- the rest of the alternatives. (They really jump to the
- ending jump of the following alternative, because tensioning
- these jumps is a hassle.)
-
- Repeats start with an on_failure_jump that points past both
- the repetition text and either the following jump or
- pop_failure_jump back to this on_failure_jump. */
- case on_failure_jump:
- on_failure:
- DEBUG_PRINT1 ("EXECUTING on_failure_jump");
-
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
-
- /* If this on_failure_jump comes right before a group (i.e.,
- the original * applied to a group), save the information
- for that group and all inner ones, so that if we fail back
- to this point, the group's information will be correct.
- For example, in \(a*\)*\1, we need the preceding group,
- and in \(\(a*\)b*\)\2, we need the inner group. */
-
- /* We can't use `p' to check ahead because we push
- a failure point to `p + mcnt' after we do this. */
- p1 = p;
-
- /* We need to skip no_op's before we look for the
- start_memory in case this on_failure_jump is happening as
- the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
- against aba. */
- while (p1 < pend && (re_opcode_t) *p1 == no_op)
- p1++;
-
- if (p1 < pend && (re_opcode_t) *p1 == start_memory)
- {
- /* We have a new highest active register now. This will
- get reset at the start_memory we are about to get to,
- but we will have saved all the registers relevant to
- this repetition op, as described above. */
- highest_active_reg = *(p1 + 1) + *(p1 + 2);
- if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
- lowest_active_reg = *(p1 + 1);
- }
-
- DEBUG_PRINT1 (":\n");
- PUSH_FAILURE_POINT (p + mcnt, d, -2);
- break;
-
-
- /* A smart repeat ends with `maybe_pop_jump'.
- We change it to either `pop_failure_jump' or `jump'. */
- case maybe_pop_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
- {
- register unsigned char *p2 = p;
-
- /* Compare the beginning of the repeat with what in the
- pattern follows its end. If we can establish that there
- is nothing that they would both match, i.e., that we
- would have to backtrack because of (as in, e.g., `a*a')
- then we can change to pop_failure_jump, because we'll
- never have to backtrack.
-
- This is not true in the case of alternatives: in
- `(a|ab)*' we do need to backtrack to the `ab' alternative
- (e.g., if the string was `ab'). But instead of trying to
- detect that here, the alternative has put on a dummy
- failure point which is what we will end up popping. */
-
- /* Skip over open/close-group commands. */
- while (p2 + 2 < pend
- && ((re_opcode_t) *p2 == stop_memory
- || (re_opcode_t) *p2 == start_memory))
- p2 += 3; /* Skip over args, too. */
-
- /* If we're at the end of the pattern, we can change. */
- if (p2 == pend)
- {
- /* Consider what happens when matching ":\(.*\)"
- against ":/". I don't really understand this code
- yet. */
- p[-3] = (unsigned char) pop_failure_jump;
- DEBUG_PRINT1
- (" End of pattern: change to `pop_failure_jump'.\n");
- }
-
- else if ((re_opcode_t) *p2 == exactn
- || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
- {
- register unsigned char c
- = *p2 == (unsigned char) endline ? '\n' : p2[2];
- p1 = p + mcnt;
-
- /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
- to the `maybe_finalize_jump' of this case. Examine what
- follows. */
- if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
- {
- p[-3] = (unsigned char) pop_failure_jump;
- DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
- c, p1[5]);
- }
-
- else if ((re_opcode_t) p1[3] == charset
- || (re_opcode_t) p1[3] == charset_not)
- {
- int not = (re_opcode_t) p1[3] == charset_not;
-
- if (c < (unsigned char) (p1[4] * BYTEWIDTH)
- && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- not = !not;
-
- /* `not' is equal to 1 if c would match, which means
- that we can't change to pop_failure_jump. */
- if (!not)
- {
- p[-3] = (unsigned char) pop_failure_jump;
- DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
- }
- }
- }
- }
- p -= 2; /* Point at relative address again. */
- if ((re_opcode_t) p[-1] != pop_failure_jump)
- {
- p[-1] = (unsigned char) jump;
- DEBUG_PRINT1 (" Match => jump.\n");
- goto unconditional_jump;
- }
- /* Note fall through. */
-
-
- /* The end of a simple repeat has a pop_failure_jump back to
- its matching on_failure_jump, where the latter will push a
- failure point. The pop_failure_jump takes off failure
- points put on by this pop_failure_jump's matching
- on_failure_jump; we got through the pattern to here from the
- matching on_failure_jump, so didn't fail. */
- case pop_failure_jump:
- {
- /* We need to pass separate storage for the lowest and
- highest registers, even though we don't care about the
- actual values. Otherwise, we will restore only one
- register from the stack, since lowest will == highest in
- `pop_failure_point'. */
- unsigned dummy_low_reg, dummy_high_reg;
- unsigned char *pdummy;
- const char *sdummy;
-
- DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
- POP_FAILURE_POINT (sdummy, pdummy,
- dummy_low_reg, dummy_high_reg,
- reg_dummy, reg_dummy, reg_info_dummy);
- }
- /* Note fall through. */
-
-
- /* Unconditionally jump (without popping any failure points). */
- case jump:
- unconditional_jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
- DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
- p += mcnt; /* Do the jump. */
- DEBUG_PRINT2 ("(to 0x%x).\n", p);
- break;
-
-
- /* We need this opcode so we can detect where alternatives end
- in `group_match_null_string_p' et al. */
- case jump_past_alt:
- DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
- goto unconditional_jump;
-
-
- /* Normally, the on_failure_jump pushes a failure point, which
- then gets popped at pop_failure_jump. We will end up at
- pop_failure_jump, also, and with a pattern of, say, `a+', we
- are skipping over the on_failure_jump, so we have to push
- something meaningless for pop_failure_jump to pop. */
- case dummy_failure_jump:
- DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
- /* It doesn't matter what we push for the string here. What
- the code at `fail' tests is the value for the pattern. */
- PUSH_FAILURE_POINT (0, 0, -2);
- goto unconditional_jump;
-
-
- /* At the end of an alternative, we need to push a dummy failure
- point in case we are followed by a `pop_failure_jump', because
- we don't want the failure point for the alternative to be
- popped. For example, matching `(a|ab)*' against `aab'
- requires that we match the `ab' alternative. */
- case push_dummy_failure:
- DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
- /* See comments just above at `dummy_failure_jump' about the
- two zeroes. */
- PUSH_FAILURE_POINT (0, 0, -2);
- break;
-
- /* Have to succeed matching what follows at least n times.
- After that, handle like `on_failure_jump'. */
- case succeed_n:
- EXTRACT_NUMBER (mcnt, p + 2);
- DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
-
- assert (mcnt >= 0);
- /* Originally, this is how many times we HAVE to succeed. */
- if (mcnt > 0)
- {
- mcnt--;
- p += 2;
- STORE_NUMBER_AND_INCR (p, mcnt);
- DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
- }
- else if (mcnt == 0)
- {
- DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
- p[2] = (unsigned char) no_op;
- p[3] = (unsigned char) no_op;
- goto on_failure;
- }
- break;
-
- case jump_n:
- EXTRACT_NUMBER (mcnt, p + 2);
- DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
-
- /* Originally, this is how many times we CAN jump. */
- if (mcnt)
- {
- mcnt--;
- STORE_NUMBER (p + 2, mcnt);
- goto unconditional_jump;
- }
- /* If don't have to jump any more, skip over the rest of command. */
- else
- p += 4;
- break;
-
- case set_number_at:
- {
- DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
-
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- p1 = p + mcnt;
- EXTRACT_NUMBER_AND_INCR (mcnt, p);
- DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
- STORE_NUMBER (p1, mcnt);
- break;
- }
-
- case wordbound:
- DEBUG_PRINT1 ("EXECUTING wordbound.\n");
- if (AT_WORD_BOUNDARY (d))
- break;
- goto fail;
-
- case notwordbound:
- DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
- if (AT_WORD_BOUNDARY (d))
- goto fail;
- break;
-
- case wordbeg:
- DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
- if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
- break;
- goto fail;
-
- case wordend:
- DEBUG_PRINT1 ("EXECUTING wordend.\n");
- if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
- && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
- break;
- goto fail;
-
-#ifdef emacs
-#ifdef emacs19
- case before_dot:
- DEBUG_PRINT1 ("EXECUTING before_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) >= point)
- goto fail;
- break;
-
- case at_dot:
- DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) != point)
- goto fail;
- break;
-
- case after_dot:
- DEBUG_PRINT1 ("EXECUTING after_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) <= point)
- goto fail;
- break;
-#else /* not emacs19 */
- case at_dot:
- DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
- goto fail;
- break;
-#endif /* not emacs19 */
-
- case syntaxspec:
- DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
- mcnt = *p++;
- goto matchsyntax;
-
- case wordchar:
- DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
- mcnt = (int) Sword;
- matchsyntax:
- PREFETCH ();
- if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
- goto fail;
- SET_REGS_MATCHED ();
- break;
-
- case notsyntaxspec:
- DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
- mcnt = *p++;
- goto matchnotsyntax;
-
- case notwordchar:
- DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
- mcnt = (int) Sword;
- matchnotsyntax:
- PREFETCH ();
- if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
- goto fail;
- SET_REGS_MATCHED ();
- break;
-
-#else /* not emacs */
- case wordchar:
- DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
- PREFETCH ();
- if (!WORDCHAR_P (d))
- goto fail;
- SET_REGS_MATCHED ();
- d++;
- break;
-
- case notwordchar:
- DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
- PREFETCH ();
- if (WORDCHAR_P (d))
- goto fail;
- SET_REGS_MATCHED ();
- d++;
- break;
-#endif /* not emacs */
-
- default:
- abort ();
- }
- continue; /* Successfully executed one pattern command; keep going. */
-
-
- /* We goto here if a matching operation fails. */
- fail:
- if (!FAIL_STACK_EMPTY ())
- { /* A restart point is known. Restore to that state. */
- DEBUG_PRINT1 ("\nFAIL:\n");
- POP_FAILURE_POINT (d, p,
- lowest_active_reg, highest_active_reg,
- regstart, regend, reg_info);
-
- /* If this failure point is a dummy, try the next one. */
- if (!p)
- goto fail;
-
- /* If we failed to the end of the pattern, don't examine *p. */
- assert (p <= pend);
- if (p < pend)
- {
- boolean is_a_jump_n = false;
-
- /* If failed to a backwards jump that's part of a repetition
- loop, need to pop this failure point and use the next one. */
- switch ((re_opcode_t) *p)
- {
- case jump_n:
- is_a_jump_n = true;
- case maybe_pop_jump:
- case pop_failure_jump:
- case jump:
- p1 = p + 1;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- p1 += mcnt;
-
- if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
- || (!is_a_jump_n
- && (re_opcode_t) *p1 == on_failure_jump))
- goto fail;
- break;
- default:
- /* do nothing */ ;
- }
- }
-
- if (d >= string1 && d <= end1)
- dend = end_match_1;
- }
- else
- break; /* Matching at this starting point really fails. */
- } /* for (;;) */
-
- if (best_regs_set)
- goto restore_best_regs;
-
- FREE_VARIABLES ();
-
- return -1; /* Failure to match. */
-} /* re_match_2 */
-\f
-/* Subroutine definitions for re_match_2. */
-
-
-/* We are passed P pointing to a register number after a start_memory.
-
- Return true if the pattern up to the corresponding stop_memory can
- match the empty string, and false otherwise.
-
- If we find the matching stop_memory, sets P to point to one past its number.
- Otherwise, sets P to an undefined byte less than or equal to END.
-
- We don't handle duplicates properly (yet). */
-
-static boolean
-group_match_null_string_p (p, end, reg_info)
- unsigned char **p, *end;
- register_info_type *reg_info;
-{
- int mcnt;
- /* Point to after the args to the start_memory. */
- unsigned char *p1 = *p + 2;
-
- while (p1 < end)
- {
- /* Skip over opcodes that can match nothing, and return true or
- false, as appropriate, when we get to one that can't, or to the
- matching stop_memory. */
-
- switch ((re_opcode_t) *p1)
- {
- /* Could be either a loop or a series of alternatives. */
- case on_failure_jump:
- p1++;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-
- /* If the next operation is not a jump backwards in the
- pattern. */
-
- if (mcnt >= 0)
- {
- /* Go through the on_failure_jumps of the alternatives,
- seeing if any of the alternatives cannot match nothing.
- The last alternative starts with only a jump,
- whereas the rest start with on_failure_jump and end
- with a jump, e.g., here is the pattern for `a|b|c':
-
- /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
- /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
- /exactn/1/c
-
- So, we have to first go through the first (n-1)
- alternatives and then deal with the last one separately. */
-
-
- /* Deal with the first (n-1) alternatives, which start
- with an on_failure_jump (see above) that jumps to right
- past a jump_past_alt. */
-
- while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
- {
- /* `mcnt' holds how many bytes long the alternative
- is, including the ending `jump_past_alt' and
- its number. */
-
- if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
- reg_info))
- return false;
-
- /* Move to right after this alternative, including the
- jump_past_alt. */
- p1 += mcnt;
-
- /* Break if it's the beginning of an n-th alternative
- that doesn't begin with an on_failure_jump. */
- if ((re_opcode_t) *p1 != on_failure_jump)
- break;
-
- /* Still have to check that it's not an n-th
- alternative that starts with an on_failure_jump. */
- p1++;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
- {
- /* Get to the beginning of the n-th alternative. */
- p1 -= 3;
- break;
- }
- }
-
- /* Deal with the last alternative: go back and get number
- of the `jump_past_alt' just before it. `mcnt' contains
- the length of the alternative. */
- EXTRACT_NUMBER (mcnt, p1 - 2);
-
- if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
- return false;
-
- p1 += mcnt; /* Get past the n-th alternative. */
- } /* if mcnt > 0 */
- break;
-
-
- case stop_memory:
- assert (p1[1] == **p);
- *p = p1 + 2;
- return true;
-
-
- default:
- if (!common_op_match_null_string_p (&p1, end, reg_info))
- return false;
- }
- } /* while p1 < end */
-
- return false;
-} /* group_match_null_string_p */
-
-
-/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
- It expects P to be the first byte of a single alternative and END one
- byte past the last. The alternative can contain groups. */
-
-static boolean
-alt_match_null_string_p (p, end, reg_info)
- unsigned char *p, *end;
- register_info_type *reg_info;
-{
- int mcnt;
- unsigned char *p1 = p;
-
- while (p1 < end)
- {
- /* Skip over opcodes that can match nothing, and break when we get
- to one that can't. */
-
- switch ((re_opcode_t) *p1)
- {
- /* It's a loop. */
- case on_failure_jump:
- p1++;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- p1 += mcnt;
- break;
-
- default:
- if (!common_op_match_null_string_p (&p1, end, reg_info))
- return false;
- }
- } /* while p1 < end */
-
- return true;
-} /* alt_match_null_string_p */
-
-
-/* Deals with the ops common to group_match_null_string_p and
- alt_match_null_string_p.
-
- Sets P to one after the op and its arguments, if any. */
-
-static boolean
-common_op_match_null_string_p (p, end, reg_info)
- unsigned char **p, *end;
- register_info_type *reg_info;
-{
- int mcnt;
- boolean ret;
- int reg_no;
- unsigned char *p1 = *p;
-
- switch ((re_opcode_t) *p1++)
- {
- case no_op:
- case begline:
- case endline:
- case begbuf:
- case endbuf:
- case wordbeg:
- case wordend:
- case wordbound:
- case notwordbound:
-#ifdef emacs
- case before_dot:
- case at_dot:
- case after_dot:
-#endif
- break;
-
- case start_memory:
- reg_no = *p1;
- assert (reg_no > 0 && reg_no <= MAX_REGNUM);
- ret = group_match_null_string_p (&p1, end, reg_info);
-
- /* Have to set this here in case we're checking a group which
- contains a group and a back reference to it. */
-
- if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
- REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
-
- if (!ret)
- return false;
- break;
-
- /* If this is an optimized succeed_n for zero times, make the jump. */
- case jump:
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- if (mcnt >= 0)
- p1 += mcnt;
- else
- return false;
- break;
-
- case succeed_n:
- /* Get to the number of times to succeed. */
- p1 += 2;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-
- if (mcnt == 0)
- {
- p1 -= 4;
- EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- p1 += mcnt;
- }
- else
- return false;
- break;
-
- case duplicate:
- if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
- return false;
- break;
-
- case set_number_at:
- p1 += 4;
-
- default:
- /* All other opcodes mean we cannot match the empty string. */
- return false;
- }
-
- *p = p1;
- return true;
-} /* common_op_match_null_string_p */
-
-
-/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
- bytes; nonzero otherwise. */
-
-static int
-bcmp_translate(
- unsigned char *s1,
- unsigned char *s2,
- int len,
- char *translate
-)
-{
- register unsigned char *p1 = s1, *p2 = s2;
- while (len)
- {
- if (translate[*p1++] != translate[*p2++]) return 1;
- len--;
- }
- return 0;
-}
-\f
-/* Entry points for GNU code. */
-
-/* re_compile_pattern is the GNU regular expression compiler: it
- compiles PATTERN (of length SIZE) and puts the result in BUFP.
- Returns 0 if the pattern was valid, otherwise an error string.
-
- Assumes the `allocated' (and perhaps `buffer') and `translate' fields
- are set in BUFP on entry.
-
- We call regex_compile to do the actual compilation. */
-
-const char *
-re_compile_pattern (pattern, length, bufp)
- const char *pattern;
- int length;
- struct re_pattern_buffer *bufp;
-{
- reg_errcode_t ret;
-
- /* GNU code is written to assume at least RE_NREGS registers will be set
- (and at least one extra will be -1). */
- bufp->regs_allocated = REGS_UNALLOCATED;
-
- /* And GNU code determines whether or not to get register information
- by passing null for the REGS argument to re_match, etc., not by
- setting no_sub. */
- bufp->no_sub = 0;
-
- /* Match anchors at newline. */
- bufp->newline_anchor = 1;
-
- ret = regex_compile (pattern, length, re_syntax_options, bufp);
-
- return re_error_msg[(int) ret];
-}
-\f
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- them if this is an Emacs or POSIX compilation. */
-
-#if !defined (emacs) && !defined (_POSIX_SOURCE)
-
-/* BSD has one and only one pattern buffer. */
-static struct re_pattern_buffer re_comp_buf;
-
-char *
-re_comp (s)
- const char *s;
-{
- reg_errcode_t ret;
-
- if (!s)
- {
- if (!re_comp_buf.buffer)
- return "No previous regular expression";
- return 0;
- }
-
- if (!re_comp_buf.buffer)
- {
- re_comp_buf.buffer = (unsigned char *) malloc (200);
- if (re_comp_buf.buffer == NULL)
- return "Memory exhausted";
- re_comp_buf.allocated = 200;
-
- re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
- if (re_comp_buf.fastmap == NULL)
- return "Memory exhausted";
- }
-
- /* Since `re_exec' always passes NULL for the `regs' argument, we
- don't need to initialize the pattern buffer fields which affect it. */
-
- /* Match anchors at newlines. */
- re_comp_buf.newline_anchor = 1;
-
- ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
-
- /* Yes, we're discarding `const' here. */
- return (char *) re_error_msg[(int) ret];
-}
-
-
-int
-re_exec (s)
- const char *s;
-{
- const int len = strlen (s);
- return
- 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
-}
-#endif /* not emacs and not _POSIX_SOURCE */
-\f
-/* POSIX.2 functions. Don't define these for Emacs. */
-
-#ifndef emacs
-
-/* regcomp takes a regular expression as a string and compiles it.
-
- PREG is a regex_t *. We do not expect any fields to be initialized,
- since POSIX says we shouldn't. Thus, we set
-
- `buffer' to the compiled pattern;
- `used' to the length of the compiled pattern;
- `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
- REG_EXTENDED bit in CFLAGS is set; otherwise, to
- RE_SYNTAX_POSIX_BASIC;
- `newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' and `fastmap_accurate' to zero;
- `re_nsub' to the number of subexpressions in PATTERN.
-
- PATTERN is the address of the pattern string.
-
- CFLAGS is a series of bits which affect compilation.
-
- If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
- use POSIX basic syntax.
-
- If REG_NEWLINE is set, then . and [^...] don't match newline.
- Also, regexec will try a match beginning after every newline.
-
- If REG_ICASE is set, then we considers upper- and lowercase
- versions of letters to be equivalent when matching.
-
- If REG_NOSUB is set, then when PREG is passed to regexec, that
- routine will report only success or failure, and nothing about the
- registers.
-
- It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
- the return codes and their meanings.) */
-
-int
-regcomp (preg, pattern, cflags)
- regex_t *preg;
- const char *pattern;
- int cflags;
-{
- reg_errcode_t ret;
- unsigned syntax
- = (cflags & REG_EXTENDED) ?
- RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
-
- /* regex_compile will allocate the space for the compiled pattern. */
- preg->buffer = 0;
- preg->allocated = 0;
-
- /* Don't bother to use a fastmap when searching. This simplifies the
- REG_NEWLINE case: if we used a fastmap, we'd have to put all the
- characters after newlines into the fastmap. This way, we just try
- every character. */
- preg->fastmap = 0;
-
- if (cflags & REG_ICASE)
- {
- unsigned i;
-
- preg->translate = (char *) malloc (CHAR_SET_SIZE);
- if (preg->translate == NULL)
- return (int) REG_ESPACE;
-
- /* Map uppercase characters to corresponding lowercase ones. */
- for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
- }
- else
- preg->translate = NULL;
-
- /* If REG_NEWLINE is set, newlines are treated differently. */
- if (cflags & REG_NEWLINE)
- { /* REG_NEWLINE implies neither . nor [^...] match newline. */
- syntax &= ~RE_DOT_NEWLINE;
- syntax |= RE_HAT_LISTS_NOT_NEWLINE;
- /* It also changes the matching behavior. */
- preg->newline_anchor = 1;
- }
- else
- preg->newline_anchor = 0;
-
- preg->no_sub = !!(cflags & REG_NOSUB);
-
- /* POSIX says a null character in the pattern terminates it, so we
- can use strlen here in compiling the pattern. */
- ret = regex_compile (pattern, strlen (pattern), syntax, preg);
-
- /* POSIX doesn't distinguish between an unmatched open-group and an
- unmatched close-group: both are REG_EPAREN. */
- if (ret == REG_ERPAREN) ret = REG_EPAREN;
-
- return (int) ret;
-}
-
-
-/* regexec searches for a given pattern, specified by PREG, in the
- string STRING.
-
- If NMATCH is zero or REG_NOSUB was set in the cflags argument to
- `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
- least NMATCH elements, and we set them to the offsets of the
- corresponding matched substrings.
-
- EFLAGS specifies `execution flags' which affect matching: if
- REG_NOTBOL is set, then ^ does not match at the beginning of the
- string; if REG_NOTEOL is set, then $ does not match at the end.
-
- We return 0 if we find a match and REG_NOMATCH if not. */
-
-int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *preg;
- const char *string;
- size_t nmatch;
- regmatch_t pmatch[];
- int eflags;
-{
- int ret;
- struct re_registers regs;
- regex_t private_preg;
- int len = strlen (string);
- boolean want_reg_info = !preg->no_sub && nmatch > 0;
-
- private_preg = *preg;
-
- private_preg.not_bol = !!(eflags & REG_NOTBOL);
- private_preg.not_eol = !!(eflags & REG_NOTEOL);
-
- /* The user has told us exactly how many registers to return
- information about, via `nmatch'. We have to pass that on to the
- matching routines. */
- private_preg.regs_allocated = REGS_FIXED;
-
- if (want_reg_info)
- {
- regs.num_regs = nmatch;
- regs.start = TALLOC (nmatch, regoff_t);
- regs.end = TALLOC (nmatch, regoff_t);
- if (regs.start == NULL || regs.end == NULL)
- return (int) REG_NOMATCH;
- }
-
- /* Perform the searching operation. */
- ret = re_search (&private_preg, string, len,
- /* start: */ 0, /* range: */ len,
- want_reg_info ? ®s : (struct re_registers *) 0);
-
- /* Copy the register information to the POSIX structure. */
- if (want_reg_info)
- {
- if (ret >= 0)
- {
- unsigned r;
-
- for (r = 0; r < nmatch; r++)
- {
- pmatch[r].rm_so = regs.start[r];
- pmatch[r].rm_eo = regs.end[r];
- }
- }
-
- /* If we needed the temporary register info, free the space now. */
- free (regs.start);
- free (regs.end);
- }
-
- /* We want zero return to mean success, unlike `re_search'. */
- return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
-}
-
-
-/* Returns a message corresponding to an error code, ERRCODE, returned
- from either regcomp or regexec. We don't use PREG here. */
-
-size_t
-regerror (errcode, preg, errbuf, errbuf_size)
- int errcode;
- const regex_t *preg;
- char *errbuf;
- size_t errbuf_size;
-{
- const char *msg;
- size_t msg_size;
-
- if (errcode < 0
- || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
- /* Only error codes returned by the rest of the code should be passed
- to this routine. If we are given anything else, or if other regex
- code generates an invalid error code, then the program has a bug.
- Dump core so we can fix it. */
- abort ();
-
- msg = re_error_msg[errcode];
-
- /* POSIX doesn't require that we do anything in this case, but why
- not be nice. */
- if (! msg)
- msg = "Success";
-
- msg_size = strlen (msg) + 1; /* Includes the null. */
-
- if (errbuf_size != 0)
- {
- if (msg_size > errbuf_size)
- {
- strncpy (errbuf, msg, errbuf_size - 1);
- errbuf[errbuf_size - 1] = 0;
- }
- else
- strcpy (errbuf, msg);
- }
-
- return msg_size;
-}
-
-
-/* Free dynamically allocated space used by PREG. */
-
-void
-regfree (preg)
- regex_t *preg;
-{
- if (preg->buffer != NULL)
- free (preg->buffer);
- preg->buffer = NULL;
-
- preg->allocated = 0;
- preg->used = 0;
-
- if (preg->fastmap != NULL)
- free (preg->fastmap);
- preg->fastmap = NULL;
- preg->fastmap_accurate = 0;
-
- if (preg->translate != NULL)
- free (preg->translate);
- preg->translate = NULL;
-}
-
-#endif /* not emacs */
-\f
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
+++ /dev/null
-/* Definitions for data structures and routines for the regular
- expression library, version 0.12.
-
- Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-
-#ifndef __REGEXP_LIBRARY_H__
-#define __REGEXP_LIBRARY_H__
-
-/* POSIX says that <sys/types.h> must be included (by the caller) before
- <regex.h>. */
-
-#ifdef VMS
-/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
- should be there. */
-#include <stddef.h>
-#endif
-
-
-/* The following bits are used to determine the regexp syntax we
- recognize. The set/not-set meanings are chosen so that Emacs syntax
- remains the value 0. The bits are given in alphabetical order, and
- the definitions shifted by one from the previous bit; thus, when we
- add or remove a bit, only one other definition need change. */
-typedef unsigned reg_syntax_t;
-
-/* If this bit is not set, then \ inside a bracket expression is literal.
- If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
-
-/* If this bit is not set, then + and ? are operators, and \+ and \? are
- literals.
- If set, then \+ and \? are operators and + and ? are literals. */
-#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
-
-/* If this bit is set, then character classes are supported. They are:
- [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
- [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
- If not set, then character classes are not supported. */
-#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
-
-/* If this bit is set, then ^ and $ are always anchors (outside bracket
- expressions, of course).
- If this bit is not set, then it depends:
- ^ is an anchor if it is at the beginning of a regular
- expression or after an open-group or an alternation operator;
- $ is an anchor if it is at the end of a regular expression, or
- before a close-group or an alternation operator.
-
- This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
- POSIX draft 11.2 says that * etc. in leading positions is undefined.
- We already implemented a previous draft which made those constructs
- invalid, though, so we haven't changed the code back. */
-#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
-
-/* If this bit is set, then special characters are always special
- regardless of where they are in the pattern.
- If this bit is not set, then special characters are special only in
- some contexts; otherwise they are ordinary. Specifically,
- * + ? and intervals are only special when not after the beginning,
- open-group, or alternation operator. */
-#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
-
-/* If this bit is set, then *, +, ?, and { cannot be first in an re or
- immediately after an alternation or begin-group operator. */
-#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
-
-/* If this bit is set, then . matches newline.
- If not set, then it doesn't. */
-#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
-
-/* If this bit is set, then . doesn't match NUL.
- If not set, then it does. */
-#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
-
-/* If this bit is set, nonmatching lists [^...] do not match newline.
- If not set, they do. */
-#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
-
-/* If this bit is set, either \{...\} or {...} defines an
- interval, depending on RE_NO_BK_BRACES.
- If not set, \{, \}, {, and } are literals. */
-#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
-
-/* If this bit is set, +, ? and | aren't recognized as operators.
- If not set, they are. */
-#define RE_LIMITED_OPS (RE_INTERVALS << 1)
-
-/* If this bit is set, newline is an alternation operator.
- If not set, newline is literal. */
-#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
-
-/* If this bit is set, then `{...}' defines an interval, and \{ and \}
- are literals.
- If not set, then `\{...\}' defines an interval. */
-#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
-
-/* If this bit is set, (...) defines a group, and \( and \) are literals.
- If not set, \(...\) defines a group, and ( and ) are literals. */
-#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
-
-/* If this bit is set, then \<digit> matches <digit>.
- If not set, then \<digit> is a back-reference. */
-#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
-
-/* If this bit is set, then | is an alternation operator, and \| is literal.
- If not set, then \| is an alternation operator, and | is literal. */
-#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
-
-/* If this bit is set, then an ending range point collating higher
- than the starting range point, as in [z-a], is invalid.
- If not set, then when ending range point collates higher than the
- starting range point, the range is ignored. */
-#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
-
-/* If this bit is set, then an unmatched ) is ordinary.
- If not set, then an unmatched ) is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
-
-/* This global variable defines the particular regexp syntax to use (for
- some interfaces). When a regexp is compiled, the syntax used is
- stored in the pattern buffer, so changing this does not affect
- already-compiled regexps. */
-extern reg_syntax_t re_syntax_options;
-\f
-/* Define combinations of the above bits for the standard possibilities.
- (The [[[ comments delimit what gets put into the Texinfo file, so
- don't delete them!) */
-/* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS 0
-
-#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
-
-#define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
- | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
-
-#define RE_SYNTAX_EGREP \
- (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
- | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
-
-#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
-
-/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
-#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
-
-#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
-
-/* Syntax bits common to both basic and extended POSIX regex syntax. */
-#define _RE_SYNTAX_POSIX_COMMON \
- (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
-
-#define RE_SYNTAX_POSIX_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
-
-/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
- RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
- isn't minimal, since other operators, such as \`, aren't disabled. */
-#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
-
-#define RE_SYNTAX_POSIX_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
- replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
-#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
-/* [[[end syntaxes]]] */
-\f
-/* Maximum number of duplicates an interval can allow. Some systems
- (erroneously) define this in other header files, but we want our
- value, so remove any previous define. */
-#ifdef RE_DUP_MAX
-#undef RE_DUP_MAX
-#endif
-#define RE_DUP_MAX ((1 << 15) - 1)
-
-
-/* POSIX `cflags' bits (i.e., information for `regcomp'). */
-
-/* If this bit is set, then use extended regular expression syntax.
- If not set, then use basic regular expression syntax. */
-#define REG_EXTENDED 1
-
-/* If this bit is set, then ignore case when matching.
- If not set, then case is significant. */
-#define REG_ICASE (REG_EXTENDED << 1)
-
-/* If this bit is set, then anchors do not match at newline
- characters in the string.
- If not set, then anchors do match at newlines. */
-#define REG_NEWLINE (REG_ICASE << 1)
-
-/* If this bit is set, then report only success or fail in regexec.
- If not set, then returns differ between not matching and errors. */
-#define REG_NOSUB (REG_NEWLINE << 1)
-
-
-/* POSIX `eflags' bits (i.e., information for regexec). */
-
-/* If this bit is set, then the beginning-of-line operator doesn't match
- the beginning of the string (presumably because it's not the
- beginning of a line).
- If not set, then the beginning-of-line operator does match the
- beginning of the string. */
-#define REG_NOTBOL 1
-
-/* Like REG_NOTBOL, except for the end-of-line. */
-#define REG_NOTEOL (1 << 1)
-
-
-/* If any error codes are removed, changed, or added, update the
- `re_error_msg' table in regex.c. */
-typedef enum
-{
- REG_NOERROR = 0, /* Success. */
- REG_NOMATCH, /* Didn't find a match (for regexec). */
-
- /* POSIX regcomp return error codes. (In the order listed in the
- standard.) */
- REG_BADPAT, /* Invalid pattern. */
- REG_ECOLLATE, /* Not implemented. */
- REG_ECTYPE, /* Invalid character class name. */
- REG_EESCAPE, /* Trailing backslash. */
- REG_ESUBREG, /* Invalid back reference. */
- REG_EBRACK, /* Unmatched left bracket. */
- REG_EPAREN, /* Parenthesis imbalance. */
- REG_EBRACE, /* Unmatched \{. */
- REG_BADBR, /* Invalid contents of \{\}. */
- REG_ERANGE, /* Invalid range end. */
- REG_ESPACE, /* Ran out of memory. */
- REG_BADRPT, /* No preceding re for repetition op. */
-
- /* Error codes we've added. */
- REG_EEND, /* Premature end. */
- REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
- REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
-} reg_errcode_t;
-\f
-/* This data structure represents a compiled pattern. Before calling
- the pattern compiler, the fields `buffer', `allocated', `fastmap',
- `translate', and `no_sub' can be set. After the pattern has been
- compiled, the `re_nsub' field is available. All other fields are
- private to the regex routines. */
-
-struct re_pattern_buffer
-{
-/* [[[begin pattern_buffer]]] */
- /* Space that holds the compiled pattern. It is declared as
- `unsigned char *' because its elements are
- sometimes used as array indexes. */
- unsigned char *buffer;
-
- /* Number of bytes to which `buffer' points. */
- unsigned long allocated;
-
- /* Number of bytes actually used in `buffer'. */
- unsigned long used;
-
- /* Syntax setting with which the pattern was compiled. */
- reg_syntax_t syntax;
-
- /* Pointer to a fastmap, if any, otherwise zero. re_search uses
- the fastmap, if there is one, to skip over impossible
- starting points for matches. */
- char *fastmap;
-
- /* Either a translate table to apply to all characters before
- comparing them, or zero for no translation. The translation
- is applied to a pattern when it is compiled and to a string
- when it is matched. */
- char *translate;
-
- /* Number of subexpressions found by the compiler. */
- size_t re_nsub;
-
- /* Zero if this pattern cannot match the empty string, one else.
- Well, in truth it's used only in `re_search_2', to see
- whether or not we should use the fastmap, so we don't set
- this absolutely perfectly; see `re_compile_fastmap' (the
- `duplicate' case). */
- unsigned can_be_null : 1;
-
- /* If REGS_UNALLOCATED, allocate space in the `regs' structure
- for `max (RE_NREGS, re_nsub + 1)' groups.
- If REGS_REALLOCATE, reallocate space if necessary.
- If REGS_FIXED, use what's there. */
-#define REGS_UNALLOCATED 0
-#define REGS_REALLOCATE 1
-#define REGS_FIXED 2
- unsigned regs_allocated : 2;
-
- /* Set to zero when `regex_compile' compiles a pattern; set to one
- by `re_compile_fastmap' if it updates the fastmap. */
- unsigned fastmap_accurate : 1;
-
- /* If set, `re_match_2' does not return information about
- subexpressions. */
- unsigned no_sub : 1;
-
- /* If set, a beginning-of-line anchor doesn't match at the
- beginning of the string. */
- unsigned not_bol : 1;
-
- /* Similarly for an end-of-line anchor. */
- unsigned not_eol : 1;
-
- /* If true, an anchor at a newline matches. */
- unsigned newline_anchor : 1;
-
-/* [[[end pattern_buffer]]] */
-};
-
-typedef struct re_pattern_buffer regex_t;
-
-
-/* search.c (search_buffer) in Emacs needs this one opcode value. It is
- defined both in `regex.c' and here. */
-#define RE_EXACTN_VALUE 1
-\f
-/* Type for byte offsets within the string. POSIX mandates this. */
-typedef int regoff_t;
-
-
-/* This is the structure we store register match data in. See
- regex.texinfo for a full description of what registers match. */
-struct re_registers
-{
- unsigned num_regs;
- regoff_t *start;
- regoff_t *end;
-};
-
-
-/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
- `re_match_2' returns information about at least this many registers
- the first time a `regs' structure is passed. */
-#ifndef RE_NREGS
-#define RE_NREGS 30
-#endif
-
-
-/* POSIX specification for registers. Aside from the different names than
- `re_registers', POSIX uses an array of structures, instead of a
- structure of arrays. */
-typedef struct
-{
- regoff_t rm_so; /* Byte offset from string's start to substring's start. */
- regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
-} regmatch_t;
-\f
-/* Declarations for routines. */
-
-/* To avoid duplicating every routine declaration -- once with a
- prototype (if we are ANSI), and once without (if we aren't) -- we
- use the following macro to declare argument types. This
- unfortunately clutters up the declarations a bit, but I think it's
- worth it. */
-
-#if __STDC__
-
-#define _RE_ARGS(args) args
-
-#else /* not __STDC__ */
-
-#define _RE_ARGS(args) ()
-
-#endif /* not __STDC__ */
-
-/* Sets the current default syntax to SYNTAX, and return the old syntax.
- You can also simply assign to the `re_syntax_options' variable. */
-extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
-
-/* Compile the regular expression PATTERN, with length LENGTH
- and syntax given by the global `re_syntax_options', into the buffer
- BUFFER. Return NULL if successful, and an error string if not. */
-extern const char *re_compile_pattern
- _RE_ARGS ((const char *pattern, int length,
- struct re_pattern_buffer *buffer));
-
-
-/* Compile a fastmap for the compiled pattern in BUFFER; used to
- accelerate searches. Return 0 if successful and -2 if was an
- internal error. */
-extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
-
-
-/* Search in the string STRING (with length LENGTH) for the pattern
- compiled into BUFFER. Start searching at position START, for RANGE
- characters. Return the starting position of the match, -1 for no
- match, or -2 for an internal error. Also return register
- information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, int range, struct re_registers *regs));
-
-
-/* Like `re_search', but search in the concatenation of STRING1 and
- STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, int range, struct re_registers *regs, int stop));
-
-
-/* Like `re_search', but return how many characters in STRING the regexp
- in BUFFER matched, starting at position START. */
-extern int re_match
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
- int length, int start, struct re_registers *regs));
-
-
-/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2
- _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, struct re_registers *regs, int stop));
-
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- ENDS. Subsequent matches using BUFFER and REGS will use this memory
- for recording register information. STARTS and ENDS must be
- allocated with malloc, and must each be at least `NUM_REGS * sizeof
- (regoff_t)' bytes long.
-
- If NUM_REGS == 0, then subsequent matches should allocate their own
- register data.
-
- Unless this function is called, the first search or match using
- PATTERN_BUFFER will allocate its own register data, without
- freeing the old data. */
-extern void re_set_registers
- _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
- unsigned num_regs, regoff_t *starts, regoff_t *ends));
-
-/* 4.2 bsd compatibility. */
-extern char *re_comp _RE_ARGS ((const char *));
-extern int re_exec _RE_ARGS ((const char *));
-
-/* POSIX compatibility. */
-extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
-extern int regexec
- _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags));
-extern size_t regerror
- _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
- size_t errbuf_size));
-extern void regfree _RE_ARGS ((regex_t *preg));
-
-#endif /* not __REGEXP_LIBRARY_H__ */
-\f
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
--- /dev/null
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
+
+ Copyright (C) 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+ #pragma alloca
+#endif
+
+#define _GNU_SOURCE
+
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ `BSTRING', as far as I know, and neither of them use this code. */
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+
+#include <stdlib.h>
+
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void
+init_syntax_once ()
+{
+ register int c;
+ static int done = 0;
+
+ if (done)
+ return;
+
+ bzero (re_syntax_table, sizeof re_syntax_table);
+
+ for (c = 'a'; c <= 'z'; c++)
+ re_syntax_table[c] = Sword;
+
+ for (c = 'A'; c <= 'Z'; c++)
+ re_syntax_table[c] = Sword;
+
+ for (c = '0'; c <= '9'; c++)
+ re_syntax_table[c] = Sword;
+
+ re_syntax_table['_'] = Sword;
+
+ done = 1;
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+\f
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+#ifndef isascii
+#define isascii(c) 1
+#endif
+
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+\f
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is non-NULL and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+\f
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn = 1,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if know won't have to backtrack to
+ match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+\f
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+static void
+extract_number (dest, source)
+ int *dest;
+ unsigned char *source;
+{
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ *dest = *source & 0377;
+ *dest += temp << 8;
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+static void
+extract_number_and_incr (destination, source)
+ int *destination;
+ unsigned char **source;
+{
+ extract_number (destination, *source);
+ *source += 2;
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+\f
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+extern void printchar ();
+
+/* Print the fastmap in human-readable form. */
+
+void
+print_fastmap (fastmap)
+ char *fastmap;
+{
+ unsigned was_a_range = 0;
+ unsigned i = 0;
+
+ while (i < (1 << BYTEWIDTH))
+ {
+ if (fastmap[i++])
+ {
+ was_a_range = 0;
+ printchar (i - 1);
+ while (i < (1 << BYTEWIDTH) && fastmap[i])
+ {
+ was_a_range = 1;
+ i++;
+ }
+ if (was_a_range)
+ {
+ printf ("-");
+ printchar (i - 1);
+ }
+ }
+ }
+ putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+ the START pointer into it and ending just before the pointer END. */
+
+void
+print_partial_compiled_pattern (start, end)
+ unsigned char *start;
+ unsigned char *end;
+{
+ int mcnt, mcnt2;
+ unsigned char *p = start;
+ unsigned char *pend = end;
+
+ if (start == NULL)
+ {
+ printf ("(null)\n");
+ return;
+ }
+
+ /* Loop over pattern commands. */
+ while (p < pend)
+ {
+ switch ((re_opcode_t) *p++)
+ {
+ case no_op:
+ printf ("/no_op");
+ break;
+
+ case exactn:
+ mcnt = *p++;
+ printf ("/exactn/%d", mcnt);
+ do
+ {
+ putchar ('/');
+ printchar (*p++);
+ }
+ while (--mcnt);
+ break;
+
+ case start_memory:
+ mcnt = *p++;
+ printf ("/start_memory/%d/%d", mcnt, *p++);
+ break;
+
+ case stop_memory:
+ mcnt = *p++;
+ printf ("/stop_memory/%d/%d", mcnt, *p++);
+ break;
+
+ case duplicate:
+ printf ("/duplicate/%d", *p++);
+ break;
+
+ case anychar:
+ printf ("/anychar");
+ break;
+
+ case charset:
+ case charset_not:
+ {
+ register int c;
+
+ printf ("/charset%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+ assert (p + *p < pend);
+
+ for (c = 0; c < *p; c++)
+ {
+ unsigned bit;
+ unsigned char map_byte = p[1 + c];
+
+ putchar ('/');
+
+ for (bit = 0; bit < BYTEWIDTH; bit++)
+ if (map_byte & (1 << bit))
+ printchar (c * BYTEWIDTH + bit);
+ }
+ p += 1 + *p;
+ break;
+ }
+
+ case begline:
+ printf ("/begline");
+ break;
+
+ case endline:
+ printf ("/endline");
+ break;
+
+ case on_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/on_failure_jump/0/%d", mcnt);
+ break;
+
+ case on_failure_keep_string_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+ break;
+
+ case dummy_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/dummy_failure_jump/0/%d", mcnt);
+ break;
+
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
+ case maybe_pop_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/maybe_pop_jump/0/%d", mcnt);
+ break;
+
+ case pop_failure_jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/pop_failure_jump/0/%d", mcnt);
+ break;
+
+ case jump_past_alt:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/jump_past_alt/0/%d", mcnt);
+ break;
+
+ case jump:
+ extract_number_and_incr (&mcnt, &p);
+ printf ("/jump/0/%d", mcnt);
+ break;
+
+ case succeed_n:
+ extract_number_and_incr (&mcnt, &p);
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+ break;
+
+ case jump_n:
+ extract_number_and_incr (&mcnt, &p);
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+ break;
+
+ case set_number_at:
+ extract_number_and_incr (&mcnt, &p);
+ extract_number_and_incr (&mcnt2, &p);
+ printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+ break;
+
+ case wordbound:
+ printf ("/wordbound");
+ break;
+
+ case notwordbound:
+ printf ("/notwordbound");
+ break;
+
+ case wordbeg:
+ printf ("/wordbeg");
+ break;
+
+ case wordend:
+ printf ("/wordend");
+
+#ifdef emacs
+ case before_dot:
+ printf ("/before_dot");
+ break;
+
+ case at_dot:
+ printf ("/at_dot");
+ break;
+
+ case after_dot:
+ printf ("/after_dot");
+ break;
+
+ case syntaxspec:
+ printf ("/syntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+
+ case notsyntaxspec:
+ printf ("/notsyntaxspec");
+ mcnt = *p++;
+ printf ("/%d", mcnt);
+ break;
+#endif /* emacs */
+
+ case wordchar:
+ printf ("/wordchar");
+ break;
+
+ case notwordchar:
+ printf ("/notwordchar");
+ break;
+
+ case begbuf:
+ printf ("/begbuf");
+ break;
+
+ case endbuf:
+ printf ("/endbuf");
+ break;
+
+ default:
+ printf ("?%d", *(p-1));
+ }
+ }
+ printf ("/\n");
+}
+
+
+void
+print_compiled_pattern (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ unsigned char *buffer = bufp->buffer;
+
+ print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
+
+ if (bufp->fastmap_accurate && bufp->fastmap)
+ {
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
+ }
+
+ printf ("re_nsub: %d\t", bufp->re_nsub);
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %d\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
+}
+
+
+void
+print_double_string (where, string1, size1, string2, size2)
+ const char *where;
+ const char *string1;
+ const char *string2;
+ int size1;
+ int size2;
+{
+ unsigned this_char;
+
+ if (where == NULL)
+ printf ("(null)");
+ else
+ {
+ if (FIRST_STRING_P (where))
+ {
+ for (this_char = where - string1; this_char < size1; this_char++)
+ printchar (string1[this_char]);
+
+ where = string2;
+ }
+
+ for (this_char = where - string2; this_char < size2; this_char++)
+ printchar (string2[this_char]);
+ }
+}
+
+#else /* not DEBUG */
+
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+\f
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+ reg_syntax_t syntax;
+{
+ reg_syntax_t ret = re_syntax_options;
+
+ re_syntax_options = syntax;
+ return ret;
+}
+\f
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there. */
+
+static const char *re_error_msg[] =
+ { NULL, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+ };
+\f
+/* Subroutine declarations and macros for regex_compile. */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate'). */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = translate[c]; \
+ } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned. */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer. */
+#define GET_BUFFER_SPACE(n) \
+ while (b - bufp->buffer + (n) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Extend the buffer by twice its current size via realloc and
+ reset the pointers that pointed into the old block to point to the
+ correct places in the new one. If extending the buffer results in it
+ being larger than MAX_BUF_SIZE, then flag memory exhausted. */
+#define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+ if (bufp->buffer == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+typedef int pattern_offset_t;
+
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) c) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (ISDIGIT (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+#define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+\f
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random tempory spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ char *translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ printchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) return REG_ESPACE;
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) return REG_EBRACK;
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If isn't a word bracketed by `[:' and:`]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch))
+ || (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch))
+ || (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) return REG_EESCAPE;
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ return REG_ERPAREN;
+ }
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ return REG_ERPAREN;
+ }
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_EBRACE;
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') return REG_EBRACE;
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ return REG_ESUBREG;
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ return REG_EPAREN;
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: ");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+ return REG_NOERROR;
+} /* regex_compile */
+\f
+/* Subroutines for `regex_compile'. */
+
+/* Store OP at LOC followed by two-byte integer parameter ARG. */
+
+static void
+store_op1 (op, loc, arg)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg;
+{
+ *loc = (unsigned char) op;
+ STORE_NUMBER (loc + 1, arg);
+}
+
+
+/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg1, arg2;
+{
+ *loc = (unsigned char) op;
+ STORE_NUMBER (loc + 1, arg1);
+ STORE_NUMBER (loc + 3, arg2);
+}
+
+
+/* Copy the bytes from LOC to END to open up three bytes of space at LOC
+ for OP followed by two-byte integer parameter ARG. */
+
+static void
+insert_op1 (op, loc, arg, end)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg;
+ unsigned char *end;
+{
+ register unsigned char *pfrom = end;
+ register unsigned char *pto = end + 3;
+
+ while (pfrom != loc)
+ *--pto = *--pfrom;
+
+ store_op1 (op, loc, arg);
+}
+
+
+/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg1, arg2;
+ unsigned char *end;
+{
+ register unsigned char *pfrom = end;
+ register unsigned char *pto = end + 5;
+
+ while (pfrom != loc)
+ *--pto = *--pfrom;
+
+ store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points to just after a ^ in PATTERN. Return true if that ^ comes
+ after an alternative or a begin-subexpression. We assume there is at
+ least one character before the ^. */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+ const char *pattern, *p;
+ reg_syntax_t syntax;
+{
+ const char *prev = p - 2;
+ boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
+
+ return
+ /* After a subexpression? */
+ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
+ /* After an alternative? */
+ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
+}
+
+
+/* The dual of at_begline_loc_p. This one is for $. We assume there is
+ at least one character after the $, i.e., `P < PEND'. */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+ const char *p, *pend;
+ int syntax;
+{
+ const char *next = p;
+ boolean next_backslash = *next == '\\';
+ const char *next_next = p + 1 < pend ? p + 1 : NULL;
+
+ return
+ /* Before a subexpression? */
+ (syntax & RE_NO_BK_PARENS ? *next == ')'
+ : next_backslash && next_next && *next_next == ')')
+ /* Before an alternative? */
+ || (syntax & RE_NO_BK_VBAR ? *next == '|'
+ : next_backslash && next_next && *next_next == '|');
+}
+
+
+/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
+ false if it's not. */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+ compile_stack_type compile_stack;
+ regnum_t regnum;
+{
+ int this_element;
+
+ for (this_element = compile_stack.avail - 1;
+ this_element >= 0;
+ this_element--)
+ if (compile_stack.stack[this_element].regnum == regnum)
+ return true;
+
+ return false;
+}
+
+
+/* Read the ending character of a range (in a bracket expression) from the
+ uncompiled pattern *P_PTR (which ends at PEND). We assume the
+ starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
+ Then we set the translation of all bits between the starting and
+ ending characters (inclusive) in the compiled pattern B.
+
+ Return an error code.
+
+ We use these short variable names so we can use the same macros as
+ `regex_compile' itself. */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+ const char **p_ptr, *pend;
+ char *translate;
+ reg_syntax_t syntax;
+ unsigned char *b;
+{
+ unsigned this_char;
+
+ const char *p = *p_ptr;
+ int range_start, range_end;
+
+ if (p == pend)
+ return REG_ERANGE;
+
+ /* Even though the pattern is a signed `char *', we need to fetch
+ with unsigned char *'s; if the high bit of the pattern character
+ is set, the range endpoints will be negative if we fetch using a
+ signed char *.
+
+ We also want to fetch the endpoints without translating them; the
+ appropriate translation is done in the bit-setting loop below. */
+ range_start = ((unsigned char *) p)[-2];
+ range_end = ((unsigned char *) p)[0];
+
+ /* Have to increment the pointer into the pattern string, so the
+ caller isn't still at the ending character. */
+ (*p_ptr)++;
+
+ /* If the start is after the end, the range is empty. */
+ if (range_start > range_end)
+ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+
+ /* Here we see why `this_char' has to be larger than an `unsigned
+ char' -- the range is inclusive, so if `range_end' == 0xff
+ (assuming 8-bit characters), we would otherwise go into an infinite
+ loop, since all characters <= 0xff. */
+ for (this_char = range_start; this_char <= range_end; this_char++)
+ {
+ SET_LIST_BIT (TRANSLATE (this_char));
+ }
+
+ return REG_NOERROR;
+}
+\f
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_SPACE each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. */
+int re_max_failures = 2000;
+
+typedef const unsigned char *fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
+
+
+/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
+
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ REGEX_REALLOCATE requires `destination' be declared. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push PATTERN_OP on FAIL_STACK.
+
+ Return 1 if was able to do so and 0 if ran out of memory allocating
+ space to do so. */
+#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (fail_stack)) \
+ ? 0 \
+ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
+ 1))
+
+/* This pushes an item onto the failure stack. Must be a four-byte
+ value. Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_ITEM(item) \
+ fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+
+/* The complement operation. Assumes `fail_stack' is nonempty. */
+#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging. */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_ITEM
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
+ declared.
+
+ Does `return FAILURE_CODE' if runs out of memory. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_ITEM (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_ITEM (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_ITEM (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_ITEM (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_ITEM (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_ITEM (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+ for each register. */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers. */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack. */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. */
+#define NUM_FAILURE_ITEMS \
+ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAIL_STACK pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the highest and lowest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ int this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_ITEM (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+\f
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+ BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
+ characters can start a string that matches the pattern. This fastmap
+ is used by re_search to skip quickly over impossible starting points.
+
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as BUFP->fastmap.
+
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
+
+ Returns 0 if we succeed, -2 if an internal error. */
+
+int
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ int j, k;
+ fail_stack_type fail_stack;
+#ifndef REGEX_MALLOC
+ char *destination;
+#endif
+ /* We don't push any register information onto the failure stack. */
+ unsigned num_regs = 0;
+
+ register char *fastmap = bufp->fastmap;
+ unsigned char *pattern = bufp->buffer;
+ unsigned long size = bufp->used;
+ const unsigned char *p = pattern;
+ register unsigned char *pend = pattern + size;
+
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
+
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert (fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK ();
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ bufp->can_be_null = 0;
+
+ while (p != pend || !FAIL_STACK_EMPTY ())
+ {
+ if (p == pend)
+ {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ p = fail_stack.stack[--fail_stack.avail];
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert (p < pend);
+
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((re_opcode_t) *p++))
+#else
+ switch ((re_opcode_t) *p++)
+#endif
+ {
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ return 0;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
+ case exactn:
+ fastmap[p[1]] = 1;
+ break;
+
+
+ case charset:
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+ fastmap[j] = 1;
+ break;
+
+
+ case charset_not:
+ /* Chars beyond end of map must be allowed. */
+ for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+ fastmap[j] = 1;
+ break;
+
+
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case anychar:
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = 0;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ return 0;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* not emacs */
+
+
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbound:
+ case notwordbound:
+ case wordbeg:
+ case wordend:
+ case push_dummy_failure:
+ continue;
+
+
+ case jump_n:
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case jump_past_alt:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (j > 0)
+ continue;
+
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
+ if ((re_opcode_t) *p != on_failure_jump
+ && (re_opcode_t) *p != succeed_n)
+ continue;
+
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+
+ /* If what's on the stack is where we are now, pop it. */
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1] == p)
+ fail_stack.avail--;
+
+ continue;
+
+
+ case on_failure_jump:
+ case on_failure_keep_string_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+
+ /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+ end of the pattern. We don't want to push such a point,
+ since when we restore it above, entering the switch will
+ increment `p' past the end of the pattern. We don't need
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
+ if (p + j < pend)
+ {
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))
+ return -2;
+ }
+ else
+ bufp->can_be_null = 1;
+
+ if (succeed_n_p)
+ {
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
+
+ continue;
+
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p += 2;
+
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ p -= 4;
+ succeed_n_p = true; /* Spaghetti code alert. */
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+
+ case set_number_at:
+ p += 4;
+ continue;
+
+
+ case start_memory:
+ case stop_memory:
+ p += 2;
+ continue;
+
+
+ default:
+ abort (); /* We have listed all the cases. */
+ } /* switch *p++ */
+
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
+ p = pend;
+ } /* while p */
+
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
+ return 0;
+} /* re_compile_fastmap */
+\f
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+ struct re_pattern_buffer *bufp;
+ struct re_registers *regs;
+ unsigned num_regs;
+ regoff_t *starts, *ends;
+{
+ if (num_regs)
+ {
+ bufp->regs_allocated = REGS_REALLOCATE;
+ regs->num_regs = num_regs;
+ regs->start = starts;
+ regs->end = ends;
+ }
+ else
+ {
+ bufp->regs_allocated = REGS_UNALLOCATED;
+ regs->num_regs = 0;
+ regs->start = regs->end = (regoff_t) 0;
+ }
+}
+\f
+/* Searching routines. */
+
+/* Like re_search_2, below, but only one string is specified, and
+ doesn't let you say where to stop matching. */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int size, startpos, range;
+ struct re_registers *regs;
+{
+ return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+ regs, size);
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+ virtual concatenation of STRING1 and STRING2, starting first at index
+ STARTPOS, then at STARTPOS + 1, and so on.
+
+ STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+ RANGE is how far to scan while trying to match. RANGE = 0 means try
+ only at STARTPOS; in general, the last start tried is STARTPOS +
+ RANGE.
+
+ In REGS, return the indices of the virtual concatenation of STRING1
+ and STRING2 that matched the entire BUFP->buffer and its contained
+ subexpressions.
+
+ Do not consider matching one past the index STOP in the virtual
+ concatenation of STRING1 and STRING2.
+
+ We return either the position in the strings at which the match was
+ found, -1 if no match, or -2 if error (such as failure
+ stack overflow). */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int startpos;
+ int range;
+ struct re_registers *regs;
+ int stop;
+{
+ int val;
+ register char *fastmap = bufp->fastmap;
+ register char *translate = bufp->translate;
+ int total_size = size1 + size2;
+ int endpos = startpos + range;
+
+ /* Check for out-of-range STARTPOS. */
+ if (startpos < 0 || startpos > total_size)
+ return -1;
+
+ /* Fix up RANGE if it might eventually take us outside
+ the virtual concatenation of STRING1 and STRING2. */
+ if (endpos < -1)
+ range = -1 - startpos;
+ else if (endpos > total_size)
+ range = total_size - startpos;
+
+ /* If the search isn't to be a backwards one, don't waste time in a
+ search for a pattern that must be anchored. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+ {
+ if (startpos > 0)
+ return -1;
+ else
+ range = 1;
+ }
+
+ /* Update the fastmap now if not correct already. */
+ if (fastmap && !bufp->fastmap_accurate)
+ if (re_compile_fastmap (bufp) == -2)
+ return -2;
+
+ /* Loop through the string, looking for a place to start matching. */
+ for (;;)
+ {
+ /* If a fastmap is supplied, skip quickly over characters that
+ cannot be the start of a match. If the pattern can match the
+ null string, however, we don't need to skip characters; we want
+ the first null string. */
+ if (fastmap && startpos < total_size && !bufp->can_be_null)
+ {
+ if (range > 0) /* Searching forwards. */
+ {
+ register const char *d;
+ register int lim = 0;
+ int irange = range;
+
+ if (startpos < size1 && startpos + range >= size1)
+ lim = range - (size1 - startpos);
+
+ d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+ /* Written out as an if-else to avoid testing `translate'
+ inside the loop. */
+ if (translate)
+ while (range > lim
+ && !fastmap[(unsigned char)
+ translate[(unsigned char) *d++]])
+ range--;
+ else
+ while (range > lim && !fastmap[(unsigned char) *d++])
+ range--;
+
+ startpos += irange - range;
+ }
+ else /* Searching backwards. */
+ {
+ register char c = (size1 == 0 || startpos >= size1
+ ? string2[startpos - size1]
+ : string1[startpos]);
+
+ if (!fastmap[(unsigned char) TRANSLATE (c)])
+ goto advance;
+ }
+ }
+
+ /* If can't match the null string, and that's all we have left, fail. */
+ if (range >= 0 && startpos == total_size && fastmap
+ && !bufp->can_be_null)
+ return -1;
+
+ val = re_match_2 (bufp, string1, size1, string2, size2,
+ startpos, regs, stop);
+ if (val >= 0)
+ return startpos;
+
+ if (val == -2)
+ return -2;
+
+ advance:
+ if (!range)
+ break;
+ else if (range > 0)
+ {
+ range--;
+ startpos++;
+ }
+ else
+ {
+ range++;
+ startpos--;
+ }
+ }
+ return -1;
+} /* re_search_2 */
+\f
+/* Declarations and macros for re_match_2. */
+
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+ common_op_match_null_string_p (),
+ group_match_null_string_p ();
+
+/* Structure for per-register (a.k.a. per-group) information.
+ This must not be longer than one word, because we push this value
+ onto the failure stack. Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+typedef union
+{
+ fail_stack_elt_t word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ unsigned ever_matched_something : 1;
+ } bits;
+} register_info_type;
+
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when have matched a real character; it sets `matched' flags
+ for the subexpressions which we are currently inside. Also records
+ that those subexprs have matched. */
+#define SET_REGS_MATCHED() \
+ do \
+ { \
+ unsigned r; \
+ for (r = lowest_active_reg; r <= highest_active_reg; r++) \
+ { \
+ MATCHED_SOMETHING (reg_info[r]) \
+ = EVER_MATCHED_SOMETHING (reg_info[r]) \
+ = 1; \
+ } \
+ } \
+ while (0)
+
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2' into an offset from the beginning of that string. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+
+/* Registers are set to a sentinel when they haven't yet matched. */
+#define REG_UNSET_VALUE ((char *) -1)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+
+/* Macros for dealing with the split strings in re_match_2. */
+
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+
+
+/* Free everything we malloc. */
+#ifdef REGEX_MALLOC
+#define FREE_VAR(var) if (var) free (var); var = NULL
+#define FREE_VARIABLES() \
+ do { \
+ FREE_VAR (fail_stack.stack); \
+ FREE_VAR (regstart); \
+ FREE_VAR (regend); \
+ FREE_VAR (old_regstart); \
+ FREE_VAR (old_regend); \
+ FREE_VAR (best_regstart); \
+ FREE_VAR (best_regend); \
+ FREE_VAR (reg_info); \
+ FREE_VAR (reg_dummy); \
+ FREE_VAR (reg_info_dummy); \
+ } while (0)
+#else /* not REGEX_MALLOC */
+/* Some MIPS systems (at least) want this to free alloca'd storage. */
+#define FREE_VARIABLES() alloca (0)
+#endif /* not REGEX_MALLOC */
+
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+\f
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* re_match is like re_match_2 except it takes only a single string. */
+
+int
+re_match (bufp, string, size, pos, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int size, pos;
+ struct re_registers *regs;
+ {
+ return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
+}
+#endif /* not emacs */
+
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* We use this to map every character in the string. */
+ char *translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next next one is tried. */
+ fail_stack_type fail_stack;
+#ifdef DEBUG
+ static unsigned failure_id = 0;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ unsigned num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+ const char **regstart = NULL, **regend = NULL;
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+ const char **old_regstart = NULL, **old_regend = NULL;
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+ register_info_type *reg_info = NULL;
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+ const char **best_regstart = NULL, **best_regend = NULL;
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* Used when we pop values we don't care about. */
+ const char **reg_dummy = NULL;
+ register_info_type *reg_info_dummy = NULL;
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+#ifdef REGEX_MALLOC
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* REGEX_MALLOC */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is: ");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set
+ || (same_str_p && d > match_end)
+ || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. */
+ else if (best_regs_set)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ DEBUG_PRINT1 ("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ }
+ }
+ else
+ assert (bufp->regs_allocated == REGS_FIXED);
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+ : d - string2 + size1);
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ FREE_VARIABLES ();
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((re_opcode_t) *p++))
+#else
+ switch ((re_opcode_t) *p++)
+#endif
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if (translate[(unsigned char) *d++] != (char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || (re_opcode_t) p[-3] == start_memory)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < *p + *(p + 1); r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if ((int) old_regend[r] >= (int) regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : bcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands. */
+ while (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3; /* Skip over args, too. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+ p1 = p + mcnt;
+
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ unsigned dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ unconditional_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
+ }
+ else if (mcnt == 0)
+ {
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+#ifdef emacs19
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+#else /* not emacs19 */
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
+ goto fail;
+ break;
+#endif /* not emacs19 */
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+\f
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. */
+
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt > 0 */
+ break;
+
+
+ case stop_memory:
+ assert (p1[1] == **p);
+ *p = p1 + 2;
+ return true;
+
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+ It expects P to be the first byte of a single alternative and END one
+ byte past the last. The alternative can contain groups. */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+ unsigned char *p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ unsigned char *p1 = p;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and break when we get
+ to one that can't. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* It's a loop. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ break;
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Sets P to one after the op and its arguments, if any. */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ switch ((re_opcode_t) *p1++)
+ {
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ p1 -= 4;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ case duplicate:
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 4;
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+ bytes; nonzero otherwise. */
+
+static int
+bcmp_translate(
+ unsigned char *s1,
+ unsigned char *s2,
+ int len,
+ char *translate
+)
+{
+ register unsigned char *p1 = s1, *p2 = s2;
+ while (len)
+ {
+ if (translate[*p1++] != translate[*p2++]) return 1;
+ len--;
+ }
+ return 0;
+}
+\f
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length SIZE) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry.
+
+ We call regex_compile to do the actual compilation. */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+ const char *pattern;
+ int length;
+ struct re_pattern_buffer *bufp;
+{
+ reg_errcode_t ret;
+
+ /* GNU code is written to assume at least RE_NREGS registers will be set
+ (and at least one extra will be -1). */
+ bufp->regs_allocated = REGS_UNALLOCATED;
+
+ /* And GNU code determines whether or not to get register information
+ by passing null for the REGS argument to re_match, etc., not by
+ setting no_sub. */
+ bufp->no_sub = 0;
+
+ /* Match anchors at newline. */
+ bufp->newline_anchor = 1;
+
+ ret = regex_compile (pattern, length, re_syntax_options, bufp);
+
+ return re_error_msg[(int) ret];
+}
+\f
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them if this is an Emacs or POSIX compilation. */
+
+#if !defined (emacs) && !defined (_POSIX_SOURCE)
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+re_comp (s)
+ const char *s;
+{
+ reg_errcode_t ret;
+
+ if (!s)
+ {
+ if (!re_comp_buf.buffer)
+ return "No previous regular expression";
+ return 0;
+ }
+
+ if (!re_comp_buf.buffer)
+ {
+ re_comp_buf.buffer = (unsigned char *) malloc (200);
+ if (re_comp_buf.buffer == NULL)
+ return "Memory exhausted";
+ re_comp_buf.allocated = 200;
+
+ re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+ if (re_comp_buf.fastmap == NULL)
+ return "Memory exhausted";
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ re_comp_buf.newline_anchor = 1;
+
+ ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+ /* Yes, we're discarding `const' here. */
+ return (char *) re_error_msg[(int) ret];
+}
+
+
+int
+re_exec (s)
+ const char *s;
+{
+ const int len = strlen (s);
+ return
+ 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+\f
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we considers upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+ regex_t *preg;
+ const char *pattern;
+ int cflags;
+{
+ reg_errcode_t ret;
+ unsigned syntax
+ = (cflags & REG_EXTENDED) ?
+ RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+ /* regex_compile will allocate the space for the compiled pattern. */
+ preg->buffer = 0;
+ preg->allocated = 0;
+
+ /* Don't bother to use a fastmap when searching. This simplifies the
+ REG_NEWLINE case: if we used a fastmap, we'd have to put all the
+ characters after newlines into the fastmap. This way, we just try
+ every character. */
+ preg->fastmap = 0;
+
+ if (cflags & REG_ICASE)
+ {
+ unsigned i;
+
+ preg->translate = (char *) malloc (CHAR_SET_SIZE);
+ if (preg->translate == NULL)
+ return (int) REG_ESPACE;
+
+ /* Map uppercase characters to corresponding lowercase ones. */
+ for (i = 0; i < CHAR_SET_SIZE; i++)
+ preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+ }
+ else
+ preg->translate = NULL;
+
+ /* If REG_NEWLINE is set, newlines are treated differently. */
+ if (cflags & REG_NEWLINE)
+ { /* REG_NEWLINE implies neither . nor [^...] match newline. */
+ syntax &= ~RE_DOT_NEWLINE;
+ syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+ /* It also changes the matching behavior. */
+ preg->newline_anchor = 1;
+ }
+ else
+ preg->newline_anchor = 0;
+
+ preg->no_sub = !!(cflags & REG_NOSUB);
+
+ /* POSIX says a null character in the pattern terminates it, so we
+ can use strlen here in compiling the pattern. */
+ ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+ /* POSIX doesn't distinguish between an unmatched open-group and an
+ unmatched close-group: both are REG_EPAREN. */
+ if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+ return (int) ret;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+ const regex_t *preg;
+ const char *string;
+ size_t nmatch;
+ regmatch_t pmatch[];
+ int eflags;
+{
+ int ret;
+ struct re_registers regs;
+ regex_t private_preg;
+ int len = strlen (string);
+ boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+ private_preg = *preg;
+
+ private_preg.not_bol = !!(eflags & REG_NOTBOL);
+ private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+ /* The user has told us exactly how many registers to return
+ information about, via `nmatch'. We have to pass that on to the
+ matching routines. */
+ private_preg.regs_allocated = REGS_FIXED;
+
+ if (want_reg_info)
+ {
+ regs.num_regs = nmatch;
+ regs.start = TALLOC (nmatch, regoff_t);
+ regs.end = TALLOC (nmatch, regoff_t);
+ if (regs.start == NULL || regs.end == NULL)
+ return (int) REG_NOMATCH;
+ }
+
+ /* Perform the searching operation. */
+ ret = re_search (&private_preg, string, len,
+ /* start: */ 0, /* range: */ len,
+ want_reg_info ? ®s : (struct re_registers *) 0);
+
+ /* Copy the register information to the POSIX structure. */
+ if (want_reg_info)
+ {
+ if (ret >= 0)
+ {
+ unsigned r;
+
+ for (r = 0; r < nmatch; r++)
+ {
+ pmatch[r].rm_so = regs.start[r];
+ pmatch[r].rm_eo = regs.end[r];
+ }
+ }
+
+ /* If we needed the temporary register info, free the space now. */
+ free (regs.start);
+ free (regs.end);
+ }
+
+ /* We want zero return to mean success, unlike `re_search'. */
+ return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+ int errcode;
+ const regex_t *preg;
+ char *errbuf;
+ size_t errbuf_size;
+{
+ const char *msg;
+ size_t msg_size;
+
+ if (errcode < 0
+ || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ msg = re_error_msg[errcode];
+
+ /* POSIX doesn't require that we do anything in this case, but why
+ not be nice. */
+ if (! msg)
+ msg = "Success";
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
+
+ if (errbuf_size != 0)
+ {
+ if (msg_size > errbuf_size)
+ {
+ strncpy (errbuf, msg, errbuf_size - 1);
+ errbuf[errbuf_size - 1] = 0;
+ }
+ else
+ strcpy (errbuf, msg);
+ }
+
+ return msg_size;
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+void
+regfree (preg)
+ regex_t *preg;
+{
+ if (preg->buffer != NULL)
+ free (preg->buffer);
+ preg->buffer = NULL;
+
+ preg->allocated = 0;
+ preg->used = 0;
+
+ if (preg->fastmap != NULL)
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ preg->fastmap_accurate = 0;
+
+ if (preg->translate != NULL)
+ free (preg->translate);
+ preg->translate = NULL;
+}
+
+#endif /* not emacs */
+\f
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
--- /dev/null
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+
+ Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+\f
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+\f
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+\f
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value. It is
+ defined both in `regex.c' and here. */
+#define RE_EXACTN_VALUE 1
+\f
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+\f
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS and BUFFER->no_sub are nonzero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+\f
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
{
local cmd i is_hash=y dir="$(__gitdir "$1")"
if [ -d "$dir" ]; then
- for i in $(git --git-dir="$dir" \
- for-each-ref --format='%(refname)' \
- refs/heads ); do
- echo "${i#refs/heads/}"
- done
+ git --git-dir="$dir" for-each-ref --format='%(refname:short)' \
+ refs/heads
return
fi
for i in $(git ls-remote "$1" 2>/dev/null); do
{
local cmd i is_hash=y dir="$(__gitdir "$1")"
if [ -d "$dir" ]; then
- for i in $(git --git-dir="$dir" \
- for-each-ref --format='%(refname)' \
- refs/tags ); do
- echo "${i#refs/tags/}"
- done
+ git --git-dir="$dir" for-each-ref --format='%(refname:short)' \
+ refs/tags
return
fi
for i in $(git ls-remote "$1" 2>/dev/null); do
local cmd i is_hash=y dir="$(__gitdir "$1")"
if [ -d "$dir" ]; then
if [ -e "$dir/HEAD" ]; then echo HEAD; fi
- for i in $(git --git-dir="$dir" \
- for-each-ref --format='%(refname)' \
- refs/tags refs/heads refs/remotes); do
- case "$i" in
- refs/tags/*) echo "${i#refs/tags/}" ;;
- refs/heads/*) echo "${i#refs/heads/}" ;;
- refs/remotes/*) echo "${i#refs/remotes/}" ;;
- *) echo "$i" ;;
- esac
- done
+ git --git-dir="$dir" for-each-ref --format='%(refname:short)' \
+ refs/tags refs/heads refs/remotes
return
fi
for i in $(git ls-remote "$dir" 2>/dev/null); do
--*)
__gitcomp "
--all --author= --signoff --verify --no-verify
- --edit --amend --include --only
+ --edit --amend --include --only --interactive
"
return
esac
)) { die $usage; }
die $usage unless ($read_mode xor $write_mode);
-my $topdir = `git-rev-parse --show-cdup` or die "\n"; chomp $topdir;
+my $topdir = `git rev-parse --show-cdup` or die "\n"; chomp $topdir;
my $gitdir = $topdir . '.git';
my $gitmeta = $topdir . '.gitmeta';
open (OUT, ">$gitmeta.tmp") or die "Could not open $gitmeta.tmp for writing: $!\n";
}
- my @files = `git-ls-files`;
+ my @files = `git ls-files`;
my %dirs;
foreach my $path (@files) {
#include "progress.h"
#include "csum-file.h"
-static void sha1flush(struct sha1file *f, unsigned int count)
+static void sha1flush(struct sha1file *f, void *buf, unsigned int count)
{
- void *buf = f->buffer;
-
for (;;) {
int ret = xwrite(f->fd, buf, count);
if (ret > 0) {
if (offset) {
SHA1_Update(&f->ctx, f->buffer, offset);
- sha1flush(f, offset);
+ sha1flush(f, f->buffer, offset);
f->offset = 0;
}
SHA1_Final(f->buffer, &f->ctx);
hashcpy(result, f->buffer);
if (flags & (CSUM_CLOSE | CSUM_FSYNC)) {
/* write checksum and close fd */
- sha1flush(f, 20);
+ sha1flush(f, f->buffer, 20);
if (flags & CSUM_FSYNC)
fsync_or_die(f->fd, f->name);
if (close(f->fd))
int sha1write(struct sha1file *f, void *buf, unsigned int count)
{
- if (f->do_crc)
- f->crc32 = crc32(f->crc32, buf, count);
while (count) {
unsigned offset = f->offset;
unsigned left = sizeof(f->buffer) - offset;
unsigned nr = count > left ? left : count;
+ void *data;
+
+ if (f->do_crc)
+ f->crc32 = crc32(f->crc32, buf, nr);
+
+ if (nr == sizeof(f->buffer)) {
+ /* process full buffer directly without copy */
+ data = buf;
+ } else {
+ memcpy(f->buffer + offset, buf, nr);
+ data = f->buffer;
+ }
- memcpy(f->buffer + offset, buf, nr);
count -= nr;
offset += nr;
buf = (char *) buf + nr;
left -= nr;
if (!left) {
- SHA1_Update(&f->ctx, f->buffer, offset);
- sha1flush(f, offset);
+ SHA1_Update(&f->ctx, data, offset);
+ sha1flush(f, data, offset);
offset = 0;
}
f->offset = offset;
openlog("git-daemon", LOG_PID, LOG_DAEMON);
set_die_routine(daemon_die);
} else
- setlinebuf(stderr); /* avoid splitting a message in the middle */
+ /* avoid splitting a message in the middle */
+ setvbuf(stderr, NULL, _IOLBF, 0);
if (inetd_mode && (group_name || user_name))
die("--user and --group are incompatible with --inetd");
? CE_MATCH_RACY_IS_DIRTY : 0);
char symcache[PATH_MAX];
+ diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/");
+
if (diff_unmerged_stage < 0)
diff_unmerged_stage = 2;
entries = active_nr;
if (unpack_trees(1, &t, &opts))
exit(128);
+ diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/");
diffcore_std(&revs->diffopt);
diff_flush(&revs->diffopt);
return 0;
if (queue_diff(&revs->diffopt, revs->diffopt.paths[0],
revs->diffopt.paths[1]))
exit(1);
+ diff_set_mnemonic_prefix(&revs->diffopt, "1/", "2/");
diffcore_std(&revs->diffopt);
diff_flush(&revs->diffopt);
int diff_use_color_default = -1;
static const char *external_diff_cmd_cfg;
int diff_auto_refresh_index = 1;
+static int diff_mnemonic_prefix;
static char diff_colors[][COLOR_MAXLEN] = {
"\033[m", /* reset */
diff_auto_refresh_index = git_config_bool(var, value);
return 0;
}
+ if (!strcmp(var, "diff.mnemonicprefix")) {
+ diff_mnemonic_prefix = git_config_bool(var, value);
+ return 0;
+ }
if (!strcmp(var, "diff.external"))
return git_config_string(&external_diff_cmd_cfg, var, value);
if (!prefixcmp(var, "diff.")) {
const char *new = diff_get_color(color_diff, DIFF_FILE_NEW);
const char *reset = diff_get_color(color_diff, DIFF_RESET);
static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT;
+ const char *a_prefix, *b_prefix;
+
+ if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) {
+ a_prefix = o->b_prefix;
+ b_prefix = o->a_prefix;
+ } else {
+ a_prefix = o->a_prefix;
+ b_prefix = o->b_prefix;
+ }
name_a += (*name_a == '/');
name_b += (*name_b == '/');
strbuf_reset(&a_name);
strbuf_reset(&b_name);
- quote_two_c_style(&a_name, o->a_prefix, name_a, 0);
- quote_two_c_style(&b_name, o->b_prefix, name_b, 0);
+ quote_two_c_style(&a_name, a_prefix, name_a, 0);
+ quote_two_c_style(&b_name, b_prefix, name_b, 0);
diff_populate_filespec(one, 0);
diff_populate_filespec(two, 0);
/*
* Original minus copied is the removed material,
* added is the new material. They are both damages
- * made to the preimage.
+ * made to the preimage. In --dirstat-by-file mode, count
+ * damaged files, not damaged lines. This is done by
+ * counting only a single damaged line per file.
*/
damage = (p->one->size - copied) + added;
+ if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0)
+ damage = 1;
ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
dir.files[dir.nr].name = name;
return NULL;
}
+void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
+{
+ if (!options->a_prefix)
+ options->a_prefix = a;
+ if (!options->b_prefix)
+ options->b_prefix = b;
+}
+
static void builtin_diff(const char *name_a,
const char *name_b,
struct diff_filespec *one,
char *a_one, *b_two;
const char *set = diff_get_color_opt(o, DIFF_METAINFO);
const char *reset = diff_get_color_opt(o, DIFF_RESET);
+ const char *a_prefix, *b_prefix;
+
+ diff_set_mnemonic_prefix(o, "a/", "b/");
+ if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
+ a_prefix = o->b_prefix;
+ b_prefix = o->a_prefix;
+ } else {
+ a_prefix = o->a_prefix;
+ b_prefix = o->b_prefix;
+ }
- a_one = quote_two(o->a_prefix, name_a + (*name_a == '/'));
- b_two = quote_two(o->b_prefix, name_b + (*name_b == '/'));
+ a_one = quote_two(a_prefix, name_a + (*name_a == '/'));
+ b_two = quote_two(b_prefix, name_b + (*name_b == '/'));
lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
fprintf(o->file, "%sdiff --git %s %s%s\n", set, a_one, b_two, reset);
DIFF_OPT_CLR(options, COLOR_DIFF);
options->detect_rename = diff_detect_rename_default;
- options->a_prefix = "a/";
- options->b_prefix = "b/";
+ if (!diff_mnemonic_prefix) {
+ options->a_prefix = "a/";
+ options->b_prefix = "b/";
+ }
}
int diff_setup_done(struct diff_options *options)
DIFF_OPT_SET(options, EXIT_WITH_STATUS);
}
- /*
- * If we postprocess in diffcore, we cannot simply return
- * upon the first hit. We need to run diff as usual.
- */
- if (options->pickaxe || options->filter)
- DIFF_OPT_CLR(options, QUIET);
-
return 0;
}
else if (!strcmp(arg, "--cumulative")) {
options->output_format |= DIFF_FORMAT_DIRSTAT;
DIFF_OPT_SET(options, DIRSTAT_CUMULATIVE);
+ } else if (opt_arg(arg, 0, "dirstat-by-file",
+ &options->dirstat_percent)) {
+ options->output_format |= DIFF_FORMAT_DIRSTAT;
+ DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
}
else if (!strcmp(arg, "--check"))
options->output_format |= DIFF_FORMAT_CHECKDIFF;
void diffcore_std(struct diff_options *options)
{
- if (DIFF_OPT_TST(options, QUIET))
- return;
-
- if (options->skip_stat_unmatch && !DIFF_OPT_TST(options, FIND_COPIES_HARDER))
+ if (options->skip_stat_unmatch)
diffcore_skip_stat_unmatch(options);
if (options->break_opt != -1)
diffcore_break(options->break_opt);
#define DIFF_OPT_RELATIVE_NAME (1 << 17)
#define DIFF_OPT_IGNORE_SUBMODULES (1 << 18)
#define DIFF_OPT_DIRSTAT_CUMULATIVE (1 << 19)
+#define DIFF_OPT_DIRSTAT_BY_FILE (1 << 20)
#define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag)
#define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag)
#define DIFF_OPT_CLR(opts, flag) ((opts)->flags &= ~DIFF_OPT_##flag)
extern void diff_tree_combined_merge(const unsigned char *sha1, int, struct rev_info *);
+void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b);
+
extern void diff_addremove(struct diff_options *,
int addremove,
unsigned mode,
struct diff_filespec *);
extern void diff_q(struct diff_queue_struct *, struct diff_filepair *);
-extern void diffcore_pathspec(const char **pathspec);
extern void diffcore_break(int);
extern void diffcore_rename(struct diff_options *);
extern void diffcore_merge_broken(void);
int pack_fd;
snprintf(tmpfile, sizeof(tmpfile),
- "%s/tmp_pack_XXXXXX", get_object_directory());
+ "%s/pack/tmp_pack_XXXXXX", get_object_directory());
pack_fd = xmkstemp(tmpfile);
p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
strcpy(p->pack_name, tmpfile);
}
snprintf(tmpfile, sizeof(tmpfile),
- "%s/tmp_idx_XXXXXX", get_object_directory());
+ "%s/pack/tmp_idx_XXXXXX", get_object_directory());
idx_fd = xmkstemp(tmpfile);
f = sha1fd(idx_fd, tmpfile);
sha1write(f, array, 256 * sizeof(int));
test -n "$nolog" || echo "git bisect $state $rev" >>"$GIT_DIR/BISECT_LOG"
}
+is_expected_rev() {
+ test -f "$GIT_DIR/BISECT_EXPECTED_REV" &&
+ test "$1" = $(cat "$GIT_DIR/BISECT_EXPECTED_REV")
+}
+
+mark_expected_rev() {
+ echo "$1" > "$GIT_DIR/BISECT_EXPECTED_REV"
+}
+
+check_expected_revs() {
+ for _rev in "$@"; do
+ if ! is_expected_rev "$_rev"; then
+ rm -f "$GIT_DIR/BISECT_ANCESTORS_OK"
+ rm -f "$GIT_DIR/BISECT_EXPECTED_REV"
+ return
+ fi
+ done
+}
+
bisect_state() {
bisect_autostart
state=$1
1,bad|1,good|1,skip)
rev=$(git rev-parse --verify HEAD) ||
die "Bad rev input: HEAD"
- bisect_write "$state" "$rev" ;;
+ bisect_write "$state" "$rev"
+ check_expected_revs "$rev" ;;
2,bad|*,good|*,skip)
shift
eval=''
die "Bad rev input: $rev"
eval="$eval bisect_write '$state' '$sha'; "
done
- eval "$eval" ;;
+ eval "$eval"
+ check_expected_revs "$@" ;;
*,bad)
die "'git bisect bad' can take only one argument." ;;
*)
bisect_next_check && bisect_next || :
}
-eval_rev_list() {
- _eval="$1"
-
- eval $_eval
- res=$?
-
- if [ $res -ne 0 ]; then
- echo >&2 "'git rev-list --bisect-vars' failed:"
- echo >&2 "maybe you mistake good and bad revs?"
- exit $res
- fi
-
- return $res
-}
-
filter_skipped() {
_eval="$1"
_skip="$2"
if [ -z "$_skip" ]; then
- eval_rev_list "$_eval"
+ eval "$_eval"
return
fi
# Let's parse the output of:
# "git rev-list --bisect-vars --bisect-all ..."
- eval_rev_list "$_eval" | while read hash line
+ eval "$_eval" | while read hash line
do
case "$VARS,$FOUND,$TRIED,$hash" in
# We display some vars.
fi
}
+bisect_checkout() {
+ _rev="$1"
+ _msg="$2"
+ echo "Bisecting: $_msg"
+ mark_expected_rev "$_rev"
+ git checkout -q "$_rev" || exit
+ git show-branch "$_rev"
+}
+
+is_among() {
+ _rev="$1"
+ _list="$2"
+ case "$_list" in *$_rev*) return 0 ;; esac
+ return 1
+}
+
+handle_bad_merge_base() {
+ _badmb="$1"
+ _good="$2"
+ if is_expected_rev "$_badmb"; then
+ cat >&2 <<EOF
+The merge base $_badmb is bad.
+This means the bug has been fixed between $_badmb and [$_good].
+EOF
+ exit 3
+ else
+ cat >&2 <<EOF
+Some good revs are not ancestor of the bad rev.
+git bisect cannot work properly in this case.
+Maybe you mistake good and bad revs?
+EOF
+ exit 1
+ fi
+}
+
+handle_skipped_merge_base() {
+ _mb="$1"
+ _bad="$2"
+ _good="$3"
+ cat >&2 <<EOF
+Warning: the merge base between $_bad and [$_good] must be skipped.
+So we cannot be sure the first bad commit is between $_mb and $_bad.
+We continue anyway.
+EOF
+}
+
+#
+# "check_merge_bases" checks that merge bases are not "bad".
+#
+# - If one is "good", that's good, we have nothing to do.
+# - If one is "bad", it means the user assumed something wrong
+# and we must exit.
+# - If one is "skipped", we can't know but we should warn.
+# - If we don't know, we should check it out and ask the user to test.
+#
+# In the last case we will return 1, and otherwise 0.
+#
+check_merge_bases() {
+ _bad="$1"
+ _good="$2"
+ _skip="$3"
+ for _mb in $(git merge-base --all $_bad $_good)
+ do
+ if is_among "$_mb" "$_good"; then
+ continue
+ elif test "$_mb" = "$_bad"; then
+ handle_bad_merge_base "$_bad" "$_good"
+ elif is_among "$_mb" "$_skip"; then
+ handle_skipped_merge_base "$_mb" "$_bad" "$_good"
+ else
+ bisect_checkout "$_mb" "a merge base must be tested"
+ return 1
+ fi
+ done
+ return 0
+}
+
+#
+# "check_good_are_ancestors_of_bad" checks that all "good" revs are
+# ancestor of the "bad" rev.
+#
+# If that's not the case, we need to check the merge bases.
+# If a merge base must be tested by the user we return 1 and
+# otherwise 0.
+#
+check_good_are_ancestors_of_bad() {
+ test -f "$GIT_DIR/BISECT_ANCESTORS_OK" &&
+ return
+
+ _bad="$1"
+ _good=$(echo $2 | sed -e 's/\^//g')
+ _skip="$3"
+
+ # Bisecting with no good rev is ok
+ test -z "$_good" && return
+
+ _side=$(git rev-list $_good ^$_bad)
+ if test -n "$_side"; then
+ # Return if a checkout was done
+ check_merge_bases "$_bad" "$_good" "$_skip" || return
+ fi
+
+ : > "$GIT_DIR/BISECT_ANCESTORS_OK"
+
+ return 0
+}
+
bisect_next() {
case "$#" in 0) ;; *) usage ;; esac
bisect_autostart
bisect_next_check good
+ # Get bad, good and skipped revs
+ bad=$(git rev-parse --verify refs/bisect/bad) &&
+ good=$(git for-each-ref --format='^%(objectname)' \
+ "refs/bisect/good-*" | tr '\012' ' ') &&
skip=$(git for-each-ref --format='%(objectname)' \
- "refs/bisect/skip-*" | tr '\012' ' ') || exit
+ "refs/bisect/skip-*" | tr '\012' ' ') &&
+
+ # Maybe some merge bases must be tested first
+ check_good_are_ancestors_of_bad "$bad" "$good" "$skip"
+ # Return now if a checkout has already been done
+ test "$?" -eq "1" && return
+ # Get bisection information
BISECT_OPT=''
test -n "$skip" && BISECT_OPT='--bisect-all'
-
- bad=$(git rev-parse --verify refs/bisect/bad) &&
- good=$(git for-each-ref --format='^%(objectname)' \
- "refs/bisect/good-*" | tr '\012' ' ') &&
eval="git rev-list --bisect-vars $BISECT_OPT $good $bad --" &&
eval="$eval $(cat "$GIT_DIR/BISECT_NAMES")" &&
eval=$(filter_skipped "$eval" "$skip") &&
# commit is also a "skip" commit (see above).
exit_if_skipped_commits "$bisect_rev"
- echo "Bisecting: $bisect_nr revisions left to test after this"
- git checkout -q "$bisect_rev" || exit
- git show-branch "$bisect_rev"
+ bisect_checkout "$bisect_rev" "$bisect_nr revisions left to test after this"
}
bisect_visualize() {
do
git update-ref -d $ref $hash || exit
done
+ rm -f "$GIT_DIR/BISECT_EXPECTED_REV" &&
+ rm -f "$GIT_DIR/BISECT_ANCESTORS_OK" &&
rm -f "$GIT_DIR/BISECT_LOG" &&
rm -f "$GIT_DIR/BISECT_NAMES" &&
rm -f "$GIT_DIR/BISECT_RUN" &&
--- /dev/null
+* encoding=US-ASCII
+git-gui.sh encoding=UTF-8
+/po/*.po encoding=UTF-8
}
}
+proc gitattr {path attr default} {
+ if {[catch {set r [git check-attr $attr -- $path]}]} {
+ set r unspecified
+ } else {
+ set r [join [lrange [split $r :] 2 end] :]
+ regsub {^ } $r {} r
+ }
+ if {$r eq {unspecified}} {
+ return $default
+ }
+ return $r
+}
+
proc sq {value} {
regsub -all ' $value "'\\''" value
return "'$value'"
set default_config(user.name) {}
set default_config(user.email) {}
+set default_config(gui.encoding) [encoding system]
set default_config(gui.matchtrackingbranch) false
set default_config(gui.pruneduringfetch) false
set default_config(gui.trustmtime) false
}
citool {
enable_option singlecommit
+ enable_option retcode
disable_option multicommit
disable_option branch
disable_option transport
+
+ while {[llength $argv] > 0} {
+ set a [lindex $argv 0]
+ switch -- $a {
+ --amend {
+ enable_option initialamend
+ }
+ --nocommit {
+ enable_option nocommit
+ enable_option nocommitmsg
+ }
+ --commitmsg {
+ disable_option nocommitmsg
+ }
+ default {
+ break
+ }
+ }
+
+ set argv [lrange $argv 1 end]
+ }
}
}
set is_detached 0
set current_diff_path {}
set is_3way_diff 0
+set is_conflict_diff 0
set selected_commit_type new
+set nullid "0000000000000000000000000000000000000000"
+set nullid2 "0000000000000000000000000000000000000001"
+
######################################################################
##
## task management
return $empty_tree
}
+proc force_amend {} {
+ global selected_commit_type
+ global HEAD PARENT MERGE_HEAD commit_type
+
+ repository_state newType newHEAD newMERGE_HEAD
+ set HEAD $newHEAD
+ set PARENT $newHEAD
+ set MERGE_HEAD $newMERGE_HEAD
+ set commit_type $newType
+
+ set selected_commit_type amend
+ do_select_commit_type
+}
+
proc rescan {after {honor_trustmtime 1}} {
global HEAD PARENT MERGE_HEAD commit_type
global ui_index ui_workdir ui_comm
|| [string trim [$ui_comm get 0.0 end]] eq {})} {
if {[string match amend* $commit_type]} {
} elseif {[load_message GITGUI_MSG]} {
+ } elseif {[run_prepare_commit_msg_hook]} {
} elseif {[load_message MERGE_MSG]} {
} elseif {[load_message SQUASH_MSG]} {
}
return 0
}
+proc run_prepare_commit_msg_hook {} {
+ global pch_error
+
+ # prepare-commit-msg requires PREPARE_COMMIT_MSG exist. From git-gui
+ # it will be .git/MERGE_MSG (merge), .git/SQUASH_MSG (squash), or an
+ # empty file but existant file.
+
+ set fd_pcm [open [gitdir PREPARE_COMMIT_MSG] a]
+
+ if {[file isfile [gitdir MERGE_MSG]]} {
+ set pcm_source "merge"
+ set fd_mm [open [gitdir MERGE_MSG] r]
+ puts -nonewline $fd_pcm [read $fd_mm]
+ close $fd_mm
+ } elseif {[file isfile [gitdir SQUASH_MSG]]} {
+ set pcm_source "squash"
+ set fd_sm [open [gitdir SQUASH_MSG] r]
+ puts -nonewline $fd_pcm [read $fd_sm]
+ close $fd_sm
+ } else {
+ set pcm_source ""
+ }
+
+ close $fd_pcm
+
+ set fd_ph [githook_read prepare-commit-msg \
+ [gitdir PREPARE_COMMIT_MSG] $pcm_source]
+ if {$fd_ph eq {}} {
+ catch {file delete [gitdir PREPARE_COMMIT_MSG]}
+ return 0;
+ }
+
+ ui_status [mc "Calling prepare-commit-msg hook..."]
+ set pch_error {}
+
+ fconfigure $fd_ph -blocking 0 -translation binary -eofchar {}
+ fileevent $fd_ph readable \
+ [list prepare_commit_msg_hook_wait $fd_ph]
+
+ return 1;
+}
+
+proc prepare_commit_msg_hook_wait {fd_ph} {
+ global pch_error
+
+ append pch_error [read $fd_ph]
+ fconfigure $fd_ph -blocking 1
+ if {[eof $fd_ph]} {
+ if {[catch {close $fd_ph}]} {
+ ui_status [mc "Commit declined by prepare-commit-msg hook."]
+ hook_failed_popup prepare-commit-msg $pch_error
+ catch {file delete [gitdir PREPARE_COMMIT_MSG]}
+ exit 1
+ } else {
+ load_message PREPARE_COMMIT_MSG
+ }
+ set pch_error {}
+ catch {file delete [gitdir PREPARE_COMMIT_MSG]}
+ return
+ }
+ fconfigure $fd_ph -blocking 0
+ catch {file delete [gitdir PREPARE_COMMIT_MSG]}
+}
+
proc read_diff_index {fd after} {
global buf_rdi
}
set is_quitting 0
+set ret_code 1
+
+proc terminate_me {win} {
+ global ret_code
+ if {$win ne {.}} return
+ exit $ret_code
+}
-proc do_quit {} {
+proc do_quit {{rc {1}}} {
global ui_comm is_quitting repo_config commit_type
global GITGUI_BCK_exists GITGUI_BCK_i
global ui_comm_spell
+ global ret_code
if {$is_quitting} return
set is_quitting 1
}
}
+ set ret_code $rc
destroy .
}
$ui_index tag remove in_sel 0.0 end
$ui_workdir tag remove in_sel 0.0 end
- # Do not stage files with conflicts
+ # Determine the state of the file
if {[info exists file_states($path)]} {
set state [lindex $file_states($path) 0]
} else {
set state {__}
}
- if {[string first {U} $state] >= 0} {
- set col 1
- }
-
# Restage the file, or simply show the diff
if {$col == 0 && $y > 1} {
+ # Conflicts need special handling
+ if {[string first {U} $state] >= 0} {
+ merge_stage_workdir $path $w $lno
+ return
+ }
+
if {[string index $state 1] eq {O}} {
set mmask {}
} else {
# -- Commit Menu
#
+proc commit_btn_caption {} {
+ if {[is_enabled nocommit]} {
+ return [mc "Done"]
+ } else {
+ return [mc Commit@@verb]
+ }
+}
+
if {[is_enabled multicommit] || [is_enabled singlecommit]} {
menu .mbar.commit
- .mbar.commit add radiobutton \
- -label [mc "New Commit"] \
- -command do_select_commit_type \
- -variable selected_commit_type \
- -value new
- lappend disable_on_lock \
- [list .mbar.commit entryconf [.mbar.commit index last] -state]
+ if {![is_enabled nocommit]} {
+ .mbar.commit add radiobutton \
+ -label [mc "New Commit"] \
+ -command do_select_commit_type \
+ -variable selected_commit_type \
+ -value new
+ lappend disable_on_lock \
+ [list .mbar.commit entryconf [.mbar.commit index last] -state]
- .mbar.commit add radiobutton \
- -label [mc "Amend Last Commit"] \
- -command do_select_commit_type \
- -variable selected_commit_type \
- -value amend
- lappend disable_on_lock \
- [list .mbar.commit entryconf [.mbar.commit index last] -state]
+ .mbar.commit add radiobutton \
+ -label [mc "Amend Last Commit"] \
+ -command do_select_commit_type \
+ -variable selected_commit_type \
+ -value amend
+ lappend disable_on_lock \
+ [list .mbar.commit entryconf [.mbar.commit index last] -state]
- .mbar.commit add separator
+ .mbar.commit add separator
+ }
.mbar.commit add command -label [mc Rescan] \
-command ui_do_rescan \
.mbar.commit add separator
- .mbar.commit add command -label [mc "Sign Off"] \
- -command do_signoff \
- -accelerator $M1T-S
+ if {![is_enabled nocommit]} {
+ .mbar.commit add command -label [mc "Sign Off"] \
+ -command do_signoff \
+ -accelerator $M1T-S
+ }
- .mbar.commit add command -label [mc Commit@@verb] \
+ .mbar.commit add command -label [commit_btn_caption] \
-command do_commit \
-accelerator $M1T-Return
lappend disable_on_lock \
lappend disable_on_lock \
{.vpane.lower.commarea.buttons.incall conf -state}
-button .vpane.lower.commarea.buttons.signoff -text [mc "Sign Off"] \
- -command do_signoff
-pack .vpane.lower.commarea.buttons.signoff -side top -fill x
+if {![is_enabled nocommit]} {
+ button .vpane.lower.commarea.buttons.signoff -text [mc "Sign Off"] \
+ -command do_signoff
+ pack .vpane.lower.commarea.buttons.signoff -side top -fill x
+}
-button .vpane.lower.commarea.buttons.commit -text [mc Commit@@verb] \
+button .vpane.lower.commarea.buttons.commit -text [commit_btn_caption] \
-command do_commit
pack .vpane.lower.commarea.buttons.commit -side top -fill x
lappend disable_on_lock \
{.vpane.lower.commarea.buttons.commit conf -state}
-button .vpane.lower.commarea.buttons.push -text [mc Push] \
- -command do_push_anywhere
-pack .vpane.lower.commarea.buttons.push -side top -fill x
+if {![is_enabled nocommit]} {
+ button .vpane.lower.commarea.buttons.push -text [mc Push] \
+ -command do_push_anywhere
+ pack .vpane.lower.commarea.buttons.push -side top -fill x
+}
# -- Commit Message Buffer
#
frame .vpane.lower.commarea.buffer.header
set ui_comm .vpane.lower.commarea.buffer.t
set ui_coml .vpane.lower.commarea.buffer.header.l
-radiobutton .vpane.lower.commarea.buffer.header.new \
- -text [mc "New Commit"] \
- -command do_select_commit_type \
- -variable selected_commit_type \
- -value new
-lappend disable_on_lock \
- [list .vpane.lower.commarea.buffer.header.new conf -state]
-radiobutton .vpane.lower.commarea.buffer.header.amend \
- -text [mc "Amend Last Commit"] \
- -command do_select_commit_type \
- -variable selected_commit_type \
- -value amend
-lappend disable_on_lock \
- [list .vpane.lower.commarea.buffer.header.amend conf -state]
+
+if {![is_enabled nocommit]} {
+ radiobutton .vpane.lower.commarea.buffer.header.new \
+ -text [mc "New Commit"] \
+ -command do_select_commit_type \
+ -variable selected_commit_type \
+ -value new
+ lappend disable_on_lock \
+ [list .vpane.lower.commarea.buffer.header.new conf -state]
+ radiobutton .vpane.lower.commarea.buffer.header.amend \
+ -text [mc "Amend Last Commit"] \
+ -command do_select_commit_type \
+ -variable selected_commit_type \
+ -value amend
+ lappend disable_on_lock \
+ [list .vpane.lower.commarea.buffer.header.amend conf -state]
+}
+
label $ui_coml \
-anchor w \
-justify left
}
trace add variable commit_type write trace_commit_type
pack $ui_coml -side left -fill x
-pack .vpane.lower.commarea.buffer.header.amend -side right
-pack .vpane.lower.commarea.buffer.header.new -side right
+
+if {![is_enabled nocommit]} {
+ pack .vpane.lower.commarea.buffer.header.amend -side right
+ pack .vpane.lower.commarea.buffer.header.new -side right
+}
text $ui_comm -background white -foreground black \
-borderwidth 1 \
-command {incr_font_size font_diff 1}
lappend diff_actions [list $ctxm entryconf [$ctxm index last] -state]
$ctxm add separator
+ set emenu $ctxm.enc
+ menu $emenu
+ build_encoding_menu $emenu [list force_diff_encoding]
+ $ctxm add cascade \
+ -label [mc "Encoding"] \
+ -menu $emenu
+ lappend diff_actions [list $ctxm entryconf [$ctxm index last] -state]
+ $ctxm add separator
$ctxm add command -label [mc "Options..."] \
-command do_options
}
if {![winfo ismapped .]} {
wm deiconify .
}
-after 1 do_rescan
+after 1 {
+ if {[is_enabled initialamend]} {
+ force_amend
+ } else {
+ do_rescan
+ }
+
+ if {[is_enabled nocommitmsg]} {
+ $ui_comm configure -state disabled -background gray
+ }
+}
if {[is_enabled multicommit]} {
after 1000 hint_gc
}
+if {[is_enabled retcode]} {
+ bind . <Destroy> {+terminate_me %W}
+}
$w.ctxm add command \
-label [mc "Copy Commit"] \
-command [cb _copycommit]
+ $w.ctxm add separator
+ menu $w.ctxm.enc
+ build_encoding_menu $w.ctxm.enc [cb _setencoding]
+ $w.ctxm add cascade \
+ -label [mc "Encoding"] \
+ -menu $w.ctxm.enc
$w.ctxm add command \
-label [mc "Do Full Copy Detection"] \
-command [cb _fullcopyblame]
+ $w.ctxm add separator
$w.ctxm add command \
-label [mc "Show History Context"] \
-command [cb _gitkcommit]
} else {
set fd [git_read cat-file blob "$commit:$path"]
}
- fconfigure $fd -blocking 0 -translation lf -encoding binary
+ fconfigure $fd \
+ -blocking 0 \
+ -translation lf \
+ -encoding [get_path_encoding $path]
fileevent $fd readable [cb _read_file $fd $jump]
set current_fd $fd
}
}
lappend options -- $path
set fd [eval git_read --nice blame $options]
- fconfigure $fd -blocking 0 -translation lf -encoding binary
+ fconfigure $fd -blocking 0 -translation lf -encoding utf-8
fileevent $fd readable [cb _read_blame $fd $cur_w $cur_d]
set current_fd $fd
set blame_lines 0
_showcommit $this $cur_w $lno
}
+method _setencoding {enc} {
+ force_path_encoding $path $enc
+ _load $this [list \
+ $highlight_column \
+ $highlight_line \
+ [lindex [$w_file xview] 0] \
+ [lindex [$w_file yview] 0] \
+ ]
+}
+
method _load_commit {cur_w cur_d pos} {
upvar #0 $cur_d line_data
set lno [lindex [split [$cur_w index $pos] .] 0]
set enc [tcl_encoding $enc]
if {$enc ne {}} {
set msg [encoding convertfrom $enc $msg]
- set author_name [encoding convertfrom $enc $author_name]
- set committer_name [encoding convertfrom $enc $committer_name]
- set header($cmit,author) $author_name
- set header($cmit,committer) $committer_name
- set header($cmit,summary) \
- [encoding convertfrom $enc $header($cmit,summary)]
}
set msg [string trim $msg]
}
}
method _gitkcommit {} {
+ global nullid
+
set dat [_get_click_amov_info $this]
if {$dat ne {}} {
set cmit [lindex $dat 0]
+
+ # If the line belongs to the working copy, use HEAD instead
+ if {$cmit eq $nullid} {
+ if {[catch {set cmit [git rev-parse --verify HEAD]} err]} {
+ error_popup [strcat [mc "Cannot find HEAD commit:"] "\n\n$err"]
+ return;
+ }
+ }
+
set radius [get_config gui.blamehistoryctx]
set cmdline [list --select-commit=$cmit]
}
method _blameparent {} {
+ global nullid
+
set dat [_get_click_amov_info $this]
if {$dat ne {}} {
set cmit [lindex $dat 0]
set new_path [lindex $dat 1]
- if {[catch {set cparent [git rev-parse --verify "$cmit^"]}]} {
+ # Allow using Blame Parent on lines modified in the working copy
+ if {$cmit eq $nullid} {
+ set parent_ref "HEAD"
+ } else {
+ set parent_ref "$cmit^"
+ }
+ if {[catch {set cparent [git rev-parse --verify $parent_ref]} err]} {
error_popup [strcat [mc "Cannot find parent commit:"] "\n\n$err"]
return;
}
# Generate a diff between the commit and its parent,
# and use the hunks to update the line number.
# Request zero context to simplify calculations.
- if {[catch {set fd [eval git_read diff-tree \
- --unified=0 $cparent $cmit $new_path]} err]} {
+ if {$cmit eq $nullid} {
+ set diffcmd [list diff-index --unified=0 $cparent -- $new_path]
+ } else {
+ set diffcmd [list diff-tree --unified=0 $cparent $cmit -- $new_path]
+ }
+ if {[catch {set fd [eval git_read $diffcmd]} err]} {
$status stop [mc "Unable to display parent"]
error_popup [strcat [mc "Error loading diff:"] "\n\n$err"]
return
}
}
}
- if {!$files_ready && ![string match *merge $curType]} {
+ if {!$files_ready && ![string match *merge $curType] && ![is_enabled nocommit]} {
info_popup [mc "No changes to commit.
You must stage at least 1 file before you can commit.
return
}
+ if {[is_enabled nocommitmsg]} { do_quit 0 }
+
# -- A message is required.
#
set msg [string trim [$ui_comm get 1.0 end]]
puts $msg_wt $msg
close $msg_wt
+ if {[is_enabled nocommit]} { do_quit 0 }
+
# -- Run the pre-commit hook.
#
set fd_ph [githook_read pre-commit]
set ::GITGUI_BCK_exists 0
}
- if {[is_enabled singlecommit]} do_quit
+ if {[is_enabled singlecommit]} { do_quit 0 }
# -- Update in memory status
#
}
}
+proc force_diff_encoding {enc} {
+ global current_diff_path
+
+ if {$current_diff_path ne {}} {
+ force_path_encoding $current_diff_path $enc
+ reshow_diff
+ }
+}
+
proc handle_empty_diff {} {
global current_diff_path file_states file_lists
rescan ui_ready 0
}
-proc show_diff {path w {lno {}} {scroll_pos {}}} {
+proc show_diff {path w {lno {}} {scroll_pos {}} {callback {}}} {
global file_states file_lists
- global is_3way_diff diff_active repo_config
+ global is_3way_diff is_conflict_diff diff_active repo_config
global ui_diff ui_index ui_workdir
global current_diff_path current_diff_side current_diff_header
global current_diff_queue
set s $file_states($path)
set m [lindex $s 0]
+ set is_conflict_diff 0
set current_diff_path $path
set current_diff_side $w
set current_diff_queue {}
ui_status [mc "Loading diff of %s..." [escape_path $path]]
+ set cont_info [list $scroll_pos $callback]
+
if {[string first {U} $m] >= 0} {
- merge_load_stages $path [list show_unmerged_diff $scroll_pos]
+ merge_load_stages $path [list show_unmerged_diff $cont_info]
} elseif {$m eq {_O}} {
- show_other_diff $path $w $m $scroll_pos
+ show_other_diff $path $w $m $cont_info
} else {
- start_show_diff $scroll_pos
+ start_show_diff $cont_info
}
}
-proc show_unmerged_diff {scroll_pos} {
+proc show_unmerged_diff {cont_info} {
global current_diff_path current_diff_side
- global merge_stages ui_diff
+ global merge_stages ui_diff is_conflict_diff
global current_diff_queue
if {$merge_stages(2) eq {}} {
+ set is_conflict_diff 1
lappend current_diff_queue \
[list "LOCAL: deleted\nREMOTE:\n" d======= \
[list ":1:$current_diff_path" ":3:$current_diff_path"]]
} elseif {$merge_stages(3) eq {}} {
+ set is_conflict_diff 1
lappend current_diff_queue \
[list "REMOTE: deleted\nLOCAL:\n" d======= \
[list ":1:$current_diff_path" ":2:$current_diff_path"]]
} elseif {[lindex $merge_stages(1) 0] eq {120000}
|| [lindex $merge_stages(2) 0] eq {120000}
|| [lindex $merge_stages(3) 0] eq {120000}} {
+ set is_conflict_diff 1
lappend current_diff_queue \
[list "LOCAL:\n" d======= \
[list ":1:$current_diff_path" ":2:$current_diff_path"]]
[list "REMOTE:\n" d======= \
[list ":1:$current_diff_path" ":3:$current_diff_path"]]
} else {
- start_show_diff $scroll_pos
+ start_show_diff $cont_info
return
}
- advance_diff_queue $scroll_pos
+ advance_diff_queue $cont_info
}
-proc advance_diff_queue {scroll_pos} {
+proc advance_diff_queue {cont_info} {
global current_diff_queue ui_diff
set item [lindex $current_diff_queue 0]
$ui_diff insert end [lindex $item 0] [lindex $item 1]
$ui_diff conf -state disabled
- start_show_diff $scroll_pos [lindex $item 2]
+ start_show_diff $cont_info [lindex $item 2]
}
-proc show_other_diff {path w m scroll_pos} {
+proc show_other_diff {path w m cont_info} {
global file_states file_lists
global is_3way_diff diff_active repo_config
global ui_diff ui_index ui_workdir
}
file {
set fd [open $path r]
- fconfigure $fd -eofchar {}
+ fconfigure $fd \
+ -eofchar {} \
+ -encoding [get_path_encoding $path]
set content [read $fd $max_sz]
close $fd
set sz [file size $path]
$ui_diff conf -state disabled
set diff_active 0
unlock_index
+ set scroll_pos [lindex $cont_info 0]
if {$scroll_pos ne {}} {
update
$ui_diff yview moveto $scroll_pos
}
ui_ready
+ set callback [lindex $cont_info 1]
+ if {$callback ne {}} {
+ eval $callback
+ }
return
}
}
-proc start_show_diff {scroll_pos {add_opts {}}} {
+proc start_show_diff {cont_info {add_opts {}}} {
global file_states file_lists
global is_3way_diff diff_active repo_config
global ui_diff ui_index ui_workdir
set ::current_diff_inheader 1
fconfigure $fd \
-blocking 0 \
- -encoding binary \
- -translation binary
- fileevent $fd readable [list read_diff $fd $scroll_pos]
+ -encoding [get_path_encoding $path] \
+ -translation lf
+ fileevent $fd readable [list read_diff $fd $cont_info]
}
-proc read_diff {fd scroll_pos} {
+proc read_diff {fd cont_info} {
global ui_diff diff_active
- global is_3way_diff current_diff_header
+ global is_3way_diff is_conflict_diff current_diff_header
global current_diff_queue
$ui_diff conf -state normal
{--} {set tags d_--}
{++} {
if {[regexp {^\+\+([<>]{7} |={7})} $line _g op]} {
+ set is_conflict_diff 1
set line [string replace $line 0 1 { }]
set tags d$op
} else {
{-} {set tags d_-}
{+} {
if {[regexp {^\+([<>]{7} |={7})} $line _g op]} {
+ set is_conflict_diff 1
set line [string replace $line 0 0 { }]
set tags d$op
} else {
close $fd
if {$current_diff_queue ne {}} {
- advance_diff_queue $scroll_pos
+ advance_diff_queue $cont_info
return
}
set diff_active 0
unlock_index
+ set scroll_pos [lindex $cont_info 0]
if {$scroll_pos ne {}} {
update
$ui_diff yview moveto $scroll_pos
if {[$ui_diff index end] eq {2.0}} {
handle_empty_diff
}
+ set callback [lindex $cont_info 1]
+ if {$callback ne {}} {
+ eval $callback
+ }
}
}
}
if {[catch {
+ set enc [get_path_encoding $current_diff_path]
set p [eval git_write $apply_cmd]
- fconfigure $p -translation binary -encoding binary
+ fconfigure $p -translation binary -encoding $enc
puts -nonewline $p $current_diff_header
puts -nonewline $p [$ui_diff get $s_lno $e_lno]
close $p} err]} {
set patch "@@ -$hln,$n +$hln,[eval expr $n $sign 1] @@\n$patch"
if {[catch {
+ set enc [get_path_encoding $current_diff_path]
set p [eval git_write $apply_cmd]
- fconfigure $p -translation binary -encoding binary
+ fconfigure $p -translation binary -encoding $enc
puts -nonewline $p $current_diff_header
puts -nonewline $p $patch
close $p} err]} {
{ ISO-8859-16 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10 }
{ GBK CP936 MS936 windows-936 }
{ JIS_Encoding csJISEncoding }
- { Shift_JIS MS_Kanji csShiftJIS }
+ { Shift_JIS MS_Kanji csShiftJIS ShiftJIS Shift-JIS }
{ Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
EUC-JP }
{ Extended_UNIX_Code_Fixed_Width_for_Japanese csEUCFixWidJapanese }
{ Big5 csBig5 }
}
+set encoding_groups {
+ {"" ""
+ {"Unicode" UTF-8}
+ {"Western" ISO-8859-1}}
+ {we "West European"
+ {"Western" ISO-8859-15 CP-437 CP-850 MacRoman CP-1252 Windows-1252}
+ {"Celtic" ISO-8859-14}
+ {"Greek" ISO-8859-14 ISO-8859-7 CP-737 CP-869 MacGreek CP-1253 Windows-1253}
+ {"Icelandic" MacIceland MacIcelandic CP-861}
+ {"Nordic" ISO-8859-10 CP-865}
+ {"Portuguese" CP-860}
+ {"South European" ISO-8859-3}}
+ {ee "East European"
+ {"Baltic" CP-775 ISO-8859-4 ISO-8859-13 CP-1257 Windows-1257}
+ {"Central European" CP-852 ISO-8859-2 MacCE CP-1250 Windows-1250}
+ {"Croatian" MacCroatian}
+ {"Cyrillic" CP-855 ISO-8859-5 ISO-IR-111 KOI8-R MacCyrillic CP-1251 Windows-1251}
+ {"Russian" CP-866}
+ {"Ukrainian" KOI8-U MacUkraine MacUkrainian}
+ {"Romanian" ISO-8859-16 MacRomania MacRomanian}}
+ {ea "East Asian"
+ {"Generic" ISO-2022}
+ {"Chinese Simplified" GB2312 GB1988 GB12345 GB2312-RAW GBK EUC-CN GB18030 HZ ISO-2022-CN}
+ {"Chinese Traditional" Big5 Big5-HKSCS EUC-TW CP-950}
+ {"Japanese" EUC-JP ISO-2022-JP Shift-JIS JIS-0212 JIS-0208 JIS-0201 CP-932 MacJapan}
+ {"Korean" EUC-KR UHC JOHAB ISO-2022-KR CP-949 KSC5601}}
+ {sa "SE & SW Asian"
+ {"Armenian" ARMSCII-8}
+ {"Georgian" GEOSTD8}
+ {"Thai" TIS-620 ISO-8859-11 CP-874 Windows-874 MacThai}
+ {"Turkish" CP-857 CP857 ISO-8859-9 MacTurkish CP-1254 Windows-1254}
+ {"Vietnamese" TCVN VISCII VPS CP-1258 Windows-1258}
+ {"Hindi" MacDevanagari}
+ {"Gujarati" MacGujarati}
+ {"Gurmukhi" MacGurmukhi}}
+ {me "Middle Eastern"
+ {"Arabic" ISO-8859-6 Windows-1256 CP-1256 CP-864 MacArabic}
+ {"Farsi" MacFarsi}
+ {"Hebrew" ISO-8859-8-I Windows-1255 CP-1255 ISO-8859-8 CP-862 MacHebrew}}
+ {mi "Misc"
+ {"7-bit" ASCII}
+ {"16-bit" Unicode}
+ {"Legacy" CP-863 EBCDIC}
+ {"Symbol" Symbol Dingbats MacDingbats MacCentEuro}}
+}
+
+proc build_encoding_table {} {
+ global encoding_aliases encoding_lookup_table
+
+ # Prepare the lookup list; cannot use lsort -nocase because
+ # of compatibility issues with older Tcl (e.g. in msysgit)
+ set names [list]
+ foreach item [encoding names] {
+ lappend names [list [string tolower $item] $item]
+ }
+ set names [lsort -ascii -index 0 $names]
+ # neither can we use lsearch -index
+ set lnames [list]
+ foreach item $names {
+ lappend lnames [lindex $item 0]
+ }
+
+ foreach grp $encoding_aliases {
+ set target {}
+ foreach item $grp {
+ set i [lsearch -sorted -ascii $lnames \
+ [string tolower $item]]
+ if {$i >= 0} {
+ set target [lindex $names $i 1]
+ break
+ }
+ }
+ if {$target eq {}} continue
+ foreach item $grp {
+ set encoding_lookup_table([string tolower $item]) $target
+ }
+ }
+
+ foreach item $names {
+ set encoding_lookup_table([lindex $item 0]) [lindex $item 1]
+ }
+}
+
proc tcl_encoding {enc} {
- global encoding_aliases
- set names [encoding names]
- set lcnames [string tolower $names]
- set enc [string tolower $enc]
- set i [lsearch -exact $lcnames $enc]
- if {$i < 0} {
- # look for "isonnn" instead of "iso-nnn" or "iso_nnn"
- if {[regsub {^iso[-_]} $enc iso encx]} {
- set i [lsearch -exact $lcnames $encx]
+ global encoding_lookup_table
+ if {$enc eq {}} {
+ return {}
+ }
+ if {![info exists encoding_lookup_table]} {
+ build_encoding_table
+ }
+ set enc [string tolower $enc]
+ if {![info exists encoding_lookup_table($enc)]} {
+ # look for "isonnn" instead of "iso-nnn" or "iso_nnn"
+ if {[regsub {^(iso|cp|ibm|jis)[-_]} $enc {\1} encx]} {
+ set enc $encx
+ }
+ }
+ if {[info exists encoding_lookup_table($enc)]} {
+ return $encoding_lookup_table($enc)
+ } else {
+ return {}
+ }
+}
+
+proc force_path_encoding {path enc} {
+ global path_encoding_overrides last_encoding_override
+
+ set enc [tcl_encoding $enc]
+ if {$enc eq {}} {
+ catch { unset last_encoding_override }
+ catch { unset path_encoding_overrides($path) }
+ } else {
+ set last_encoding_override $enc
+ if {$path ne {}} {
+ set path_encoding_overrides($path) $enc
+ }
+ }
+}
+
+proc get_path_encoding {path} {
+ global path_encoding_overrides last_encoding_override
+
+ if {[info exists last_encoding_override]} {
+ set tcl_enc $last_encoding_override
+ } else {
+ set tcl_enc [tcl_encoding [get_config gui.encoding]]
}
- }
- if {$i < 0} {
- foreach l $encoding_aliases {
- set ll [string tolower $l]
- if {[lsearch -exact $ll $enc] < 0} continue
- # look through the aliases for one that tcl knows about
- foreach e $ll {
- set i [lsearch -exact $lcnames $e]
- if {$i < 0} {
- if {[regsub {^iso[-_]} $e iso ex]} {
- set i [lsearch -exact $lcnames $ex]
- }
+ if {$tcl_enc eq {}} {
+ set tcl_enc [encoding system]
+ }
+ if {$path ne {}} {
+ if {[info exists path_encoding_overrides($path)]} {
+ set enc2 $path_encoding_overrides($path)
+ } else {
+ set enc2 [tcl_encoding [gitattr $path encoding $tcl_enc]]
+ }
+ if {$enc2 ne {}} {
+ set tcl_enc $enc2
+ }
+ }
+ return $tcl_enc
+}
+
+proc build_encoding_submenu {parent grp cmd} {
+ global used_encodings
+
+ set mid [lindex $grp 0]
+ set gname [mc [lindex $grp 1]]
+
+ set smenu {}
+ foreach subset [lrange $grp 2 end] {
+ set name [mc [lindex $subset 0]]
+
+ foreach enc [lrange $subset 1 end] {
+ set tcl_enc [tcl_encoding $enc]
+ if {$tcl_enc eq {}} continue
+
+ if {$smenu eq {}} {
+ if {$mid eq {}} {
+ set smenu $parent
+ } else {
+ set smenu "$parent.$mid"
+ menu $smenu
+ $parent add cascade \
+ -label $gname \
+ -menu $smenu
+ }
+ }
+
+ if {$name ne {}} {
+ set lbl "$name ($enc)"
+ } else {
+ set lbl $enc
+ }
+ $smenu add command \
+ -label $lbl \
+ -command [concat $cmd [list $tcl_enc]]
+
+ lappend used_encodings $tcl_enc
+ }
+ }
+}
+
+proc popup_btn_menu {m b} {
+ tk_popup $m [winfo pointerx $b] [winfo pointery $b]
+}
+
+proc build_encoding_menu {emenu cmd {nodef 0}} {
+ $emenu configure -postcommand \
+ [list do_build_encoding_menu $emenu $cmd $nodef]
+}
+
+proc do_build_encoding_menu {emenu cmd {nodef 0}} {
+ global used_encodings encoding_groups
+
+ $emenu configure -postcommand {}
+
+ if {!$nodef} {
+ $emenu add command \
+ -label [mc "Default"] \
+ -command [concat $cmd [list {}]]
+ }
+ set sysenc [encoding system]
+ $emenu add command \
+ -label [mc "System (%s)" $sysenc] \
+ -command [concat $cmd [list $sysenc]]
+
+ # Main encoding tree
+ set used_encodings [list identity]
+ $emenu add separator
+ foreach grp $encoding_groups {
+ build_encoding_submenu $emenu $grp $cmd
+ }
+
+ # Add unclassified encodings
+ set unused_grp [list [mc Other]]
+ foreach enc [encoding names] {
+ if {[lsearch -exact $used_encodings $enc] < 0} {
+ lappend unused_grp $enc
}
- if {$i >= 0} break
- }
- break
}
- }
- if {$i >= 0} {
- return [lindex $names $i]
- }
- return {}
+ build_encoding_submenu $emenu [list other [mc Other] $unused_grp] $cmd
}
global current_diff_path
switch -- $stage {
- 1 { set target [mc "the base version"] }
- 2 { set target [mc "this branch"] }
- 3 { set target [mc "the other branch"] }
+ 1 { set targetquestion [mc "Force resolution to the base version?"] }
+ 2 { set targetquestion [mc "Force resolution to this branch?"] }
+ 3 { set targetquestion [mc "Force resolution to the other branch?"] }
}
- set op_question [mc "Force resolution to %s?
-Note that the diff shows only conflicting changes.
+ set op_question [strcat $targetquestion "\n" \
+[mc "Note that the diff shows only conflicting changes.
%s will be overwritten.
This operation can be undone only by restarting the merge." \
- $target [short_path $current_diff_path]]
+ [short_path $current_diff_path]]]
if {[ask_popup $op_question] eq {yes}} {
merge_load_stages $current_diff_path [list merge_force_stage $stage]
}
}
+proc merge_stage_workdir {path w lno} {
+ global current_diff_path diff_active
+
+ if {$diff_active} return
+
+ if {$path ne $current_diff_path} {
+ show_diff $path $w $lno {} [list do_merge_stage_workdir $path]
+ } else {
+ do_merge_stage_workdir $path
+ }
+}
+
+proc do_merge_stage_workdir {path} {
+ global current_diff_path is_conflict_diff
+
+ if {$path ne $current_diff_path} return;
+
+ if {$is_conflict_diff} {
+ if {[ask_popup [mc "File %s seems to have unresolved conflicts, still stage?" \
+ [short_path $path]]] ne {yes}} {
+ return
+ }
+ }
+
+ merge_add_resolution $path
+}
+
proc merge_add_resolution {path} {
global current_diff_path ui_workdir
# git-gui options editor
# Copyright (C) 2006, 2007 Shawn Pearce
+proc config_check_encodings {} {
+ global repo_config_new global_config_new
+
+ set enc $global_config_new(gui.encoding)
+ if {$enc eq {}} {
+ set global_config_new(gui.encoding) [encoding system]
+ } elseif {[tcl_encoding $enc] eq {}} {
+ error_popup [mc "Invalid global encoding '%s'" $enc]
+ return 0
+ }
+
+ set enc $repo_config_new(gui.encoding)
+ if {$enc eq {}} {
+ set repo_config_new(gui.encoding) [encoding system]
+ } elseif {[tcl_encoding $enc] eq {}} {
+ error_popup [mc "Invalid repo encoding '%s'" $enc]
+ return 0
+ }
+
+ return 1
+}
+
proc save_config {} {
global default_config font_descs
global repo_config global_config
{i-1..99 gui.diffcontext {mc "Number of Diff Context Lines"}}
{i-0..99 gui.commitmsgwidth {mc "Commit Message Text Width"}}
{t gui.newbranchtemplate {mc "New Branch Name Template"}}
+ {c gui.encoding {mc "Default File Contents Encoding"}}
} {
set type [lindex $option 0]
set name [lindex $option 1]
pack $w.$f.$optid.v -side right -anchor e -padx 5
pack $w.$f.$optid -side top -anchor w -fill x
}
+ c -
t {
frame $w.$f.$optid
label $w.$f.$optid.l -text "$text:"
pack $w.$f.$optid.v -side left -anchor w \
-fill x -expand 1 \
-padx 5
+ if {$type eq {c}} {
+ menu $w.$f.$optid.m
+ build_encoding_menu $w.$f.$optid.m \
+ [list set ${f}_config_new($name)] 1
+ button $w.$f.$optid.b \
+ -text [mc "Change"] \
+ -command [list popup_btn_menu \
+ $w.$f.$optid.m $w.$f.$optid.b]
+ pack $w.$f.$optid.b -side left -anchor w
+ }
pack $w.$f.$optid -side top -anchor w -fill x
}
}
}
proc do_save_config {w} {
+ if {![config_check_encodings]} return
if {[catch {save_config} err]} {
error_popup [strcat [mc "Failed to completely save options:"] "\n\n$err"]
}
msgstr ""
"Project-Id-Version: git-gui\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2008-08-02 08:58+0200\n"
-"PO-Revision-Date: 2008-08-02 09:09+0200\n"
+"POT-Creation-Date: 2008-09-13 10:20+0200\n"
+"PO-Revision-Date: 2008-09-13 10:24+0200\n"
"Last-Translator: Christian Stimming <stimming@tuhh.de>\n"
"Language-Team: German\n"
"MIME-Version: 1.0\n"
msgid "Staged for commit, missing"
msgstr "Bereitgestellt zum Eintragen, fehlend"
-#: git-gui.sh:1597
+#: git-gui.sh:1658
+msgid "File type changed, not staged"
+msgstr "Dateityp geändert, nicht bereitgestellt"
+
+#: git-gui.sh:1659
+msgid "File type changed, staged"
+msgstr "Dateityp geändert, bereitgestellt"
+
+#: git-gui.sh:1661
msgid "Untracked, not staged"
msgstr "Nicht unter Versionskontrolle, nicht bereitgestellt"
msgid "File:"
msgstr "Datei:"
-#: git-gui.sh:2589
-msgid "Apply/Reverse Hunk"
-msgstr "Kontext anwenden/umkehren"
-
-#: git-gui.sh:2696
-msgid "Apply/Reverse Line"
-msgstr "Zeile anwenden/umkehren"
-
-#: git-gui.sh:2711
+#: git-gui.sh:2834
msgid "Refresh"
msgstr "Aktualisieren"
msgid "Increase Font Size"
msgstr "Schriftgröße vergrößern"
-#: git-gui.sh:2646
+#: git-gui.sh:2870
+msgid "Apply/Reverse Hunk"
+msgstr "Kontext anwenden/umkehren"
+
+#: git-gui.sh:2875
+msgid "Apply/Reverse Line"
+msgstr "Zeile anwenden/umkehren"
+
+#: git-gui.sh:2885
+msgid "Run Merge Tool"
+msgstr "Zusammenführungswerkzeug"
+
+#: git-gui.sh:2890
+msgid "Use Remote Version"
+msgstr "Entfernte Version benutzen"
+
+#: git-gui.sh:2894
+msgid "Use Local Version"
+msgstr "Lokale Version benutzen"
+
+#: git-gui.sh:2898
+msgid "Revert To Base"
+msgstr "Ursprüngliche Version benutzen"
+
+#: git-gui.sh:2906
+msgid "Stage Working Copy"
+msgstr "Arbeitskopie bereitstellen"
+
+#: git-gui.sh:2925
msgid "Unstage Hunk From Commit"
msgstr "Kontext aus Bereitstellung herausnehmen"
msgid "Do Full Copy Detection"
msgstr "Volle Kopie-Erkennung"
-#: lib/blame.tcl:388
+#: lib/blame.tcl:263
+msgid "Show History Context"
+msgstr "Historien-Kontext anzeigen"
+
+#: lib/blame.tcl:266
+msgid "Blame Parent Commit"
+msgstr "Elternversion annotieren"
+
+#: lib/blame.tcl:394
#, tcl-format
msgid "Reading %s..."
msgstr "%s lesen..."
msgid "Original File:"
msgstr "Ursprüngliche Datei:"
-#: lib/blame.tcl:925
+#: lib/blame.tcl:990
+msgid "Cannot find parent commit:"
+msgstr "Elternversion kann nicht gefunden werden:"
+
+#: lib/blame.tcl:1001
+msgid "Unable to display parent"
+msgstr "Elternversion kann nicht angezeigt werden"
+
+#: lib/blame.tcl:1002 lib/diff.tcl:191
+msgid "Error loading diff:"
+msgstr "Fehler beim Laden des Vergleichs:"
+
+#: lib/blame.tcl:1142
msgid "Originally By:"
msgstr "Ursprünglich von:"
msgid "* Binary file (not showing content)."
msgstr "* Binärdatei (Inhalt wird nicht angezeigt)"
-#: lib/diff.tcl:185
-msgid "Error loading diff:"
-msgstr "Fehler beim Laden des Vergleichs:"
-
-#: lib/diff.tcl:303
+#: lib/diff.tcl:313
msgid "Failed to unstage selected hunk."
msgstr ""
"Fehler beim Herausnehmen des gewählten Kontexts aus der Bereitstellung."
msgid "Do Nothing"
msgstr "Nichts tun"
+#: lib/index.tcl:419
+msgid "Reverting selected files"
+msgstr "Änderungen in gewählten Dateien verwerfen"
+
+#: lib/index.tcl:423
+#, tcl-format
+msgid "Reverting %s"
+msgstr "Änderungen in %s verwerfen"
+
#: lib/merge.tcl:13
msgid ""
"Cannot merge while amending.\n"
msgid "Abort completed. Ready."
msgstr "Abbruch durchgeführt. Bereit."
+#: lib/mergetool.tcl:14
+msgid "Force resolution to the base version?"
+msgstr "Konflikt durch Basisversion ersetzen?"
+
+#: lib/mergetool.tcl:15
+msgid "Force resolution to this branch?"
+msgstr "Konflikt durch diesen Zweig ersetzen?"
+
+#: lib/mergetool.tcl:16
+msgid "Force resolution to the other branch?"
+msgstr "Konflikt durch anderen Zweig ersetzen?"
+
+#: lib/mergetool.tcl:20
+#, tcl-format
+msgid ""
+"Note that the diff shows only conflicting changes.\n"
+"\n"
+"%s will be overwritten.\n"
+"\n"
+"This operation can be undone only by restarting the merge."
+msgstr ""
+"Hinweis: Der Vergleich zeigt nur konfliktverursachende Änderungen an.\n"
+"\n"
+"»%s« wird überschrieben.\n"
+"\n"
+"Diese Operation kann nur rückgängig gemacht werden, wenn die\n"
+"Zusammenführung erneut gestartet wird."
+
+#: lib/mergetool.tcl:32
+#, tcl-format
+msgid "Adding resolution for %s"
+msgstr "Auflösung hinzugefügt für %s"
+
+#: lib/mergetool.tcl:119
+msgid "Cannot resolve deletion or link conflicts using a tool"
+msgstr ""
+"Konflikte durch gelöschte Dateien oder symbolische Links können nicht durch "
+"das Zusamenführungswerkzeug gelöst werden."
+
+#: lib/mergetool.tcl:124
+msgid "Conflict file does not exist"
+msgstr "Konflikt-Datei existiert nicht"
+
+#: lib/mergetool.tcl:236
+#, tcl-format
+msgid "Not a GUI merge tool: '%s'"
+msgstr "Kein GUI Zusammenführungswerkzeug: »%s«"
+
+#: lib/mergetool.tcl:240
+#, tcl-format
+msgid "Unsupported merge tool '%s'"
+msgstr "Unbekanntes Zusammenführungswerkzeug: »%s«"
+
+#: lib/mergetool.tcl:275
+msgid "Merge tool is already running, terminate it?"
+msgstr "Zusammenführungswerkzeug läuft bereits. Soll es abgebrochen werden?"
+
+#: lib/mergetool.tcl:295
+#, tcl-format
+msgid ""
+"Error retrieving versions:\n"
+"%s"
+msgstr ""
+"Fehler beim Abrufen der Dateiversionen:\n"
+"%s"
+
+#: lib/mergetool.tcl:315
+#, tcl-format
+msgid ""
+"Could not start the merge tool:\n"
+"\n"
+"%s"
+msgstr ""
+"Zusammenführungswerkzeug konnte nicht gestartet werden:\n"
+"\n"
+"%s"
+
+#: lib/mergetool.tcl:319
+msgid "Running merge tool..."
+msgstr "Zusammenführungswerkzeug starten..."
+
+#: lib/mergetool.tcl:347 lib/mergetool.tcl:363
+msgid "Merge tool failed."
+msgstr "Zusammenführungswerkzeug fehlgeschlagen."
+
+#: lib/mergetool.tcl:353
+#, tcl-format
+msgid "File %s unchanged, still accept as resolved?"
+msgstr "Datei »%s« unverändert. Trotzdem Konflikt als gelöst akzeptieren?"
+
#: lib/option.tcl:95
msgid "Restore Defaults"
msgstr "Voreinstellungen wiederherstellen"
msgid "Show Diffstat After Merge"
msgstr "Vergleichsstatistik nach Zusammenführen anzeigen"
-#: lib/option.tcl:123
+#: lib/option.tcl:122
+msgid "Use Merge Tool"
+msgstr "Zusammenführungswerkzeug"
+
+#: lib/option.tcl:124
msgid "Trust File Modification Timestamps"
msgstr "Auf Dateiänderungsdatum verlassen"
msgstr "Mindestzahl Zeichen für Kopie-Annotieren"
#: lib/option.tcl:128
+msgid "Blame History Context Radius (days)"
+msgstr "Anzahl Tage für Historien-Kontext"
+
+#: lib/option.tcl:129
msgid "Number of Diff Context Lines"
msgstr "Anzahl der Kontextzeilen beim Vergleich"
pick_one $sha1 ||
die_with_patch $sha1 "Could not apply $sha1... $rest"
make_patch $sha1
- : > "$DOTEST"/amend
+ git rev-parse --verify HEAD > "$DOTEST"/amend
warn "Stopped at $sha1... $rest"
warn "You can amend the commit now, with"
warn
else
. "$DOTEST"/author-script ||
die "Cannot find the author identity"
+ amend=
if test -f "$DOTEST"/amend
then
+ amend=$(git rev-parse --verify HEAD)
+ test "$amend" = $(cat "$DOTEST"/amend) ||
+ die "\
+You have uncommitted changes in your working tree. Please, commit them
+first and then run 'git rebase --continue' again."
git reset --soft HEAD^ ||
die "Cannot rewind the HEAD"
fi
export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_AUTHOR_DATE &&
- git commit --no-verify -F "$DOTEST"/message -e ||
- die "Could not commit staged changes."
+ git commit --no-verify -F "$DOTEST"/message -e || {
+ test -n "$amend" && git reset --soft $amend
+ die "Could not commit staged changes."
+ }
fi
require_clean_work_tree
a pack everything in a single pack
A same as -a, and turn unreachable objects loose
d remove redundant packs, and run git-prune-packed
-f pass --no-reuse-delta to git-pack-objects
+f pass --no-reuse-object to git-pack-objects
n do not run git-update-server-info
q,quiet be quiet
l pass --local to git-pack-objects
then
flags=--stat
fi
- s=$(git rev-parse --revs-only --no-flags --default $ref_stash "$@")
+
+ if test $# = 0
+ then
+ set x "$ref_stash@{0}"
+ shift
+ fi
+
+ s=$(git rev-parse --revs-only --no-flags "$@")
w_commit=$(git rev-parse --verify "$s") &&
b_commit=$(git rev-parse --verify "$s^") &&
shift
esac
+ if test $# = 0
+ then
+ set x "$ref_stash@{0}"
+ shift
+ fi
+
# current index state
c_tree=$(git write-tree) ||
die 'Cannot apply a stash in the middle of a merge'
# stash records the work tree, and is a merge between the
# base commit (first parent) and the index tree (second parent).
- s=$(git rev-parse --revs-only --no-flags --default $ref_stash "$@") &&
+ s=$(git rev-parse --revs-only --no-flags "$@") &&
w_tree=$(git rev-parse --verify "$s:") &&
b_tree=$(git rev-parse --verify "$s^1:") &&
i_tree=$(git rev-parse --verify "$s^2:") ||
do
name=$(module_name "$path")
url=$(git config -f .gitmodules --get submodule."$name".url)
+
+ # Possibly a url relative to parent
+ case "$url" in
+ ./*|../*)
+ url=$(resolve_relative_url "$url") || exit
+ ;;
+ esac
+
if test -e "$path"/.git
then
(
sub rebuild {
my ($self) = @_;
my $map_path = $self->map_path;
- return if (-e $map_path && ! -z $map_path);
+ my $partial = (-e $map_path && ! -z $map_path);
return unless ::verify_ref($self->refname.'^0');
- if ($self->use_svm_props || $self->no_metadata) {
+ if (!$partial && ($self->use_svm_props || $self->no_metadata)) {
my $rev_db = $self->rev_db_path;
$self->rebuild_from_rev_db($rev_db);
if ($self->use_svm_props) {
$self->unlink_rev_db_symlink;
return;
}
- print "Rebuilding $map_path ...\n";
+ print "Rebuilding $map_path ...\n" if (!$partial);
+ my ($base_rev, $head) = ($partial ? $self->rev_map_max_norebuild(1) :
+ (undef, undef));
my ($log, $ctx) =
command_output_pipe(qw/rev-list --pretty=raw --no-color --reverse/,
- $self->refname, '--');
+ ($head ? "$head.." : "") . $self->refname,
+ '--');
my $metadata_url = $self->metadata_url;
remove_username($metadata_url);
my $svn_uuid = $self->ra_uuid;
($metadata_url && $url && ($url ne $metadata_url))) {
next;
}
+ if ($partial && $head) {
+ print "Partial-rebuilding $map_path ...\n";
+ print "Currently at $base_rev = $head\n";
+ $head = undef;
+ }
$self->rev_map_set($rev, $c);
print "r$rev = $c\n";
}
command_close_pipe($log, $ctx);
- print "Done rebuilding $map_path\n";
+ print "Done rebuilding $map_path\n" if (!$partial || !$head);
my $rev_db_path = $self->rev_db_path;
if (-f $self->rev_db_path) {
unlink $self->rev_db_path or croak "unlink: $!";
sub rev_map_max {
my ($self, $want_commit) = @_;
$self->rebuild;
+ my ($r, $c) = $self->rev_map_max_norebuild($want_commit);
+ $want_commit ? ($r, $c) : $r;
+}
+
+sub rev_map_max_norebuild {
+ my ($self, $want_commit) = @_;
my $map_path = $self->map_path;
stat $map_path or return $want_commit ? (0, undef) : 0;
sysopen(my $fh, $map_path, O_RDONLY) or croak "open: $!";
my $out = syswrite($tmp_fh, $str, $res);
defined($out) && $out == $res
or croak("write ",
- $tmp_fh->filename,
+ Git::temp_path($tmp_fh),
": $!\n");
}
defined $res or croak $!;
}
$hash = $::_repository->hash_and_insert_object(
- $fh->filename);
+ Git::temp_path($fh));
$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
Git::temp_release($fb->{base}, 1);
my $old_url = $full_url;
$full_url .= '/' . escape_uri_only($path) if length $path;
my ($ra, $reparented);
- if ($old_url ne $full_url) {
- if ($old_url !~ m#^svn(\+ssh)?://#) {
- SVN::_Ra::svn_ra_reparent($self->{session}, $full_url,
- $pool);
- $self->{url} = $full_url;
- $reparented = 1;
- } else {
- $_[0] = undef;
- $self = undef;
- $RA = undef;
- $ra = Git::SVN::Ra->new($full_url);
- $ra_invalid = 1;
- }
+
+ if ($old_url =~ m#^svn(\+ssh)?://#) {
+ $_[0] = undef;
+ $self = undef;
+ $RA = undef;
+ $ra = Git::SVN::Ra->new($full_url);
+ $ra_invalid = 1;
+ } elsif ($old_url ne $full_url) {
+ SVN::_Ra::svn_ra_reparent($self->{session}, $full_url, $pool);
+ $self->{url} = $full_url;
+ $reparented = 1;
}
+
$ra ||= $self;
$url_b = escape_url($url_b);
my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool);
sub run_pager {
return unless -t *STDOUT && defined $pager;
- pipe my $rfd, my $wfd or return;
+ pipe my ($rfd, $wfd) or return;
defined(my $pid = fork) or ::fatal "Can't fork: $!";
if (!$pid) {
open STDOUT, '>&', $wfd or
valid_tool() {
case "$1" in
- firefox | iceweasel | konqueror | w3m | links | lynx | dillo | open)
+ firefox | iceweasel | konqueror | w3m | links | lynx | dillo | open | start)
;; # happy
*)
valid_custom_tool "$1" || return 1
if test -n "$SECURITYSESSIONID"; then
browser_candidates="open $browser_candidates"
fi
+ # /bin/start indicates MinGW
+ if test -n /bin/start; then
+ browser_candidates="start $browser_candidates"
+ fi
for i in $browser_candidates; do
init_browser_path $i
;;
esac
;;
- w3m|links|lynx|open)
+ w3m|links|lynx|open|start)
eval "$browser_path" "$@"
;;
dillo)
alias_string + 1, alias_command);
}
count = split_cmdline(alias_string, &new_argv);
+ if (count < 0)
+ die("Bad alias.%s string", alias_command);
option_count = handle_options(&new_argv, &count, &envchanged);
if (envchanged)
die("alias '%s' changes environment variables\n"
%files cvs
%defattr(-,root,root)
%doc Documentation/*git-cvs*.txt
+%{_bindir}/git-cvsserver
%{_libexecdir}/git-core/*cvs*
%{!?_without_docs: %{_mandir}/man1/*cvs*.1*}
%{!?_without_docs: %doc Documentation/*git-cvs*.html }
# No files for you!
%changelog
+* Fri Sep 12 2008 Quy Tonthat <qtonthat@gmail.com>
+- move git-cvsserver to bindir.
+
* Sun Jun 15 2008 Junio C Hamano <gitster@pobox.com>
- Remove curl from Requires list.
last;
}
}
- if ($co{'title'} eq "") {
+ if (! defined $co{'title'} || $co{'title'} eq "") {
$co{'title'} = $co{'title_short'} = '(no commit message)';
}
# remove added spaces
}
my $refs = git_get_references();
- my @commitlist = parse_commits($hash, 101, (100 * $page));
+ my $commit_hash = $hash;
+ if (defined $hash_parent) {
+ $commit_hash = "$hash_parent..$hash";
+ }
+ my @commitlist = parse_commits($commit_hash, 101, (100 * $page));
my $paging_nav = format_paging_nav('shortlog', $hash, $head, $page, $#commitlist >= 100);
my $next_link = '';
#include "diff.h"
#include "revision.h"
+/* Internal API */
+
+/*
+ * Output the next line for a graph.
+ * This formats the next graph line into the specified strbuf. It is not
+ * terminated with a newline.
+ *
+ * Returns 1 if the line includes the current commit, and 0 otherwise.
+ * graph_next_line() will return 1 exactly once for each time
+ * graph_update() is called.
+ */
+static int graph_next_line(struct git_graph *graph, struct strbuf *sb);
+
+/*
+ * Output a padding line in the graph.
+ * This is similar to graph_next_line(). However, it is guaranteed to
+ * never print the current commit line. Instead, if the commit line is
+ * next, it will simply output a line of vertical padding, extending the
+ * branch lines downwards, but leaving them otherwise unchanged.
+ */
+static void graph_padding_line(struct git_graph *graph, struct strbuf *sb);
+
+/*
+ * Print a strbuf to stdout. If the graph is non-NULL, all lines but the
+ * first will be prefixed with the graph output.
+ *
+ * If the strbuf ends with a newline, the output will end after this
+ * newline. A new graph line will not be printed after the final newline.
+ * If the strbuf is empty, no output will be printed.
+ *
+ * Since the first line will not include the graph ouput, the caller is
+ * responsible for printing this line's graph (perhaps via
+ * graph_show_commit() or graph_show_oneline()) before calling
+ * graph_show_strbuf().
+ */
+static void graph_show_strbuf(struct git_graph *graph, struct strbuf const *sb);
+
/*
* TODO:
* - Add colors to the graph.
return graph;
}
-void graph_release(struct git_graph *graph)
-{
- free(graph->columns);
- free(graph->new_columns);
- free(graph->mapping);
- free(graph);
-}
-
static void graph_update_state(struct git_graph *graph, enum graph_state s)
{
graph->prev_state = graph->state;
strbuf_addch(sb, '*');
}
-void graph_output_commit_line(struct git_graph *graph, struct strbuf *sb)
+static void graph_output_commit_line(struct git_graph *graph, struct strbuf *sb)
{
int seen_this = 0;
int i, j;
graph_update_state(graph, GRAPH_COLLAPSING);
}
-void graph_output_post_merge_line(struct git_graph *graph, struct strbuf *sb)
+static void graph_output_post_merge_line(struct git_graph *graph, struct strbuf *sb)
{
int seen_this = 0;
int i, j;
graph_update_state(graph, GRAPH_COLLAPSING);
}
-void graph_output_collapsing_line(struct git_graph *graph, struct strbuf *sb)
+static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf *sb)
{
int i;
int *tmp_mapping;
graph_update_state(graph, GRAPH_PADDING);
}
-int graph_next_line(struct git_graph *graph, struct strbuf *sb)
+static int graph_next_line(struct git_graph *graph, struct strbuf *sb)
{
switch (graph->state) {
case GRAPH_PADDING:
return 0;
}
-void graph_padding_line(struct git_graph *graph, struct strbuf *sb)
+static void graph_padding_line(struct git_graph *graph, struct strbuf *sb)
{
int i, j;
}
-void graph_show_strbuf(struct git_graph *graph, struct strbuf const *sb)
+static void graph_show_strbuf(struct git_graph *graph, struct strbuf const *sb)
{
char *p;
*/
struct git_graph *graph_init(struct rev_info *opt);
-/*
- * Destroy a struct git_graph and free associated memory.
- */
-void graph_release(struct git_graph *graph);
-
/*
* Update a git_graph with a new commit.
* This will cause the graph to begin outputting lines for the new commit
*/
void graph_update(struct git_graph *graph, struct commit *commit);
-/*
- * Output the next line for a graph.
- * This formats the next graph line into the specified strbuf. It is not
- * terminated with a newline.
- *
- * Returns 1 if the line includes the current commit, and 0 otherwise.
- * graph_next_line() will return 1 exactly once for each time
- * graph_update() is called.
- */
-int graph_next_line(struct git_graph *graph, struct strbuf *sb);
-
-/*
- * Output a padding line in the graph.
- * This is similar to graph_next_line(). However, it is guaranteed to
- * never print the current commit line. Instead, if the commit line is
- * next, it will simply output a line of vertical padding, extending the
- * branch lines downwards, but leaving them otherwise unchanged.
- */
-void graph_padding_line(struct git_graph *graph, struct strbuf *sb);
-
/*
* Determine if a graph has finished outputting lines for the current
* commit.
*/
int graph_show_remainder(struct git_graph *graph);
-/*
- * Print a strbuf to stdout. If the graph is non-NULL, all lines but the
- * first will be prefixed with the graph output.
- *
- * If the strbuf ends with a newline, the output will end after this
- * newline. A new graph line will not be printed after the final newline.
- * If the strbuf is empty, no output will be printed.
- *
- * Since the first line will not include the graph ouput, the caller is
- * responsible for printing this line's graph (perhaps via
- * graph_show_commit() or graph_show_oneline()) before calling
- * graph_show_strbuf().
- */
-void graph_show_strbuf(struct git_graph *graph, struct strbuf const *sb);
-
/*
* Print a commit message strbuf and the remainder of the graph to stdout.
*
{
CURL* result = curl_easy_init();
- curl_easy_setopt(result, CURLOPT_SSL_VERIFYPEER, curl_ssl_verify);
+ if (!curl_ssl_verify) {
+ curl_easy_setopt(result, CURLOPT_SSL_VERIFYPEER, 0);
+ curl_easy_setopt(result, CURLOPT_SSL_VERIFYHOST, 0);
+ } else {
+ /* Verify authenticity of the peer's certificate */
+ curl_easy_setopt(result, CURLOPT_SSL_VERIFYPEER, 1);
+ /* The name in the cert must match whom we tried to connect */
+ curl_easy_setopt(result, CURLOPT_SSL_VERIFYHOST, 2);
+ }
+
#if LIBCURL_VERSION_NUM >= 0x070907
curl_easy_setopt(result, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
#endif
if (!pack_name) {
static char tmpfile[PATH_MAX];
snprintf(tmpfile, sizeof(tmpfile),
- "%s/tmp_pack_XXXXXX", get_object_directory());
+ "%s/pack/tmp_pack_XXXXXX", get_object_directory());
output_fd = xmkstemp(tmpfile);
pack_name = xstrdup(tmpfile);
} else
#include "cache.h"
#include "diff.h"
#include "commit.h"
+#include "tag.h"
#include "graph.h"
#include "log-tree.h"
#include "reflog-walk.h"
+#include "refs.h"
struct decoration name_decoration = { "object names" };
+static void add_name_decoration(const char *prefix, const char *name, struct object *obj)
+{
+ int plen = strlen(prefix);
+ int nlen = strlen(name);
+ struct name_decoration *res = xmalloc(sizeof(struct name_decoration) + plen + nlen);
+ memcpy(res->name, prefix, plen);
+ memcpy(res->name + plen, name, nlen + 1);
+ res->next = add_decoration(&name_decoration, obj, res);
+}
+
+static int add_ref_decoration(const char *refname, const unsigned char *sha1, int flags, void *cb_data)
+{
+ struct object *obj = parse_object(sha1);
+ if (!obj)
+ return 0;
+ add_name_decoration("", refname, obj);
+ while (obj->type == OBJ_TAG) {
+ obj = ((struct tag *)obj)->tagged;
+ if (!obj)
+ break;
+ add_name_decoration("tag: ", refname, obj);
+ }
+ return 0;
+}
+
+void load_ref_decorations(void)
+{
+ static int loaded;
+ if (!loaded) {
+ loaded = 1;
+ for_each_ref(add_ref_decoration, NULL);
+ }
+}
+
static void show_parents(struct commit *commit, int abbrev)
{
struct commit_list *p;
const char **subject_p,
const char **extra_headers_p,
int *need_8bit_cte_p);
+void load_ref_decorations(void);
#endif
extern unsigned int get_max_object_index(void);
extern struct object *get_indexed_object(unsigned int);
-/** Internal only **/
+/*
+ * This can be used to see if we have heard of the object before, but
+ * it can return "yes we have, and here is a half-initialised object"
+ * for an object that we haven't loaded/parsed yet.
+ *
+ * When parsing a commit to create an in-core commit object, its
+ * parents list holds commit objects that represent its parents, but
+ * they are expected to be lazily initialized and do not know what
+ * their trees or parents are yet. When this function returns such a
+ * half-initialised objects, the caller is expected to initialize them
+ * by calling parse_object() on them.
+ */
struct object *lookup_object(const unsigned char *sha1);
extern void *create_object(const unsigned char *sha1, int type, void *obj);
if (!index_name) {
static char tmpfile[PATH_MAX];
snprintf(tmpfile, sizeof(tmpfile),
- "%s/tmp_idx_XXXXXX", get_object_directory());
+ "%s/pack/tmp_idx_XXXXXX", get_object_directory());
fd = xmkstemp(tmpfile);
index_name = xstrdup(tmpfile);
} else {
command_bidi_pipe command_close_bidi_pipe
version exec_path hash_object git_cmd_try
remote_refs
- temp_acquire temp_release temp_reset);
+ temp_acquire temp_release temp_reset temp_path);
=head1 DESCRIPTION
{ # %TEMP_* Lexical Context
-my (%TEMP_LOCKS, %TEMP_FILES);
+my (%TEMP_FILEMAP, %TEMP_FILES);
=item temp_acquire ( NAME )
my $temp_fd = _temp_cache($name);
- $TEMP_LOCKS{$temp_fd} = 1;
+ $TEMP_FILES{$temp_fd}{locked} = 1;
$temp_fd;
}
sub temp_release {
my ($self, $temp_fd, $trunc) = _maybe_self(@_);
- if (ref($temp_fd) ne 'File::Temp') {
+ if (exists $TEMP_FILEMAP{$temp_fd}) {
$temp_fd = $TEMP_FILES{$temp_fd};
}
- unless ($TEMP_LOCKS{$temp_fd}) {
+ unless ($TEMP_FILES{$temp_fd}{locked}) {
carp "Attempt to release temp file '",
$temp_fd, "' that has not been locked";
}
temp_reset($temp_fd) if $trunc and $temp_fd->opened;
- $TEMP_LOCKS{$temp_fd} = 0;
+ $TEMP_FILES{$temp_fd}{locked} = 0;
undef;
}
_verify_require();
- my $temp_fd = \$TEMP_FILES{$name};
+ my $temp_fd = \$TEMP_FILEMAP{$name};
if (defined $$temp_fd and $$temp_fd->opened) {
- if ($TEMP_LOCKS{$$temp_fd}) {
+ if ($TEMP_FILES{$$temp_fd}{locked}) {
throw Error::Simple("Temp file with moniker '",
$name, "' already in use");
}
carp "Temp file '", $name,
"' was closed. Opening replacement.";
}
- $$temp_fd = File::Temp->new(
- TEMPLATE => 'Git_XXXXXX',
- DIR => File::Spec->tmpdir
+ my $fname;
+ ($$temp_fd, $fname) = File::Temp->tempfile(
+ 'Git_XXXXXX', UNLINK => 1
) or throw Error::Simple("couldn't open new temp file");
$$temp_fd->autoflush;
binmode $$temp_fd;
+ $TEMP_FILES{$$temp_fd}{fname} = $fname;
}
$$temp_fd;
}
or throw Error::Simple("expected file position to be reset");
}
+=item temp_path ( NAME )
+
+=item temp_path ( FILEHANDLE )
+
+Returns the filename associated with the given tempfile.
+
+=cut
+
+sub temp_path {
+ my ($self, $temp_fd) = _maybe_self(@_);
+
+ if (exists $TEMP_FILEMAP{$temp_fd}) {
+ $temp_fd = $TEMP_FILEMAP{$temp_fd};
+ }
+ $TEMP_FILES{$temp_fd}{fname};
+}
+
sub END {
- unlink values %TEMP_FILES if %TEMP_FILES;
+ unlink values %TEMP_FILEMAP if %TEMP_FILEMAP;
}
} # %TEMP_* Lexical Context
#include "revision.h"
#include "string-list.h"
#include "mailmap.h"
+#include "log-tree.h"
static char *user_format;
context->commit_header_parsed = 1;
}
+static void format_decoration(struct strbuf *sb, const struct commit *commit)
+{
+ struct name_decoration *d;
+ const char *prefix = " (";
+
+ load_ref_decorations();
+ d = lookup_decoration(&name_decoration, &commit->object);
+ while (d) {
+ strbuf_addstr(sb, prefix);
+ prefix = ", ";
+ strbuf_addstr(sb, d->name);
+ d = d->next;
+ }
+ if (prefix[0] == ',')
+ strbuf_addch(sb, ')');
+}
+
static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
void *context)
{
? '<'
: '>');
return 1;
+ case 'd':
+ format_decoration(sb, commit);
+ return 1;
}
/* For the rest we have to parse the commit header. */
istate->cache_nr = 0;
istate->cache_changed = 0;
istate->timestamp = 0;
+ istate->name_hash_initialized = 0;
free_hash(&istate->name_hash);
cache_tree_free(&(istate->cache_tree));
free(istate->alloc);
* and dst pointers are always freeable pointers as well
* as the refspec pointer itself.
*/
-void free_refspecs(struct refspec *refspec, int nr_refspec)
+static void free_refspecs(struct refspec *refspec, int nr_refspec)
{
int i;
return parse_refspec_internal(nr_refspec, refspec, 1, 0);
}
-struct refspec *parse_push_refspec(int nr_refspec, const char **refspec)
+static struct refspec *parse_push_refspec(int nr_refspec, const char **refspec)
{
return parse_refspec_internal(nr_refspec, refspec, 0, 0);
}
return ret;
}
-void free_ref(struct ref *ref)
+static void free_ref(struct ref *ref)
{
if (!ref)
return;
int valid_fetch_refspec(const char *refspec);
struct refspec *parse_fetch_refspec(int nr_refspec, const char **refspec);
-struct refspec *parse_push_refspec(int nr_refspec, const char **refspec);
-void free_refspecs(struct refspec *refspec, int nr_refspec);
int match_refs(struct ref *src, struct ref *dst, struct ref ***dst_tail,
int nr_refspec, const char **refspec, int all);
return c;
}
- if (revs->reverse) {
- int limit = -1;
-
- if (0 <= revs->max_count) {
- limit = revs->max_count;
- if (0 < revs->skip_count)
- limit += revs->skip_count;
- }
- l = NULL;
- while ((c = get_revision_1(revs))) {
- commit_list_insert(c, &l);
- if ((0 < limit) && !--limit)
- break;
- }
- revs->commits = l;
- revs->reverse = 0;
- revs->max_count = -1;
- c = NULL;
- }
-
/*
* Now pick up what they want to give us
*/
struct commit *get_revision(struct rev_info *revs)
{
- struct commit *c = get_revision_internal(revs);
+ struct commit *c;
+ struct commit_list *reversed;
+
+ if (revs->reverse) {
+ reversed = NULL;
+ while ((c = get_revision_internal(revs))) {
+ commit_list_insert(c, &reversed);
+ }
+ revs->commits = reversed;
+ revs->reverse = 0;
+ revs->reverse_output_stage = 1;
+ }
+
+ if (revs->reverse_output_stage)
+ return pop_commit(&revs->commits);
+
+ c = get_revision_internal(revs);
if (c && revs->graph)
graph_update(revs->graph, c);
return c;
rewrite_parents:1,
print_parents:1,
reverse:1,
+ reverse_output_stage:1,
cherry_pick:1,
first_parent_only:1;
pos = strchr(pos, '/');
if (!pos)
break;
- *pos = 0;
+ while (*++pos == '/')
+ ;
+ if (!*pos)
+ break;
+ *--pos = '\0';
if (!stat(path, &st)) {
/* path exists */
if (!S_ISDIR(st.st_mode)) {
*/
int move_temp_to_file(const char *tmpfile, const char *filename)
{
- int ret = link(tmpfile, filename);
+ int ret = 0;
+ if (link(tmpfile, filename))
+ ret = errno;
/*
* Coda hack - coda doesn't like cross-directory links,
close $rd or die $!;
EOF
}
+
+require_svnserve () {
+ if test -z "$SVNSERVE_PORT"
+ then
+ say 'skipping svnserve test. (set $SVNSERVE_PORT to enable)'
+ test_done
+ exit
+ fi
+}
+
+start_svnserve () {
+ svnserve --listen-port $SVNSERVE_PORT \
+ --root "$rawsvnrepo" \
+ --listen-once \
+ --listen-host 127.0.0.1 &
+}
+
--- /dev/null
+#!/bin/sh
+
+test_description='respect crlf in git archive'
+
+. ./test-lib.sh
+UNZIP=${UNZIP:-unzip}
+
+test_expect_success setup '
+
+ git config core.autocrlf true
+
+ printf "CRLF line ending\r\nAnd another\r\n" > sample &&
+ git add sample &&
+
+ test_tick &&
+ git commit -m Initial
+
+'
+
+test_expect_success 'tar archive' '
+
+ git archive --format=tar HEAD |
+ ( mkdir untarred && cd untarred && "$TAR" -xf - )
+
+ test_cmp sample untarred/sample
+
+'
+
+"$UNZIP" -v >/dev/null 2>&1
+if [ $? -eq 127 ]; then
+ echo "Skipping ZIP test, because unzip was not found"
+ test_done
+ exit
+fi
+
+test_expect_success 'zip archive' '
+
+ git archive --format=zip HEAD >test.zip &&
+
+ ( mkdir unzipped && cd unzipped && unzip ../test.zip ) &&
+
+ test_cmp sample unzipped/sample
+
+'
+
+test_done
'
+test_expect_success 'check split_cmdline return' "
+ git config alias.split-cmdline-fix 'echo \"' &&
+ test_must_fail git split-cmdline-fix &&
+ echo foo > foo &&
+ git add foo &&
+ git commit -m 'initial commit' &&
+ git config branch.master.mergeoptions 'echo \"' &&
+ test_must_fail git merge master
+ "
+
test_done
test_must_fail git diff --no-index Beer.java Beer-correct.java
'
+test_expect_success 'alternation in pattern' '
+ git config diff.java.funcname "^[ ]*\\(\\(public\\|static\\).*\\)$"
+ git diff --no-index Beer.java Beer-correct.java |
+ grep "^@@.*@@ public static void main("
+'
+
test_done
test_expect_success \
'make sure index-pack detects the SHA1 collision' \
- 'test_must_fail git index-pack -o bad.idx test-3.pack'
+ 'test_must_fail git index-pack -o bad.idx test-3.pack 2>msg &&
+ grep "SHA1 COLLISION FOUND" msg'
test_expect_success \
'honor pack.packSizeLimit' \
'
+test_expect_success 'auto tag following fetches minimum' '
+
+ cd "$D" &&
+ git clone .git follow &&
+ git checkout HEAD^0 &&
+ (
+ for i in 1 2 3 4 5 6 7
+ do
+ echo $i >>file &&
+ git commit -m $i -a &&
+ git tag -a -m $i excess-$i || exit 1
+ done
+ ) &&
+ git checkout master &&
+ (
+ cd follow &&
+ git fetch
+ )
+'
+
test_done
--- /dev/null
+#!/bin/sh
+
+test_description='--reverse combines with --parents'
+
+. ./test-lib.sh
+
+
+commit () {
+ test_tick &&
+ echo $1 > foo &&
+ git add foo &&
+ git commit -m "$1"
+}
+
+test_expect_success 'set up --reverse example' '
+ commit one &&
+ git tag root &&
+ commit two &&
+ git checkout -b side HEAD^ &&
+ commit three &&
+ git checkout master &&
+ git merge -s ours side &&
+ commit five
+ '
+
+test_expect_success '--reverse --parents --full-history combines correctly' '
+ git rev-list --parents --full-history master -- foo |
+ perl -e "print reverse <>" > expected &&
+ git rev-list --reverse --parents --full-history master -- foo \
+ > actual &&
+ test_cmp actual expected
+ '
+
+test_expect_success '--boundary does too' '
+ git rev-list --boundary --parents --full-history master ^root -- foo |
+ perl -e "print reverse <>" > expected &&
+ git rev-list --boundary --reverse --parents --full-history \
+ master ^root -- foo > actual &&
+ test_cmp actual expected
+ '
+
+test_done
git branch -D bisect
'
+# This creates a "side" branch to test "siblings" cases.
+#
+# H1-H2-H3-H4-H5-H6-H7 <--other
+# \
+# S5-S6-S7 <--side
+#
+test_expect_success 'side branch creation' '
+ git bisect reset &&
+ git checkout -b side $HASH4 &&
+ add_line_into_file "5(side): first line on a side branch" hello2 &&
+ SIDE_HASH5=$(git rev-parse --verify HEAD) &&
+ add_line_into_file "6(side): second line on a side branch" hello2 &&
+ SIDE_HASH6=$(git rev-parse --verify HEAD) &&
+ add_line_into_file "7(side): third line on a side branch" hello2 &&
+ SIDE_HASH7=$(git rev-parse --verify HEAD)
+'
+
+test_expect_success 'good merge base when good and bad are siblings' '
+ git bisect start "$HASH7" "$SIDE_HASH7" > my_bisect_log.txt &&
+ grep "merge base must be tested" my_bisect_log.txt &&
+ grep $HASH4 my_bisect_log.txt &&
+ git bisect good > my_bisect_log.txt &&
+ test_must_fail grep "merge base must be tested" my_bisect_log.txt &&
+ grep $HASH6 my_bisect_log.txt &&
+ git bisect reset
+'
+test_expect_success 'skipped merge base when good and bad are siblings' '
+ git bisect start "$SIDE_HASH7" "$HASH7" > my_bisect_log.txt &&
+ grep "merge base must be tested" my_bisect_log.txt &&
+ grep $HASH4 my_bisect_log.txt &&
+ git bisect skip > my_bisect_log.txt 2>&1 &&
+ grep "Warning" my_bisect_log.txt &&
+ grep $SIDE_HASH6 my_bisect_log.txt &&
+ git bisect reset
+'
+
+test_expect_success 'bad merge base when good and bad are siblings' '
+ git bisect start "$HASH7" HEAD > my_bisect_log.txt &&
+ grep "merge base must be tested" my_bisect_log.txt &&
+ grep $HASH4 my_bisect_log.txt &&
+ test_must_fail git bisect bad > my_bisect_log.txt 2>&1 &&
+ grep "merge base $HASH4 is bad" my_bisect_log.txt &&
+ grep "fixed between $HASH4 and \[$SIDE_HASH7\]" my_bisect_log.txt &&
+ git bisect reset
+'
+
+# This creates a few more commits (A and B) to test "siblings" cases
+# when a good and a bad rev have many merge bases.
+#
+# We should have the following:
+#
+# H1-H2-H3-H4-H5-H6-H7
+# \ \ \
+# S5-A \
+# \ \
+# S6-S7----B
+#
+# And there A and B have 2 merge bases (S5 and H5) that should be
+# reported by "git merge-base --all A B".
+#
+test_expect_success 'many merge bases creation' '
+ git checkout "$SIDE_HASH5" &&
+ git merge -m "merge HASH5 and SIDE_HASH5" "$HASH5" &&
+ A_HASH=$(git rev-parse --verify HEAD) &&
+ git checkout side &&
+ git merge -m "merge HASH7 and SIDE_HASH7" "$HASH7" &&
+ B_HASH=$(git rev-parse --verify HEAD) &&
+ git merge-base --all "$A_HASH" "$B_HASH" > merge_bases.txt &&
+ test $(wc -l < merge_bases.txt) = "2" &&
+ grep "$HASH5" merge_bases.txt &&
+ grep "$SIDE_HASH5" merge_bases.txt
+'
+
+test_expect_success 'good merge bases when good and bad are siblings' '
+ git bisect start "$B_HASH" "$A_HASH" > my_bisect_log.txt &&
+ grep "merge base must be tested" my_bisect_log.txt &&
+ git bisect good > my_bisect_log2.txt &&
+ grep "merge base must be tested" my_bisect_log2.txt &&
+ {
+ {
+ grep "$SIDE_HASH5" my_bisect_log.txt &&
+ grep "$HASH5" my_bisect_log2.txt
+ } || {
+ grep "$SIDE_HASH5" my_bisect_log2.txt &&
+ grep "$HASH5" my_bisect_log.txt
+ }
+ } &&
+ git bisect reset
+'
+
+check_trace() {
+ grep "$1" "$GIT_TRACE" | grep "\^$2" | grep "$3" >/dev/null
+}
+
+test_expect_success 'optimized merge base checks' '
+ GIT_TRACE="$(pwd)/trace.log" &&
+ export GIT_TRACE &&
+ git bisect start "$HASH7" "$SIDE_HASH7" > my_bisect_log.txt &&
+ grep "merge base must be tested" my_bisect_log.txt &&
+ grep "$HASH4" my_bisect_log.txt &&
+ check_trace "rev-list" "$HASH7" "$SIDE_HASH7" &&
+ git bisect good > my_bisect_log2.txt &&
+ test -f ".git/BISECT_ANCESTORS_OK" &&
+ test "$HASH6" = $(git rev-parse --verify HEAD) &&
+ : > "$GIT_TRACE" &&
+ git bisect bad > my_bisect_log3.txt &&
+ test_must_fail check_trace "rev-list" "$HASH6" "$SIDE_HASH7" &&
+ git bisect good "$A_HASH" > my_bisect_log4.txt &&
+ grep "merge base must be tested" my_bisect_log4.txt &&
+ test_must_fail test -f ".git/BISECT_ANCESTORS_OK" &&
+ check_trace "rev-list" "$HASH6" "$A_HASH" &&
+ unset GIT_TRACE
+'
+
#
#
test_done
"
done
+cat >expected <<\EOF
+master
+testtag
+EOF
+
+test_expect_success 'Check short refname format' '
+ (git for-each-ref --format="%(refname:short)" refs/heads &&
+ git for-each-ref --format="%(refname:short)" refs/tags) >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'Check for invalid refname format' '
+ test_must_fail git for-each-ref --format="%(refname:INVALID)"
+'
+
+cat >expected <<\EOF
+heads/master
+master
+EOF
+
+test_expect_success 'Check ambiguous head and tag refs' '
+ git checkout -b newtag &&
+ echo "Using $datestamp" > one &&
+ git add one &&
+ git commit -m "Branch" &&
+ setdate_and_increment &&
+ git tag -m "Tagging at $datestamp" master &&
+ git for-each-ref --format "%(refname:short)" refs/heads/master refs/tags/master >actual &&
+ test_cmp expected actual
+'
+
+cat >expected <<\EOF
+heads/ambiguous
+ambiguous
+EOF
+
+test_expect_success 'Check ambiguous head and tag refs II' '
+ git checkout master &&
+ git tag ambiguous testtag^0 &&
+ git branch ambiguous testtag^0 &&
+ git for-each-ref --format "%(refname:short)" refs/heads/ambiguous refs/tags/ambiguous >actual &&
+ test_cmp expected actual
+'
+
test_expect_success 'an unusual tag with an incomplete line' '
git tag -m "bogo" bogo &&
test_cmp sample file
'
+test_expect_success 'failing checkout -b should not break working tree' '
+ git reset --hard master &&
+ git symbolic-ref HEAD refs/heads/master &&
+ test_must_fail git checkout -b renamer side^ &&
+ test $(git symbolic-ref HEAD) = refs/heads/master &&
+ git diff --exit-code &&
+ git diff --cached --exit-code
+
+'
+
test_done
test_expect_success \
'validate git rev-list output.' \
- 'diff current expected'
+ 'test_cmp expected current'
test_expect_success 'partial commit that involves removal (1)' '
git commit -m "Partial: add elif" elif &&
git diff-tree --name-status HEAD^ HEAD >current &&
echo "A elif" >expected &&
- diff expected current
+ test_cmp expected current
'
git commit -m "Partial: remove file" file &&
git diff-tree --name-status HEAD^ HEAD >current &&
echo "D file" >expected &&
- diff expected current
+ test_cmp expected current
'
git commit -m "Partial: modify elif" elif &&
git diff-tree --name-status HEAD^ HEAD >current &&
echo "M elif" >expected &&
- diff expected current
+ test_cmp expected current
'
expected &&
git commit --amend --author="$author" &&
git cat-file -p HEAD > current &&
- diff expected current
+ test_cmp expected current
'
expected &&
git commit --amend --author="$author" &&
git cat-file -p HEAD > current &&
- diff expected current
+ test_cmp expected current
'
test -f c5.c
'
+test_expect_success 'setup' '
+ for i in A B C D E
+ do
+ echo $i > $i.c &&
+ git add $i.c &&
+ git commit -m $i &&
+ git tag $i
+ done &&
+ git reset --hard A &&
+ for i in F G H I
+ do
+ echo $i > $i.c &&
+ git add $i.c &&
+ git commit -m $i &&
+ git tag $i
+ done
+'
+
+test_expect_success 'merge E and I' '
+ git reset --hard A &&
+ git merge E I
+'
+
+test_expect_success 'verify merge result' '
+ test $(git rev-parse HEAD^1) = $(git rev-parse E) &&
+ test $(git rev-parse HEAD^2) = $(git rev-parse I)
+'
+
+test_expect_success 'add conflicts' '
+ git reset --hard E &&
+ echo foo > file.c &&
+ git add file.c &&
+ git commit -m E2 &&
+ git tag E2 &&
+ git reset --hard I &&
+ echo bar >file.c &&
+ git add file.c &&
+ git commit -m I2 &&
+ git tag I2
+'
+
+test_expect_success 'merge E2 and I2, causing a conflict and resolve it' '
+ git reset --hard A &&
+ test_must_fail git merge E2 I2 &&
+ echo baz > file.c &&
+ git add file.c &&
+ git commit -m "resolve conflict"
+'
+
+test_expect_success 'verify merge result' '
+ test $(git rev-parse HEAD^1) = $(git rev-parse E2) &&
+ test $(git rev-parse HEAD^2) = $(git rev-parse I2)
+'
test_done
. ./lib-git-svn.sh
-if test -z "$SVNSERVE_PORT"
-then
- say 'skipping svnserve test. (set $SVNSERVE_PORT to enable)'
- test_done
- exit
-fi
-
-start_svnserve () {
- svnserve --listen-port $SVNSERVE_PORT \
- --root "$rawsvnrepo" \
- --listen-once \
- --listen-host 127.0.0.1 &
-}
+require_svnserve
test_expect_success 'start tracking an empty repo' '
svn mkdir -m "empty dir" "$svnrepo"/empty-dir &&
--- /dev/null
+#!/bin/sh
+#
+# Copyright (c) 2008 Alec Berryman
+
+test_description='git svn fetch repository with deleted and readded directory'
+
+. ./lib-git-svn.sh
+
+# Don't run this by default; it opens up a port.
+require_svnserve
+
+test_expect_success 'load repository' '
+ svnadmin load -q "$rawsvnrepo" < "$TEST_DIRECTORY"/t9126/follow-deleted-readded.dump
+ '
+
+test_expect_success 'fetch repository' '
+ start_svnserve &&
+ git svn init svn://127.0.0.1:$SVNSERVE_PORT &&
+ git svn fetch
+ '
+
+test_done
--- /dev/null
+SVN-fs-dump-format-version: 2
+
+UUID: 1807dc6f-c693-4cda-9710-00e1be8c1f21
+
+Revision-number: 0
+Prop-content-length: 56
+Content-length: 56
+
+K 8
+svn:date
+V 27
+2008-09-14T19:53:13.006748Z
+PROPS-END
+
+Revision-number: 1
+Prop-content-length: 111
+Content-length: 111
+
+K 7
+svn:log
+V 12
+Create trunk
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:13.239689Z
+PROPS-END
+
+Node-path: trunk
+Node-kind: dir
+Node-action: add
+Prop-content-length: 10
+Content-length: 10
+
+PROPS-END
+
+
+Revision-number: 2
+Prop-content-length: 119
+Content-length: 119
+
+K 7
+svn:log
+V 20
+Create trunk/project
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:13.548860Z
+PROPS-END
+
+Node-path: trunk/project
+Node-kind: dir
+Node-action: add
+Prop-content-length: 10
+Content-length: 10
+
+PROPS-END
+
+
+Revision-number: 3
+Prop-content-length: 111
+Content-length: 111
+
+K 7
+svn:log
+V 12
+add new file
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:15.433630Z
+PROPS-END
+
+Node-path: trunk/project/foo
+Node-kind: file
+Node-action: add
+Prop-content-length: 10
+Text-content-length: 4
+Text-content-md5: d3b07384d113edec49eaa6238ad5ff00
+Content-length: 14
+
+PROPS-END
+foo
+
+
+Revision-number: 4
+Prop-content-length: 116
+Content-length: 116
+
+K 7
+svn:log
+V 17
+change foo to bar
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:17.339884Z
+PROPS-END
+
+Node-path: trunk/project/foo
+Node-kind: file
+Node-action: change
+Text-content-length: 4
+Text-content-md5: c157a79031e1c40f85931829bc5fc552
+Content-length: 4
+
+bar
+
+
+Revision-number: 5
+Prop-content-length: 114
+Content-length: 114
+
+K 7
+svn:log
+V 15
+don't like that
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:19.335001Z
+PROPS-END
+
+Node-path: trunk/project
+Node-action: delete
+
+
+Revision-number: 6
+Prop-content-length: 110
+Content-length: 110
+
+K 7
+svn:log
+V 11
+reset trunk
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:19.845897Z
+PROPS-END
+
+Node-path: trunk/project
+Node-kind: dir
+Node-action: add
+Node-copyfrom-rev: 4
+Node-copyfrom-path: trunk/project
+
+
+Revision-number: 7
+Prop-content-length: 113
+Content-length: 113
+
+K 7
+svn:log
+V 14
+change to quux
+K 10
+svn:author
+V 4
+alec
+K 8
+svn:date
+V 27
+2008-09-14T19:53:21.367947Z
+PROPS-END
+
+Node-path: trunk/project/foo
+Node-kind: file
+Node-action: change
+Text-content-length: 5
+Text-content-md5: d3b07a382ec010c01889250fce66fb13
+Content-length: 5
+
+quux
+
+
--- /dev/null
+#!/bin/sh
+#
+# Copyright (c) 2008 Deskin Miller
+#
+
+test_description='git svn partial-rebuild tests'
+. ./lib-git-svn.sh
+
+test_expect_success 'initialize svnrepo' '
+ mkdir import &&
+ (
+ cd import &&
+ mkdir trunk branches tags &&
+ cd trunk &&
+ echo foo > foo &&
+ cd .. &&
+ svn import -m "import for git-svn" . "$svnrepo" >/dev/null &&
+ svn copy "$svnrepo"/trunk "$svnrepo"/branches/a \
+ -m "created branch a" &&
+ cd .. &&
+ rm -rf import &&
+ svn co "$svnrepo"/trunk trunk &&
+ cd trunk &&
+ echo bar >> foo &&
+ svn ci -m "updated trunk" &&
+ cd .. &&
+ svn co "$svnrepo"/branches/a a &&
+ cd a &&
+ echo baz >> a &&
+ svn add a &&
+ svn ci -m "updated a" &&
+ cd .. &&
+ git svn init --stdlayout "$svnrepo"
+ )
+'
+
+test_expect_success 'import an early SVN revision into git' '
+ git svn fetch -r1:2
+'
+
+test_expect_success 'make full git mirror of SVN' '
+ mkdir mirror &&
+ (
+ cd mirror &&
+ git init &&
+ git svn init --stdlayout "$svnrepo" &&
+ git svn fetch &&
+ cd ..
+ )
+'
+
+test_expect_success 'fetch from git mirror and partial-rebuild' '
+ git config --add remote.origin.url "file://$PWD/mirror/.git" &&
+ git config --add remote.origin.fetch refs/remotes/*:refs/remotes/* &&
+ git fetch origin &&
+ git svn fetch
+'
+
+test_done
use Cwd;
use File::Basename;
-use File::Temp;
BEGIN { use_ok('Git') }
# Failure cases for config:
# Save and restore STDERR; we will probably extract this into a
# "dies_ok" method and possibly move the STDERR handling to Git.pm.
-open our $tmpstderr, ">&", STDERR or die "cannot save STDERR"; close STDERR;
+open our $tmpstderr, ">&STDERR" or die "cannot save STDERR"; close STDERR;
eval { $r->config("test.dupstring") };
ok($@, "config: duplicate entry in scalar context fails");
eval { $r->config_bool("test.boolother") };
# objects and hashes
ok(our $file1hash = $r->command_oneline('rev-parse', "HEAD:file1"), "(get file hash)");
-our $tmpfile = File::Temp->new;
-is($r->cat_blob($file1hash, $tmpfile), 15, "cat_blob: size");
+my $tmpfile = "file.tmp";
+open TEMPFILE, "+>$tmpfile" or die "Can't open $tmpfile: $!";
+is($r->cat_blob($file1hash, \*TEMPFILE), 15, "cat_blob: size");
our $blobcontents;
-{ local $/; seek $tmpfile, 0, 0; $blobcontents = <$tmpfile>; }
+{ local $/; seek TEMPFILE, 0, 0; $blobcontents = <TEMPFILE>; }
is($blobcontents, "changed file 1\n", "cat_blob: data");
-seek $tmpfile, 0, 0;
+close TEMPFILE or die "Failed writing to $tmpfile: $!";
is(Git::hash_object("blob", $tmpfile), $file1hash, "hash_object: roundtrip");
-$tmpfile = File::Temp->new();
-print $tmpfile my $test_text = "test blob, to be inserted\n";
+open TEMPFILE, ">$tmpfile" or die "Can't open $tmpfile: $!";
+print TEMPFILE my $test_text = "test blob, to be inserted\n";
+close TEMPFILE or die "Failed writing to $tmpfile: $!";
like(our $newhash = $r->hash_and_insert_object($tmpfile), qr/[0-9a-fA-F]{40}/,
"hash_and_insert_object: returns hash");
-$tmpfile = File::Temp->new;
-is($r->cat_blob($newhash, $tmpfile), length $test_text, "cat_blob: roundtrip size");
-{ local $/; seek $tmpfile, 0, 0; $blobcontents = <$tmpfile>; }
+open TEMPFILE, "+>$tmpfile" or die "Can't open $tmpfile: $!";
+is($r->cat_blob($newhash, \*TEMPFILE), length $test_text, "cat_blob: roundtrip size");
+{ local $/; seek TEMPFILE, 0, 0; $blobcontents = <TEMPFILE>; }
is($blobcontents, $test_text, "cat_blob: roundtrip data");
+close TEMPFILE;
+unlink $tmpfile;
# paths
is($r->repo_path, "./.git", "repo_path");
dir=`expr "$$dst" : '\(.*\)/'` && \
mkdir -p blt/$$dir && \
case "$$boilerplate" in \
- *--) ;; \
- *) cp -p $$boilerplate blt/$$dst ;; \
- esac || exit; \
+ *--) continue;; \
+ esac && \
+ cp $$boilerplate blt/$$dst && \
+ if test -x "blt/$$dst"; then rx=rx; else rx=r; fi && \
+ chmod a+$$rx "blt/$$dst" || exit; \
done && \
date >$@
return -1;
}
}
- else if (newtree)
+ else if (newtree) {
+ if (oldtree && !o->initial_checkout) {
+ /*
+ * deletion of the path was staged;
+ */
+ if (same(oldtree, newtree))
+ return 1;
+ return reject_merge(oldtree, o);
+ }
return merged_entry(newtree, current, o);
+ }
return deleted_entry(oldtree, current, o);
}
verbose_update:1,
aggressive:1,
skip_unmerged:1,
+ initial_checkout:1,
gently:1;
const char *prefix;
int pos;