compat / regex.con commit update-index: refuse to add working tree items beyond symlinks (806d13b)
   1/* Extended regular expression matching and search library,
   2   version 0.12.
   3   (Implements POSIX draft P10003.2/D11.2, except for
   4   internationalization features.)
   5
   6   Copyright (C) 1993 Free Software Foundation, Inc.
   7
   8   This program is free software; you can redistribute it and/or modify
   9   it under the terms of the GNU General Public License as published by
  10   the Free Software Foundation; either version 2, or (at your option)
  11   any later version.
  12
  13   This program is distributed in the hope that it will be useful,
  14   but WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16   GNU General Public License for more details.
  17
  18   You should have received a copy of the GNU General Public License
  19   along with this program; if not, write to the Free Software
  20   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
  21
  22/* AIX requires this to be the first thing in the file. */
  23#if defined (_AIX) && !defined (REGEX_MALLOC)
  24  #pragma alloca
  25#endif
  26
  27#define _GNU_SOURCE
  28
  29/* We need this for `regex.h', and perhaps for the Emacs include files.  */
  30#include <sys/types.h>
  31
  32/* We used to test for `BSTRING' here, but only GCC and Emacs define
  33   `BSTRING', as far as I know, and neither of them use this code.  */
  34#include <string.h>
  35#ifndef bcmp
  36#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
  37#endif
  38#ifndef bcopy
  39#define bcopy(s, d, n)  memcpy ((d), (s), (n))
  40#endif
  41#ifndef bzero
  42#define bzero(s, n)     memset ((s), 0, (n))
  43#endif
  44
  45#include <stdlib.h>
  46
  47
  48/* Define the syntax stuff for \<, \>, etc.  */
  49
  50/* This must be nonzero for the wordchar and notwordchar pattern
  51   commands in re_match_2.  */
  52#ifndef Sword
  53#define Sword 1
  54#endif
  55
  56#ifdef SYNTAX_TABLE
  57
  58extern char *re_syntax_table;
  59
  60#else /* not SYNTAX_TABLE */
  61
  62/* How many characters in the character set.  */
  63#define CHAR_SET_SIZE 256
  64
  65static char re_syntax_table[CHAR_SET_SIZE];
  66
  67static void
  68init_syntax_once ()
  69{
  70   register int c;
  71   static int done = 0;
  72
  73   if (done)
  74     return;
  75
  76   bzero (re_syntax_table, sizeof re_syntax_table);
  77
  78   for (c = 'a'; c <= 'z'; c++)
  79     re_syntax_table[c] = Sword;
  80
  81   for (c = 'A'; c <= 'Z'; c++)
  82     re_syntax_table[c] = Sword;
  83
  84   for (c = '0'; c <= '9'; c++)
  85     re_syntax_table[c] = Sword;
  86
  87   re_syntax_table['_'] = Sword;
  88
  89   done = 1;
  90}
  91
  92#endif /* not SYNTAX_TABLE */
  93
  94#define SYNTAX(c) re_syntax_table[c]
  95
  96\f
  97/* Get the interface, including the syntax bits.  */
  98#include "regex.h"
  99
 100/* isalpha etc. are used for the character classes.  */
 101#include <ctype.h>
 102
 103#ifndef isascii
 104#define isascii(c) 1
 105#endif
 106
 107#ifdef isblank
 108#define ISBLANK(c) (isascii (c) && isblank (c))
 109#else
 110#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 111#endif
 112#ifdef isgraph
 113#define ISGRAPH(c) (isascii (c) && isgraph (c))
 114#else
 115#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
 116#endif
 117
 118#define ISPRINT(c) (isascii (c) && isprint (c))
 119#define ISDIGIT(c) (isascii (c) && isdigit (c))
 120#define ISALNUM(c) (isascii (c) && isalnum (c))
 121#define ISALPHA(c) (isascii (c) && isalpha (c))
 122#define ISCNTRL(c) (isascii (c) && iscntrl (c))
 123#define ISLOWER(c) (isascii (c) && islower (c))
 124#define ISPUNCT(c) (isascii (c) && ispunct (c))
 125#define ISSPACE(c) (isascii (c) && isspace (c))
 126#define ISUPPER(c) (isascii (c) && isupper (c))
 127#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
 128
 129#ifndef NULL
 130#define NULL 0
 131#endif
 132
 133/* We remove any previous definition of `SIGN_EXTEND_CHAR',
 134   since ours (we hope) works properly with all combinations of
 135   machines, compilers, `char' and `unsigned char' argument types.
 136   (Per Bothner suggested the basic approach.)  */
 137#undef SIGN_EXTEND_CHAR
 138#if __STDC__
 139#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
 140#else  /* not __STDC__ */
 141/* As in Harbison and Steele.  */
 142#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
 143#endif
 144\f
 145/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 146   use `alloca' instead of `malloc'.  This is because using malloc in
 147   re_search* or re_match* could cause memory leaks when C-g is used in
 148   Emacs; also, malloc is slower and causes storage fragmentation.  On
 149   the other hand, malloc is more portable, and easier to debug.
 150
 151   Because we sometimes use alloca, some routines have to be macros,
 152   not functions -- `alloca'-allocated space disappears at the end of the
 153   function it is called in.  */
 154
 155#ifdef REGEX_MALLOC
 156
 157#define REGEX_ALLOCATE malloc
 158#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
 159
 160#else /* not REGEX_MALLOC  */
 161
 162/* Emacs already defines alloca, sometimes.  */
 163#ifndef alloca
 164
 165/* Make alloca work the best possible way.  */
 166#ifdef __GNUC__
 167#define alloca __builtin_alloca
 168#else /* not __GNUC__ */
 169#if HAVE_ALLOCA_H
 170#include <alloca.h>
 171#else /* not __GNUC__ or HAVE_ALLOCA_H */
 172#ifndef _AIX /* Already did AIX, up at the top.  */
 173char *alloca ();
 174#endif /* not _AIX */
 175#endif /* not HAVE_ALLOCA_H */
 176#endif /* not __GNUC__ */
 177
 178#endif /* not alloca */
 179
 180#define REGEX_ALLOCATE alloca
 181
 182/* Assumes a `char *destination' variable.  */
 183#define REGEX_REALLOCATE(source, osize, nsize)                          \
 184  (destination = (char *) alloca (nsize),                               \
 185   bcopy (source, destination, osize),                                  \
 186   destination)
 187
 188#endif /* not REGEX_MALLOC */
 189
 190
 191/* True if `size1' is non-NULL and PTR is pointing anywhere inside
 192   `string1' or just past its end.  This works if PTR is NULL, which is
 193   a good thing.  */
 194#define FIRST_STRING_P(ptr)                                     \
 195  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 196
 197/* (Re)Allocate N items of type T using malloc, or fail.  */
 198#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
 199#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
 200#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 201
 202#define BYTEWIDTH 8 /* In bits.  */
 203
 204#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
 205
 206#define MAX(a, b) ((a) > (b) ? (a) : (b))
 207#define MIN(a, b) ((a) < (b) ? (a) : (b))
 208
 209typedef char boolean;
 210#define false 0
 211#define true 1
 212\f
 213/* These are the command codes that appear in compiled regular
 214   expressions.  Some opcodes are followed by argument bytes.  A
 215   command code can specify any interpretation whatsoever for its
 216   arguments.  Zero bytes may appear in the compiled regular expression.
 217
 218   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
 219   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
 220   `exactn' we use here must also be 1.  */
 221
 222typedef enum
 223{
 224  no_op = 0,
 225
 226        /* Followed by one byte giving n, then by n literal bytes.  */
 227  exactn = 1,
 228
 229        /* Matches any (more or less) character.  */
 230  anychar,
 231
 232        /* Matches any one char belonging to specified set.  First
 233           following byte is number of bitmap bytes.  Then come bytes
 234           for a bitmap saying which chars are in.  Bits in each byte
 235           are ordered low-bit-first.  A character is in the set if its
 236           bit is 1.  A character too large to have a bit in the map is
 237           automatically not in the set.  */
 238  charset,
 239
 240        /* Same parameters as charset, but match any character that is
 241           not one of those specified.  */
 242  charset_not,
 243
 244        /* Start remembering the text that is matched, for storing in a
 245           register.  Followed by one byte with the register number, in
 246           the range 0 to one less than the pattern buffer's re_nsub
 247           field.  Then followed by one byte with the number of groups
 248           inner to this one.  (This last has to be part of the
 249           start_memory only because we need it in the on_failure_jump
 250           of re_match_2.)  */
 251  start_memory,
 252
 253        /* Stop remembering the text that is matched and store it in a
 254           memory register.  Followed by one byte with the register
 255           number, in the range 0 to one less than `re_nsub' in the
 256           pattern buffer, and one byte with the number of inner groups,
 257           just like `start_memory'.  (We need the number of inner
 258           groups here because we don't have any easy way of finding the
 259           corresponding start_memory when we're at a stop_memory.)  */
 260  stop_memory,
 261
 262        /* Match a duplicate of something remembered. Followed by one
 263           byte containing the register number.  */
 264  duplicate,
 265
 266        /* Fail unless at beginning of line.  */
 267  begline,
 268
 269        /* Fail unless at end of line.  */
 270  endline,
 271
 272        /* Succeeds if at beginning of buffer (if emacs) or at beginning
 273           of string to be matched (if not).  */
 274  begbuf,
 275
 276        /* Analogously, for end of buffer/string.  */
 277  endbuf,
 278
 279        /* Followed by two byte relative address to which to jump.  */
 280  jump,
 281
 282        /* Same as jump, but marks the end of an alternative.  */
 283  jump_past_alt,
 284
 285        /* Followed by two-byte relative address of place to resume at
 286           in case of failure.  */
 287  on_failure_jump,
 288
 289        /* Like on_failure_jump, but pushes a placeholder instead of the
 290           current string position when executed.  */
 291  on_failure_keep_string_jump,
 292
 293        /* Throw away latest failure point and then jump to following
 294           two-byte relative address.  */
 295  pop_failure_jump,
 296
 297        /* Change to pop_failure_jump if know won't have to backtrack to
 298           match; otherwise change to jump.  This is used to jump
 299           back to the beginning of a repeat.  If what follows this jump
 300           clearly won't match what the repeat does, such that we can be
 301           sure that there is no use backtracking out of repetitions
 302           already matched, then we change it to a pop_failure_jump.
 303           Followed by two-byte address.  */
 304  maybe_pop_jump,
 305
 306        /* Jump to following two-byte address, and push a dummy failure
 307           point. This failure point will be thrown away if an attempt
 308           is made to use it for a failure.  A `+' construct makes this
 309           before the first repeat.  Also used as an intermediary kind
 310           of jump when compiling an alternative.  */
 311  dummy_failure_jump,
 312
 313        /* Push a dummy failure point and continue.  Used at the end of
 314           alternatives.  */
 315  push_dummy_failure,
 316
 317        /* Followed by two-byte relative address and two-byte number n.
 318           After matching N times, jump to the address upon failure.  */
 319  succeed_n,
 320
 321        /* Followed by two-byte relative address, and two-byte number n.
 322           Jump to the address N times, then fail.  */
 323  jump_n,
 324
 325        /* Set the following two-byte relative address to the
 326           subsequent two-byte number.  The address *includes* the two
 327           bytes of number.  */
 328  set_number_at,
 329
 330  wordchar,     /* Matches any word-constituent character.  */
 331  notwordchar,  /* Matches any char that is not a word-constituent.  */
 332
 333  wordbeg,      /* Succeeds if at word beginning.  */
 334  wordend,      /* Succeeds if at word end.  */
 335
 336  wordbound,    /* Succeeds if at a word boundary.  */
 337  notwordbound  /* Succeeds if not at a word boundary.  */
 338
 339#ifdef emacs
 340  ,before_dot,  /* Succeeds if before point.  */
 341  at_dot,       /* Succeeds if at point.  */
 342  after_dot,    /* Succeeds if after point.  */
 343
 344        /* Matches any character whose syntax is specified.  Followed by
 345           a byte which contains a syntax code, e.g., Sword.  */
 346  syntaxspec,
 347
 348        /* Matches any character whose syntax is not that specified.  */
 349  notsyntaxspec
 350#endif /* emacs */
 351} re_opcode_t;
 352\f
 353/* Common operations on the compiled pattern.  */
 354
 355/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
 356
 357#define STORE_NUMBER(destination, number)                               \
 358  do {                                                                  \
 359    (destination)[0] = (number) & 0377;                                 \
 360    (destination)[1] = (number) >> 8;                                   \
 361  } while (0)
 362
 363/* Same as STORE_NUMBER, except increment DESTINATION to
 364   the byte after where the number is stored.  Therefore, DESTINATION
 365   must be an lvalue.  */
 366
 367#define STORE_NUMBER_AND_INCR(destination, number)                      \
 368  do {                                                                  \
 369    STORE_NUMBER (destination, number);                                 \
 370    (destination) += 2;                                                 \
 371  } while (0)
 372
 373/* Put into DESTINATION a number stored in two contiguous bytes starting
 374   at SOURCE.  */
 375
 376#define EXTRACT_NUMBER(destination, source)                             \
 377  do {                                                                  \
 378    (destination) = *(source) & 0377;                                   \
 379    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
 380  } while (0)
 381
 382#ifdef DEBUG
 383static void
 384extract_number (dest, source)
 385    int *dest;
 386    unsigned char *source;
 387{
 388  int temp = SIGN_EXTEND_CHAR (*(source + 1));
 389  *dest = *source & 0377;
 390  *dest += temp << 8;
 391}
 392
 393#ifndef EXTRACT_MACROS /* To debug the macros.  */
 394#undef EXTRACT_NUMBER
 395#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
 396#endif /* not EXTRACT_MACROS */
 397
 398#endif /* DEBUG */
 399
 400/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 401   SOURCE must be an lvalue.  */
 402
 403#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
 404  do {                                                                  \
 405    EXTRACT_NUMBER (destination, source);                               \
 406    (source) += 2;                                                      \
 407  } while (0)
 408
 409#ifdef DEBUG
 410static void
 411extract_number_and_incr (destination, source)
 412    int *destination;
 413    unsigned char **source;
 414{
 415  extract_number (destination, *source);
 416  *source += 2;
 417}
 418
 419#ifndef EXTRACT_MACROS
 420#undef EXTRACT_NUMBER_AND_INCR
 421#define EXTRACT_NUMBER_AND_INCR(dest, src) \
 422  extract_number_and_incr (&dest, &src)
 423#endif /* not EXTRACT_MACROS */
 424
 425#endif /* DEBUG */
 426\f
 427/* If DEBUG is defined, Regex prints many voluminous messages about what
 428   it is doing (if the variable `debug' is nonzero).  If linked with the
 429   main program in `iregex.c', you can enter patterns and strings
 430   interactively.  And if linked with the main program in `main.c' and
 431   the other test files, you can run the already-written tests.  */
 432
 433#ifdef DEBUG
 434
 435/* We use standard I/O for debugging.  */
 436#include <stdio.h>
 437
 438/* It is useful to test things that ``must'' be true when debugging.  */
 439#include <assert.h>
 440
 441static int debug = 0;
 442
 443#define DEBUG_STATEMENT(e) e
 444#define DEBUG_PRINT1(x) if (debug) printf (x)
 445#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
 446#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
 447#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
 448#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
 449  if (debug) print_partial_compiled_pattern (s, e)
 450#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
 451  if (debug) print_double_string (w, s1, sz1, s2, sz2)
 452
 453
 454extern void printchar ();
 455
 456/* Print the fastmap in human-readable form.  */
 457
 458void
 459print_fastmap (fastmap)
 460    char *fastmap;
 461{
 462  unsigned was_a_range = 0;
 463  unsigned i = 0;
 464
 465  while (i < (1 << BYTEWIDTH))
 466    {
 467      if (fastmap[i++])
 468        {
 469          was_a_range = 0;
 470          printchar (i - 1);
 471          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 472            {
 473              was_a_range = 1;
 474              i++;
 475            }
 476          if (was_a_range)
 477            {
 478              printf ("-");
 479              printchar (i - 1);
 480            }
 481        }
 482    }
 483  putchar ('\n');
 484}
 485
 486
 487/* Print a compiled pattern string in human-readable form, starting at
 488   the START pointer into it and ending just before the pointer END.  */
 489
 490void
 491print_partial_compiled_pattern (start, end)
 492    unsigned char *start;
 493    unsigned char *end;
 494{
 495  int mcnt, mcnt2;
 496  unsigned char *p = start;
 497  unsigned char *pend = end;
 498
 499  if (start == NULL)
 500    {
 501      printf ("(null)\n");
 502      return;
 503    }
 504
 505  /* Loop over pattern commands.  */
 506  while (p < pend)
 507    {
 508      switch ((re_opcode_t) *p++)
 509        {
 510        case no_op:
 511          printf ("/no_op");
 512          break;
 513
 514        case exactn:
 515          mcnt = *p++;
 516          printf ("/exactn/%d", mcnt);
 517          do
 518            {
 519              putchar ('/');
 520              printchar (*p++);
 521            }
 522          while (--mcnt);
 523          break;
 524
 525        case start_memory:
 526          mcnt = *p++;
 527          printf ("/start_memory/%d/%d", mcnt, *p++);
 528          break;
 529
 530        case stop_memory:
 531          mcnt = *p++;
 532          printf ("/stop_memory/%d/%d", mcnt, *p++);
 533          break;
 534
 535        case duplicate:
 536          printf ("/duplicate/%d", *p++);
 537          break;
 538
 539        case anychar:
 540          printf ("/anychar");
 541          break;
 542
 543        case charset:
 544        case charset_not:
 545          {
 546            register int c;
 547
 548            printf ("/charset%s",
 549                    (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
 550
 551            assert (p + *p < pend);
 552
 553            for (c = 0; c < *p; c++)
 554              {
 555                unsigned bit;
 556                unsigned char map_byte = p[1 + c];
 557
 558                putchar ('/');
 559
 560                for (bit = 0; bit < BYTEWIDTH; bit++)
 561                  if (map_byte & (1 << bit))
 562                    printchar (c * BYTEWIDTH + bit);
 563              }
 564            p += 1 + *p;
 565            break;
 566          }
 567
 568        case begline:
 569          printf ("/begline");
 570          break;
 571
 572        case endline:
 573          printf ("/endline");
 574          break;
 575
 576        case on_failure_jump:
 577          extract_number_and_incr (&mcnt, &p);
 578          printf ("/on_failure_jump/0/%d", mcnt);
 579          break;
 580
 581        case on_failure_keep_string_jump:
 582          extract_number_and_incr (&mcnt, &p);
 583          printf ("/on_failure_keep_string_jump/0/%d", mcnt);
 584          break;
 585
 586        case dummy_failure_jump:
 587          extract_number_and_incr (&mcnt, &p);
 588          printf ("/dummy_failure_jump/0/%d", mcnt);
 589          break;
 590
 591        case push_dummy_failure:
 592          printf ("/push_dummy_failure");
 593          break;
 594
 595        case maybe_pop_jump:
 596          extract_number_and_incr (&mcnt, &p);
 597          printf ("/maybe_pop_jump/0/%d", mcnt);
 598          break;
 599
 600        case pop_failure_jump:
 601          extract_number_and_incr (&mcnt, &p);
 602          printf ("/pop_failure_jump/0/%d", mcnt);
 603          break;
 604
 605        case jump_past_alt:
 606          extract_number_and_incr (&mcnt, &p);
 607          printf ("/jump_past_alt/0/%d", mcnt);
 608          break;
 609
 610        case jump:
 611          extract_number_and_incr (&mcnt, &p);
 612          printf ("/jump/0/%d", mcnt);
 613          break;
 614
 615        case succeed_n:
 616          extract_number_and_incr (&mcnt, &p);
 617          extract_number_and_incr (&mcnt2, &p);
 618          printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
 619          break;
 620
 621        case jump_n:
 622          extract_number_and_incr (&mcnt, &p);
 623          extract_number_and_incr (&mcnt2, &p);
 624          printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
 625          break;
 626
 627        case set_number_at:
 628          extract_number_and_incr (&mcnt, &p);
 629          extract_number_and_incr (&mcnt2, &p);
 630          printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
 631          break;
 632
 633        case wordbound:
 634          printf ("/wordbound");
 635          break;
 636
 637        case notwordbound:
 638          printf ("/notwordbound");
 639          break;
 640
 641        case wordbeg:
 642          printf ("/wordbeg");
 643          break;
 644
 645        case wordend:
 646          printf ("/wordend");
 647
 648#ifdef emacs
 649        case before_dot:
 650          printf ("/before_dot");
 651          break;
 652
 653        case at_dot:
 654          printf ("/at_dot");
 655          break;
 656
 657        case after_dot:
 658          printf ("/after_dot");
 659          break;
 660
 661        case syntaxspec:
 662          printf ("/syntaxspec");
 663          mcnt = *p++;
 664          printf ("/%d", mcnt);
 665          break;
 666
 667        case notsyntaxspec:
 668          printf ("/notsyntaxspec");
 669          mcnt = *p++;
 670          printf ("/%d", mcnt);
 671          break;
 672#endif /* emacs */
 673
 674        case wordchar:
 675          printf ("/wordchar");
 676          break;
 677
 678        case notwordchar:
 679          printf ("/notwordchar");
 680          break;
 681
 682        case begbuf:
 683          printf ("/begbuf");
 684          break;
 685
 686        case endbuf:
 687          printf ("/endbuf");
 688          break;
 689
 690        default:
 691          printf ("?%d", *(p-1));
 692        }
 693    }
 694  printf ("/\n");
 695}
 696
 697
 698void
 699print_compiled_pattern (bufp)
 700    struct re_pattern_buffer *bufp;
 701{
 702  unsigned char *buffer = bufp->buffer;
 703
 704  print_partial_compiled_pattern (buffer, buffer + bufp->used);
 705  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
 706
 707  if (bufp->fastmap_accurate && bufp->fastmap)
 708    {
 709      printf ("fastmap: ");
 710      print_fastmap (bufp->fastmap);
 711    }
 712
 713  printf ("re_nsub: %d\t", bufp->re_nsub);
 714  printf ("regs_alloc: %d\t", bufp->regs_allocated);
 715  printf ("can_be_null: %d\t", bufp->can_be_null);
 716  printf ("newline_anchor: %d\n", bufp->newline_anchor);
 717  printf ("no_sub: %d\t", bufp->no_sub);
 718  printf ("not_bol: %d\t", bufp->not_bol);
 719  printf ("not_eol: %d\t", bufp->not_eol);
 720  printf ("syntax: %d\n", bufp->syntax);
 721  /* Perhaps we should print the translate table?  */
 722}
 723
 724
 725void
 726print_double_string (where, string1, size1, string2, size2)
 727    const char *where;
 728    const char *string1;
 729    const char *string2;
 730    int size1;
 731    int size2;
 732{
 733  unsigned this_char;
 734
 735  if (where == NULL)
 736    printf ("(null)");
 737  else
 738    {
 739      if (FIRST_STRING_P (where))
 740        {
 741          for (this_char = where - string1; this_char < size1; this_char++)
 742            printchar (string1[this_char]);
 743
 744          where = string2;
 745        }
 746
 747      for (this_char = where - string2; this_char < size2; this_char++)
 748        printchar (string2[this_char]);
 749    }
 750}
 751
 752#else /* not DEBUG */
 753
 754#undef assert
 755#define assert(e)
 756
 757#define DEBUG_STATEMENT(e)
 758#define DEBUG_PRINT1(x)
 759#define DEBUG_PRINT2(x1, x2)
 760#define DEBUG_PRINT3(x1, x2, x3)
 761#define DEBUG_PRINT4(x1, x2, x3, x4)
 762#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
 763#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
 764
 765#endif /* not DEBUG */
 766\f
 767/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
 768   also be assigned to arbitrarily: each pattern buffer stores its own
 769   syntax, so it can be changed between regex compilations.  */
 770reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
 771
 772
 773/* Specify the precise syntax of regexps for compilation.  This provides
 774   for compatibility for various utilities which historically have
 775   different, incompatible syntaxes.
 776
 777   The argument SYNTAX is a bit mask comprised of the various bits
 778   defined in regex.h.  We return the old syntax.  */
 779
 780reg_syntax_t
 781re_set_syntax (syntax)
 782    reg_syntax_t syntax;
 783{
 784  reg_syntax_t ret = re_syntax_options;
 785
 786  re_syntax_options = syntax;
 787  return ret;
 788}
 789\f
 790/* This table gives an error message for each of the error codes listed
 791   in regex.h.  Obviously the order here has to be same as there.  */
 792
 793static const char *re_error_msg[] =
 794  { NULL,                                       /* REG_NOERROR */
 795    "No match",                                 /* REG_NOMATCH */
 796    "Invalid regular expression",               /* REG_BADPAT */
 797    "Invalid collation character",              /* REG_ECOLLATE */
 798    "Invalid character class name",             /* REG_ECTYPE */
 799    "Trailing backslash",                       /* REG_EESCAPE */
 800    "Invalid back reference",                   /* REG_ESUBREG */
 801    "Unmatched [ or [^",                        /* REG_EBRACK */
 802    "Unmatched ( or \\(",                       /* REG_EPAREN */
 803    "Unmatched \\{",                            /* REG_EBRACE */
 804    "Invalid content of \\{\\}",                /* REG_BADBR */
 805    "Invalid range end",                        /* REG_ERANGE */
 806    "Memory exhausted",                         /* REG_ESPACE */
 807    "Invalid preceding regular expression",     /* REG_BADRPT */
 808    "Premature end of regular expression",      /* REG_EEND */
 809    "Regular expression too big",               /* REG_ESIZE */
 810    "Unmatched ) or \\)",                       /* REG_ERPAREN */
 811  };
 812\f
 813/* Subroutine declarations and macros for regex_compile.  */
 814
 815static void store_op1 (), store_op2 ();
 816static void insert_op1 (), insert_op2 ();
 817static boolean at_begline_loc_p (), at_endline_loc_p ();
 818static boolean group_in_compile_stack ();
 819static reg_errcode_t compile_range ();
 820
 821/* Fetch the next character in the uncompiled pattern---translating it
 822   if necessary.  Also cast from a signed character in the constant
 823   string passed to us by the user to an unsigned char that we can use
 824   as an array index (in, e.g., `translate').  */
 825#define PATFETCH(c)                                                     \
 826  do {if (p == pend) return REG_EEND;                                   \
 827    c = (unsigned char) *p++;                                           \
 828    if (translate) c = translate[c];                                    \
 829  } while (0)
 830
 831/* Fetch the next character in the uncompiled pattern, with no
 832   translation.  */
 833#define PATFETCH_RAW(c)                                                 \
 834  do {if (p == pend) return REG_EEND;                                   \
 835    c = (unsigned char) *p++;                                           \
 836  } while (0)
 837
 838/* Go backwards one character in the pattern.  */
 839#define PATUNFETCH p--
 840
 841
 842/* If `translate' is non-null, return translate[D], else just D.  We
 843   cast the subscript to translate because some data is declared as
 844   `char *', to avoid warnings when a string constant is passed.  But
 845   when we use a character as a subscript we must make it unsigned.  */
 846#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
 847
 848
 849/* Macros for outputting the compiled pattern into `buffer'.  */
 850
 851/* If the buffer isn't allocated when it comes in, use this.  */
 852#define INIT_BUF_SIZE  32
 853
 854/* Make sure we have at least N more bytes of space in buffer.  */
 855#define GET_BUFFER_SPACE(n)                                             \
 856    while (b - bufp->buffer + (n) > bufp->allocated)                    \
 857      EXTEND_BUFFER ()
 858
 859/* Make sure we have one more byte of buffer space and then add C to it.  */
 860#define BUF_PUSH(c)                                                     \
 861  do {                                                                  \
 862    GET_BUFFER_SPACE (1);                                               \
 863    *b++ = (unsigned char) (c);                                         \
 864  } while (0)
 865
 866
 867/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
 868#define BUF_PUSH_2(c1, c2)                                              \
 869  do {                                                                  \
 870    GET_BUFFER_SPACE (2);                                               \
 871    *b++ = (unsigned char) (c1);                                        \
 872    *b++ = (unsigned char) (c2);                                        \
 873  } while (0)
 874
 875
 876/* As with BUF_PUSH_2, except for three bytes.  */
 877#define BUF_PUSH_3(c1, c2, c3)                                          \
 878  do {                                                                  \
 879    GET_BUFFER_SPACE (3);                                               \
 880    *b++ = (unsigned char) (c1);                                        \
 881    *b++ = (unsigned char) (c2);                                        \
 882    *b++ = (unsigned char) (c3);                                        \
 883  } while (0)
 884
 885
 886/* Store a jump with opcode OP at LOC to location TO.  We store a
 887   relative address offset by the three bytes the jump itself occupies.  */
 888#define STORE_JUMP(op, loc, to) \
 889  store_op1 (op, loc, (to) - (loc) - 3)
 890
 891/* Likewise, for a two-argument jump.  */
 892#define STORE_JUMP2(op, loc, to, arg) \
 893  store_op2 (op, loc, (to) - (loc) - 3, arg)
 894
 895/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
 896#define INSERT_JUMP(op, loc, to) \
 897  insert_op1 (op, loc, (to) - (loc) - 3, b)
 898
 899/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
 900#define INSERT_JUMP2(op, loc, to, arg) \
 901  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
 902
 903
 904/* This is not an arbitrary limit: the arguments which represent offsets
 905   into the pattern are two bytes long.  So if 2^16 bytes turns out to
 906   be too small, many things would have to change.  */
 907#define MAX_BUF_SIZE (1L << 16)
 908
 909
 910/* Extend the buffer by twice its current size via realloc and
 911   reset the pointers that pointed into the old block to point to the
 912   correct places in the new one.  If extending the buffer results in it
 913   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 914#define EXTEND_BUFFER()                                                 \
 915  do {                                                                  \
 916    unsigned char *old_buffer = bufp->buffer;                           \
 917    if (bufp->allocated == MAX_BUF_SIZE)                                \
 918      return REG_ESIZE;                                                 \
 919    bufp->allocated <<= 1;                                              \
 920    if (bufp->allocated > MAX_BUF_SIZE)                                 \
 921      bufp->allocated = MAX_BUF_SIZE;                                   \
 922    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
 923    if (bufp->buffer == NULL)                                           \
 924      return REG_ESPACE;                                                \
 925    /* If the buffer moved, move all the pointers into it.  */          \
 926    if (old_buffer != bufp->buffer)                                     \
 927      {                                                                 \
 928        b = (b - old_buffer) + bufp->buffer;                            \
 929        begalt = (begalt - old_buffer) + bufp->buffer;                  \
 930        if (fixup_alt_jump)                                             \
 931          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
 932        if (laststart)                                                  \
 933          laststart = (laststart - old_buffer) + bufp->buffer;          \
 934        if (pending_exact)                                              \
 935          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
 936      }                                                                 \
 937  } while (0)
 938
 939
 940/* Since we have one byte reserved for the register number argument to
 941   {start,stop}_memory, the maximum number of groups we can report
 942   things about is what fits in that byte.  */
 943#define MAX_REGNUM 255
 944
 945/* But patterns can have more than `MAX_REGNUM' registers.  We just
 946   ignore the excess.  */
 947typedef unsigned regnum_t;
 948
 949
 950/* Macros for the compile stack.  */
 951
 952/* Since offsets can go either forwards or backwards, this type needs to
 953   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
 954typedef int pattern_offset_t;
 955
 956typedef struct
 957{
 958  pattern_offset_t begalt_offset;
 959  pattern_offset_t fixup_alt_jump;
 960  pattern_offset_t inner_group_offset;
 961  pattern_offset_t laststart_offset;
 962  regnum_t regnum;
 963} compile_stack_elt_t;
 964
 965
 966typedef struct
 967{
 968  compile_stack_elt_t *stack;
 969  unsigned size;
 970  unsigned avail;                       /* Offset of next open position.  */
 971} compile_stack_type;
 972
 973
 974#define INIT_COMPILE_STACK_SIZE 32
 975
 976#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
 977#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
 978
 979/* The next available element.  */
 980#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
 981
 982
 983/* Set the bit for character C in a list.  */
 984#define SET_LIST_BIT(c)                               \
 985  (b[((unsigned char) (c)) / BYTEWIDTH]               \
 986   |= 1 << (((unsigned char) c) % BYTEWIDTH))
 987
 988
 989/* Get the next unsigned number in the uncompiled pattern.  */
 990#define GET_UNSIGNED_NUMBER(num)                                        \
 991  { if (p != pend)                                                      \
 992     {                                                                  \
 993       PATFETCH (c);                                                    \
 994       while (ISDIGIT (c))                                              \
 995         {                                                              \
 996           if (num < 0)                                                 \
 997              num = 0;                                                  \
 998           num = num * 10 + c - '0';                                    \
 999           if (p == pend)                                               \
1000              break;                                                    \
1001           PATFETCH (c);                                                \
1002         }                                                              \
1003       }                                                                \
1004    }
1005
1006#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1007
1008#define IS_CHAR_CLASS(string)                                           \
1009   (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1010    || STREQ (string, "lower") || STREQ (string, "digit")               \
1011    || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1012    || STREQ (string, "space") || STREQ (string, "print")               \
1013    || STREQ (string, "punct") || STREQ (string, "graph")               \
1014    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1015\f
1016/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1017   Returns one of error codes defined in `regex.h', or zero for success.
1018
1019   Assumes the `allocated' (and perhaps `buffer') and `translate'
1020   fields are set in BUFP on entry.
1021
1022   If it succeeds, results are put in BUFP (if it returns an error, the
1023   contents of BUFP are undefined):
1024     `buffer' is the compiled pattern;
1025     `syntax' is set to SYNTAX;
1026     `used' is set to the length of the compiled pattern;
1027     `fastmap_accurate' is zero;
1028     `re_nsub' is the number of subexpressions in PATTERN;
1029     `not_bol' and `not_eol' are zero;
1030
1031   The `fastmap' and `newline_anchor' fields are neither
1032   examined nor set.  */
1033
1034static reg_errcode_t
1035regex_compile (pattern, size, syntax, bufp)
1036     const char *pattern;
1037     int size;
1038     reg_syntax_t syntax;
1039     struct re_pattern_buffer *bufp;
1040{
1041  /* We fetch characters from PATTERN here.  Even though PATTERN is
1042     `char *' (i.e., signed), we declare these variables as unsigned, so
1043     they can be reliably used as array indices.  */
1044  register unsigned char c, c1;
1045
1046  /* A random tempory spot in PATTERN.  */
1047  const char *p1;
1048
1049  /* Points to the end of the buffer, where we should append.  */
1050  register unsigned char *b;
1051
1052  /* Keeps track of unclosed groups.  */
1053  compile_stack_type compile_stack;
1054
1055  /* Points to the current (ending) position in the pattern.  */
1056  const char *p = pattern;
1057  const char *pend = pattern + size;
1058
1059  /* How to translate the characters in the pattern.  */
1060  char *translate = bufp->translate;
1061
1062  /* Address of the count-byte of the most recently inserted `exactn'
1063     command.  This makes it possible to tell if a new exact-match
1064     character can be added to that command or if the character requires
1065     a new `exactn' command.  */
1066  unsigned char *pending_exact = 0;
1067
1068  /* Address of start of the most recently finished expression.
1069     This tells, e.g., postfix * where to find the start of its
1070     operand.  Reset at the beginning of groups and alternatives.  */
1071  unsigned char *laststart = 0;
1072
1073  /* Address of beginning of regexp, or inside of last group.  */
1074  unsigned char *begalt;
1075
1076  /* Place in the uncompiled pattern (i.e., the {) to
1077     which to go back if the interval is invalid.  */
1078  const char *beg_interval;
1079
1080  /* Address of the place where a forward jump should go to the end of
1081     the containing expression.  Each alternative of an `or' -- except the
1082     last -- ends with a forward jump of this sort.  */
1083  unsigned char *fixup_alt_jump = 0;
1084
1085  /* Counts open-groups as they are encountered.  Remembered for the
1086     matching close-group on the compile stack, so the same register
1087     number is put in the stop_memory as the start_memory.  */
1088  regnum_t regnum = 0;
1089
1090#ifdef DEBUG
1091  DEBUG_PRINT1 ("\nCompiling pattern: ");
1092  if (debug)
1093    {
1094      unsigned debug_count;
1095
1096      for (debug_count = 0; debug_count < size; debug_count++)
1097        printchar (pattern[debug_count]);
1098      putchar ('\n');
1099    }
1100#endif /* DEBUG */
1101
1102  /* Initialize the compile stack.  */
1103  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1104  if (compile_stack.stack == NULL)
1105    return REG_ESPACE;
1106
1107  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1108  compile_stack.avail = 0;
1109
1110  /* Initialize the pattern buffer.  */
1111  bufp->syntax = syntax;
1112  bufp->fastmap_accurate = 0;
1113  bufp->not_bol = bufp->not_eol = 0;
1114
1115  /* Set `used' to zero, so that if we return an error, the pattern
1116     printer (for debugging) will think there's no pattern.  We reset it
1117     at the end.  */
1118  bufp->used = 0;
1119
1120  /* Always count groups, whether or not bufp->no_sub is set.  */
1121  bufp->re_nsub = 0;
1122
1123#if !defined (emacs) && !defined (SYNTAX_TABLE)
1124  /* Initialize the syntax table.  */
1125   init_syntax_once ();
1126#endif
1127
1128  if (bufp->allocated == 0)
1129    {
1130      if (bufp->buffer)
1131        { /* If zero allocated, but buffer is non-null, try to realloc
1132             enough space.  This loses if buffer's address is bogus, but
1133             that is the user's responsibility.  */
1134          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1135        }
1136      else
1137        { /* Caller did not allocate a buffer.  Do it for them.  */
1138          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1139        }
1140      if (!bufp->buffer) return REG_ESPACE;
1141
1142      bufp->allocated = INIT_BUF_SIZE;
1143    }
1144
1145  begalt = b = bufp->buffer;
1146
1147  /* Loop through the uncompiled pattern until we're at the end.  */
1148  while (p != pend)
1149    {
1150      PATFETCH (c);
1151
1152      switch (c)
1153        {
1154        case '^':
1155          {
1156            if (   /* If at start of pattern, it's an operator.  */
1157                   p == pattern + 1
1158                   /* If context independent, it's an operator.  */
1159                || syntax & RE_CONTEXT_INDEP_ANCHORS
1160                   /* Otherwise, depends on what's come before.  */
1161                || at_begline_loc_p (pattern, p, syntax))
1162              BUF_PUSH (begline);
1163            else
1164              goto normal_char;
1165          }
1166          break;
1167
1168
1169        case '$':
1170          {
1171            if (   /* If at end of pattern, it's an operator.  */
1172                   p == pend
1173                   /* If context independent, it's an operator.  */
1174                || syntax & RE_CONTEXT_INDEP_ANCHORS
1175                   /* Otherwise, depends on what's next.  */
1176                || at_endline_loc_p (p, pend, syntax))
1177               BUF_PUSH (endline);
1178             else
1179               goto normal_char;
1180           }
1181           break;
1182
1183
1184        case '+':
1185        case '?':
1186          if ((syntax & RE_BK_PLUS_QM)
1187              || (syntax & RE_LIMITED_OPS))
1188            goto normal_char;
1189        handle_plus:
1190        case '*':
1191          /* If there is no previous pattern... */
1192          if (!laststart)
1193            {
1194              if (syntax & RE_CONTEXT_INVALID_OPS)
1195                return REG_BADRPT;
1196              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1197                goto normal_char;
1198            }
1199
1200          {
1201            /* Are we optimizing this jump?  */
1202            boolean keep_string_p = false;
1203
1204            /* 1 means zero (many) matches is allowed.  */
1205            char zero_times_ok = 0, many_times_ok = 0;
1206
1207            /* If there is a sequence of repetition chars, collapse it
1208               down to just one (the right one).  We can't combine
1209               interval operators with these because of, e.g., `a{2}*',
1210               which should only match an even number of `a's.  */
1211
1212            for (;;)
1213              {
1214                zero_times_ok |= c != '+';
1215                many_times_ok |= c != '?';
1216
1217                if (p == pend)
1218                  break;
1219
1220                PATFETCH (c);
1221
1222                if (c == '*'
1223                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1224                  ;
1225
1226                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1227                  {
1228                    if (p == pend) return REG_EESCAPE;
1229
1230                    PATFETCH (c1);
1231                    if (!(c1 == '+' || c1 == '?'))
1232                      {
1233                        PATUNFETCH;
1234                        PATUNFETCH;
1235                        break;
1236                      }
1237
1238                    c = c1;
1239                  }
1240                else
1241                  {
1242                    PATUNFETCH;
1243                    break;
1244                  }
1245
1246                /* If we get here, we found another repeat character.  */
1247               }
1248
1249            /* Star, etc. applied to an empty pattern is equivalent
1250               to an empty pattern.  */
1251            if (!laststart)
1252              break;
1253
1254            /* Now we know whether or not zero matches is allowed
1255               and also whether or not two or more matches is allowed.  */
1256            if (many_times_ok)
1257              { /* More than one repetition is allowed, so put in at the
1258                   end a backward relative jump from `b' to before the next
1259                   jump we're going to put in below (which jumps from
1260                   laststart to after this jump).
1261
1262                   But if we are at the `*' in the exact sequence `.*\n',
1263                   insert an unconditional jump backwards to the .,
1264                   instead of the beginning of the loop.  This way we only
1265                   push a failure point once, instead of every time
1266                   through the loop.  */
1267                assert (p - 1 > pattern);
1268
1269                /* Allocate the space for the jump.  */
1270                GET_BUFFER_SPACE (3);
1271
1272                /* We know we are not at the first character of the pattern,
1273                   because laststart was nonzero.  And we've already
1274                   incremented `p', by the way, to be the character after
1275                   the `*'.  Do we have to do something analogous here
1276                   for null bytes, because of RE_DOT_NOT_NULL?  */
1277                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1278                    && zero_times_ok
1279                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1280                    && !(syntax & RE_DOT_NEWLINE))
1281                  { /* We have .*\n.  */
1282                    STORE_JUMP (jump, b, laststart);
1283                    keep_string_p = true;
1284                  }
1285                else
1286                  /* Anything else.  */
1287                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1288
1289                /* We've added more stuff to the buffer.  */
1290                b += 3;
1291              }
1292
1293            /* On failure, jump from laststart to b + 3, which will be the
1294               end of the buffer after this jump is inserted.  */
1295            GET_BUFFER_SPACE (3);
1296            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1297                                       : on_failure_jump,
1298                         laststart, b + 3);
1299            pending_exact = 0;
1300            b += 3;
1301
1302            if (!zero_times_ok)
1303              {
1304                /* At least one repetition is required, so insert a
1305                   `dummy_failure_jump' before the initial
1306                   `on_failure_jump' instruction of the loop. This
1307                   effects a skip over that instruction the first time
1308                   we hit that loop.  */
1309                GET_BUFFER_SPACE (3);
1310                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1311                b += 3;
1312              }
1313            }
1314          break;
1315
1316
1317        case '.':
1318          laststart = b;
1319          BUF_PUSH (anychar);
1320          break;
1321
1322
1323        case '[':
1324          {
1325            boolean had_char_class = false;
1326
1327            if (p == pend) return REG_EBRACK;
1328
1329            /* Ensure that we have enough space to push a charset: the
1330               opcode, the length count, and the bitset; 34 bytes in all.  */
1331            GET_BUFFER_SPACE (34);
1332
1333            laststart = b;
1334
1335            /* We test `*p == '^' twice, instead of using an if
1336               statement, so we only need one BUF_PUSH.  */
1337            BUF_PUSH (*p == '^' ? charset_not : charset);
1338            if (*p == '^')
1339              p++;
1340
1341            /* Remember the first position in the bracket expression.  */
1342            p1 = p;
1343
1344            /* Push the number of bytes in the bitmap.  */
1345            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1346
1347            /* Clear the whole map.  */
1348            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1349
1350            /* charset_not matches newline according to a syntax bit.  */
1351            if ((re_opcode_t) b[-2] == charset_not
1352                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1353              SET_LIST_BIT ('\n');
1354
1355            /* Read in characters and ranges, setting map bits.  */
1356            for (;;)
1357              {
1358                if (p == pend) return REG_EBRACK;
1359
1360                PATFETCH (c);
1361
1362                /* \ might escape characters inside [...] and [^...].  */
1363                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1364                  {
1365                    if (p == pend) return REG_EESCAPE;
1366
1367                    PATFETCH (c1);
1368                    SET_LIST_BIT (c1);
1369                    continue;
1370                  }
1371
1372                /* Could be the end of the bracket expression.  If it's
1373                   not (i.e., when the bracket expression is `[]' so
1374                   far), the ']' character bit gets set way below.  */
1375                if (c == ']' && p != p1 + 1)
1376                  break;
1377
1378                /* Look ahead to see if it's a range when the last thing
1379                   was a character class.  */
1380                if (had_char_class && c == '-' && *p != ']')
1381                  return REG_ERANGE;
1382
1383                /* Look ahead to see if it's a range when the last thing
1384                   was a character: if this is a hyphen not at the
1385                   beginning or the end of a list, then it's the range
1386                   operator.  */
1387                if (c == '-'
1388                    && !(p - 2 >= pattern && p[-2] == '[')
1389                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1390                    && *p != ']')
1391                  {
1392                    reg_errcode_t ret
1393                      = compile_range (&p, pend, translate, syntax, b);
1394                    if (ret != REG_NOERROR) return ret;
1395                  }
1396
1397                else if (p[0] == '-' && p[1] != ']')
1398                  { /* This handles ranges made up of characters only.  */
1399                    reg_errcode_t ret;
1400
1401                    /* Move past the `-'.  */
1402                    PATFETCH (c1);
1403
1404                    ret = compile_range (&p, pend, translate, syntax, b);
1405                    if (ret != REG_NOERROR) return ret;
1406                  }
1407
1408                /* See if we're at the beginning of a possible character
1409                   class.  */
1410
1411                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1412                  { /* Leave room for the null.  */
1413                    char str[CHAR_CLASS_MAX_LENGTH + 1];
1414
1415                    PATFETCH (c);
1416                    c1 = 0;
1417
1418                    /* If pattern is `[[:'.  */
1419                    if (p == pend) return REG_EBRACK;
1420
1421                    for (;;)
1422                      {
1423                        PATFETCH (c);
1424                        if (c == ':' || c == ']' || p == pend
1425                            || c1 == CHAR_CLASS_MAX_LENGTH)
1426                          break;
1427                        str[c1++] = c;
1428                      }
1429                    str[c1] = '\0';
1430
1431                    /* If isn't a word bracketed by `[:' and:`]':
1432                       undo the ending character, the letters, and leave
1433                       the leading `:' and `[' (but set bits for them).  */
1434                    if (c == ':' && *p == ']')
1435                      {
1436                        int ch;
1437                        boolean is_alnum = STREQ (str, "alnum");
1438                        boolean is_alpha = STREQ (str, "alpha");
1439                        boolean is_blank = STREQ (str, "blank");
1440                        boolean is_cntrl = STREQ (str, "cntrl");
1441                        boolean is_digit = STREQ (str, "digit");
1442                        boolean is_graph = STREQ (str, "graph");
1443                        boolean is_lower = STREQ (str, "lower");
1444                        boolean is_print = STREQ (str, "print");
1445                        boolean is_punct = STREQ (str, "punct");
1446                        boolean is_space = STREQ (str, "space");
1447                        boolean is_upper = STREQ (str, "upper");
1448                        boolean is_xdigit = STREQ (str, "xdigit");
1449
1450                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1451
1452                        /* Throw away the ] at the end of the character
1453                           class.  */
1454                        PATFETCH (c);
1455
1456                        if (p == pend) return REG_EBRACK;
1457
1458                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1459                          {
1460                            if (   (is_alnum  && ISALNUM (ch))
1461                                || (is_alpha  && ISALPHA (ch))
1462                                || (is_blank  && ISBLANK (ch))
1463                                || (is_cntrl  && ISCNTRL (ch))
1464                                || (is_digit  && ISDIGIT (ch))
1465                                || (is_graph  && ISGRAPH (ch))
1466                                || (is_lower  && ISLOWER (ch))
1467                                || (is_print  && ISPRINT (ch))
1468                                || (is_punct  && ISPUNCT (ch))
1469                                || (is_space  && ISSPACE (ch))
1470                                || (is_upper  && ISUPPER (ch))
1471                                || (is_xdigit && ISXDIGIT (ch)))
1472                            SET_LIST_BIT (ch);
1473                          }
1474                        had_char_class = true;
1475                      }
1476                    else
1477                      {
1478                        c1++;
1479                        while (c1--)
1480                          PATUNFETCH;
1481                        SET_LIST_BIT ('[');
1482                        SET_LIST_BIT (':');
1483                        had_char_class = false;
1484                      }
1485                  }
1486                else
1487                  {
1488                    had_char_class = false;
1489                    SET_LIST_BIT (c);
1490                  }
1491              }
1492
1493            /* Discard any (non)matching list bytes that are all 0 at the
1494               end of the map.  Decrease the map-length byte too.  */
1495            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1496              b[-1]--;
1497            b += b[-1];
1498          }
1499          break;
1500
1501
1502        case '(':
1503          if (syntax & RE_NO_BK_PARENS)
1504            goto handle_open;
1505          else
1506            goto normal_char;
1507
1508
1509        case ')':
1510          if (syntax & RE_NO_BK_PARENS)
1511            goto handle_close;
1512          else
1513            goto normal_char;
1514
1515
1516        case '\n':
1517          if (syntax & RE_NEWLINE_ALT)
1518            goto handle_alt;
1519          else
1520            goto normal_char;
1521
1522
1523        case '|':
1524          if (syntax & RE_NO_BK_VBAR)
1525            goto handle_alt;
1526          else
1527            goto normal_char;
1528
1529
1530        case '{':
1531           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1532             goto handle_interval;
1533           else
1534             goto normal_char;
1535
1536
1537        case '\\':
1538          if (p == pend) return REG_EESCAPE;
1539
1540          /* Do not translate the character after the \, so that we can
1541             distinguish, e.g., \B from \b, even if we normally would
1542             translate, e.g., B to b.  */
1543          PATFETCH_RAW (c);
1544
1545          switch (c)
1546            {
1547            case '(':
1548              if (syntax & RE_NO_BK_PARENS)
1549                goto normal_backslash;
1550
1551            handle_open:
1552              bufp->re_nsub++;
1553              regnum++;
1554
1555              if (COMPILE_STACK_FULL)
1556                {
1557                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
1558                            compile_stack_elt_t);
1559                  if (compile_stack.stack == NULL) return REG_ESPACE;
1560
1561                  compile_stack.size <<= 1;
1562                }
1563
1564              /* These are the values to restore when we hit end of this
1565                 group.  They are all relative offsets, so that if the
1566                 whole pattern moves because of realloc, they will still
1567                 be valid.  */
1568              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1569              COMPILE_STACK_TOP.fixup_alt_jump
1570                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1571              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1572              COMPILE_STACK_TOP.regnum = regnum;
1573
1574              /* We will eventually replace the 0 with the number of
1575                 groups inner to this one.  But do not push a
1576                 start_memory for groups beyond the last one we can
1577                 represent in the compiled pattern.  */
1578              if (regnum <= MAX_REGNUM)
1579                {
1580                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1581                  BUF_PUSH_3 (start_memory, regnum, 0);
1582                }
1583
1584              compile_stack.avail++;
1585
1586              fixup_alt_jump = 0;
1587              laststart = 0;
1588              begalt = b;
1589              /* If we've reached MAX_REGNUM groups, then this open
1590                 won't actually generate any code, so we'll have to
1591                 clear pending_exact explicitly.  */
1592              pending_exact = 0;
1593              break;
1594
1595
1596            case ')':
1597              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1598
1599              if (COMPILE_STACK_EMPTY)
1600              {
1601                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1602                  goto normal_backslash;
1603                else
1604                  return REG_ERPAREN;
1605              }
1606
1607            handle_close:
1608              if (fixup_alt_jump)
1609                { /* Push a dummy failure point at the end of the
1610                     alternative for a possible future
1611                     `pop_failure_jump' to pop.  See comments at
1612                     `push_dummy_failure' in `re_match_2'.  */
1613                  BUF_PUSH (push_dummy_failure);
1614
1615                  /* We allocated space for this jump when we assigned
1616                     to `fixup_alt_jump', in the `handle_alt' case below.  */
1617                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1618                }
1619
1620              /* See similar code for backslashed left paren above.  */
1621              if (COMPILE_STACK_EMPTY)
1622              {
1623                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1624                  goto normal_char;
1625                else
1626                  return REG_ERPAREN;
1627              }
1628
1629              /* Since we just checked for an empty stack above, this
1630                 ``can't happen''.  */
1631              assert (compile_stack.avail != 0);
1632              {
1633                /* We don't just want to restore into `regnum', because
1634                   later groups should continue to be numbered higher,
1635                   as in `(ab)c(de)' -- the second group is #2.  */
1636                regnum_t this_group_regnum;
1637
1638                compile_stack.avail--;
1639                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1640                fixup_alt_jump
1641                  = COMPILE_STACK_TOP.fixup_alt_jump
1642                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1643                    : 0;
1644                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1645                this_group_regnum = COMPILE_STACK_TOP.regnum;
1646                /* If we've reached MAX_REGNUM groups, then this open
1647                   won't actually generate any code, so we'll have to
1648                   clear pending_exact explicitly.  */
1649                pending_exact = 0;
1650
1651                /* We're at the end of the group, so now we know how many
1652                   groups were inside this one.  */
1653                if (this_group_regnum <= MAX_REGNUM)
1654                  {
1655                    unsigned char *inner_group_loc
1656                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1657
1658                    *inner_group_loc = regnum - this_group_regnum;
1659                    BUF_PUSH_3 (stop_memory, this_group_regnum,
1660                                regnum - this_group_regnum);
1661                  }
1662              }
1663              break;
1664
1665
1666            case '|':                                   /* `\|'.  */
1667              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1668                goto normal_backslash;
1669            handle_alt:
1670              if (syntax & RE_LIMITED_OPS)
1671                goto normal_char;
1672
1673              /* Insert before the previous alternative a jump which
1674                 jumps to this alternative if the former fails.  */
1675              GET_BUFFER_SPACE (3);
1676              INSERT_JUMP (on_failure_jump, begalt, b + 6);
1677              pending_exact = 0;
1678              b += 3;
1679
1680              /* The alternative before this one has a jump after it
1681                 which gets executed if it gets matched.  Adjust that
1682                 jump so it will jump to this alternative's analogous
1683                 jump (put in below, which in turn will jump to the next
1684                 (if any) alternative's such jump, etc.).  The last such
1685                 jump jumps to the correct final destination.  A picture:
1686                          _____ _____
1687                          |   | |   |
1688                          |   v |   v
1689                         a | b   | c
1690
1691                 If we are at `b', then fixup_alt_jump right now points to a
1692                 three-byte space after `a'.  We'll put in the jump, set
1693                 fixup_alt_jump to right after `b', and leave behind three
1694                 bytes which we'll fill in when we get to after `c'.  */
1695
1696              if (fixup_alt_jump)
1697                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1698
1699              /* Mark and leave space for a jump after this alternative,
1700                 to be filled in later either by next alternative or
1701                 when know we're at the end of a series of alternatives.  */
1702              fixup_alt_jump = b;
1703              GET_BUFFER_SPACE (3);
1704              b += 3;
1705
1706              laststart = 0;
1707              begalt = b;
1708              break;
1709
1710
1711            case '{':
1712              /* If \{ is a literal.  */
1713              if (!(syntax & RE_INTERVALS)
1714                     /* If we're at `\{' and it's not the open-interval
1715                        operator.  */
1716                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1717                  || (p - 2 == pattern  &&  p == pend))
1718                goto normal_backslash;
1719
1720            handle_interval:
1721              {
1722                /* If got here, then the syntax allows intervals.  */
1723
1724                /* At least (most) this many matches must be made.  */
1725                int lower_bound = -1, upper_bound = -1;
1726
1727                beg_interval = p - 1;
1728
1729                if (p == pend)
1730                  {
1731                    if (syntax & RE_NO_BK_BRACES)
1732                      goto unfetch_interval;
1733                    else
1734                      return REG_EBRACE;
1735                  }
1736
1737                GET_UNSIGNED_NUMBER (lower_bound);
1738
1739                if (c == ',')
1740                  {
1741                    GET_UNSIGNED_NUMBER (upper_bound);
1742                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1743                  }
1744                else
1745                  /* Interval such as `{1}' => match exactly once. */
1746                  upper_bound = lower_bound;
1747
1748                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1749                    || lower_bound > upper_bound)
1750                  {
1751                    if (syntax & RE_NO_BK_BRACES)
1752                      goto unfetch_interval;
1753                    else
1754                      return REG_BADBR;
1755                  }
1756
1757                if (!(syntax & RE_NO_BK_BRACES))
1758                  {
1759                    if (c != '\\') return REG_EBRACE;
1760
1761                    PATFETCH (c);
1762                  }
1763
1764                if (c != '}')
1765                  {
1766                    if (syntax & RE_NO_BK_BRACES)
1767                      goto unfetch_interval;
1768                    else
1769                      return REG_BADBR;
1770                  }
1771
1772                /* We just parsed a valid interval.  */
1773
1774                /* If it's invalid to have no preceding re.  */
1775                if (!laststart)
1776                  {
1777                    if (syntax & RE_CONTEXT_INVALID_OPS)
1778                      return REG_BADRPT;
1779                    else if (syntax & RE_CONTEXT_INDEP_OPS)
1780                      laststart = b;
1781                    else
1782                      goto unfetch_interval;
1783                  }
1784
1785                /* If the upper bound is zero, don't want to succeed at
1786                   all; jump from `laststart' to `b + 3', which will be
1787                   the end of the buffer after we insert the jump.  */
1788                 if (upper_bound == 0)
1789                   {
1790                     GET_BUFFER_SPACE (3);
1791                     INSERT_JUMP (jump, laststart, b + 3);
1792                     b += 3;
1793                   }
1794
1795                 /* Otherwise, we have a nontrivial interval.  When
1796                    we're all done, the pattern will look like:
1797                      set_number_at <jump count> <upper bound>
1798                      set_number_at <succeed_n count> <lower bound>
1799                      succeed_n <after jump addr> <succed_n count>
1800                      <body of loop>
1801                      jump_n <succeed_n addr> <jump count>
1802                    (The upper bound and `jump_n' are omitted if
1803                    `upper_bound' is 1, though.)  */
1804                 else
1805                   { /* If the upper bound is > 1, we need to insert
1806                        more at the end of the loop.  */
1807                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
1808
1809                     GET_BUFFER_SPACE (nbytes);
1810
1811                     /* Initialize lower bound of the `succeed_n', even
1812                        though it will be set during matching by its
1813                        attendant `set_number_at' (inserted next),
1814                        because `re_compile_fastmap' needs to know.
1815                        Jump to the `jump_n' we might insert below.  */
1816                     INSERT_JUMP2 (succeed_n, laststart,
1817                                   b + 5 + (upper_bound > 1) * 5,
1818                                   lower_bound);
1819                     b += 5;
1820
1821                     /* Code to initialize the lower bound.  Insert
1822                        before the `succeed_n'.  The `5' is the last two
1823                        bytes of this `set_number_at', plus 3 bytes of
1824                        the following `succeed_n'.  */
1825                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1826                     b += 5;
1827
1828                     if (upper_bound > 1)
1829                       { /* More than one repetition is allowed, so
1830                            append a backward jump to the `succeed_n'
1831                            that starts this interval.
1832
1833                            When we've reached this during matching,
1834                            we'll have matched the interval once, so
1835                            jump back only `upper_bound - 1' times.  */
1836                         STORE_JUMP2 (jump_n, b, laststart + 5,
1837                                      upper_bound - 1);
1838                         b += 5;
1839
1840                         /* The location we want to set is the second
1841                            parameter of the `jump_n'; that is `b-2' as
1842                            an absolute address.  `laststart' will be
1843                            the `set_number_at' we're about to insert;
1844                            `laststart+3' the number to set, the source
1845                            for the relative address.  But we are
1846                            inserting into the middle of the pattern --
1847                            so everything is getting moved up by 5.
1848                            Conclusion: (b - 2) - (laststart + 3) + 5,
1849                            i.e., b - laststart.
1850
1851                            We insert this at the beginning of the loop
1852                            so that if we fail during matching, we'll
1853                            reinitialize the bounds.  */
1854                         insert_op2 (set_number_at, laststart, b - laststart,
1855                                     upper_bound - 1, b);
1856                         b += 5;
1857                       }
1858                   }
1859                pending_exact = 0;
1860                beg_interval = NULL;
1861              }
1862              break;
1863
1864            unfetch_interval:
1865              /* If an invalid interval, match the characters as literals.  */
1866               assert (beg_interval);
1867               p = beg_interval;
1868               beg_interval = NULL;
1869
1870               /* normal_char and normal_backslash need `c'.  */
1871               PATFETCH (c);
1872
1873               if (!(syntax & RE_NO_BK_BRACES))
1874                 {
1875                   if (p > pattern  &&  p[-1] == '\\')
1876                     goto normal_backslash;
1877                 }
1878               goto normal_char;
1879
1880#ifdef emacs
1881            /* There is no way to specify the before_dot and after_dot
1882               operators.  rms says this is ok.  --karl  */
1883            case '=':
1884              BUF_PUSH (at_dot);
1885              break;
1886
1887            case 's':
1888              laststart = b;
1889              PATFETCH (c);
1890              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1891              break;
1892
1893            case 'S':
1894              laststart = b;
1895              PATFETCH (c);
1896              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1897              break;
1898#endif /* emacs */
1899
1900
1901            case 'w':
1902              laststart = b;
1903              BUF_PUSH (wordchar);
1904              break;
1905
1906
1907            case 'W':
1908              laststart = b;
1909              BUF_PUSH (notwordchar);
1910              break;
1911
1912
1913            case '<':
1914              BUF_PUSH (wordbeg);
1915              break;
1916
1917            case '>':
1918              BUF_PUSH (wordend);
1919              break;
1920
1921            case 'b':
1922              BUF_PUSH (wordbound);
1923              break;
1924
1925            case 'B':
1926              BUF_PUSH (notwordbound);
1927              break;
1928
1929            case '`':
1930              BUF_PUSH (begbuf);
1931              break;
1932
1933            case '\'':
1934              BUF_PUSH (endbuf);
1935              break;
1936
1937            case '1': case '2': case '3': case '4': case '5':
1938            case '6': case '7': case '8': case '9':
1939              if (syntax & RE_NO_BK_REFS)
1940                goto normal_char;
1941
1942              c1 = c - '0';
1943
1944              if (c1 > regnum)
1945                return REG_ESUBREG;
1946
1947              /* Can't back reference to a subexpression if inside of it.  */
1948              if (group_in_compile_stack (compile_stack, c1))
1949                goto normal_char;
1950
1951              laststart = b;
1952              BUF_PUSH_2 (duplicate, c1);
1953              break;
1954
1955
1956            case '+':
1957            case '?':
1958              if (syntax & RE_BK_PLUS_QM)
1959                goto handle_plus;
1960              else
1961                goto normal_backslash;
1962
1963            default:
1964            normal_backslash:
1965              /* You might think it would be useful for \ to mean
1966                 not to translate; but if we don't translate it
1967                 it will never match anything.  */
1968              c = TRANSLATE (c);
1969              goto normal_char;
1970            }
1971          break;
1972
1973
1974        default:
1975        /* Expects the character in `c'.  */
1976        normal_char:
1977              /* If no exactn currently being built.  */
1978          if (!pending_exact
1979
1980              /* If last exactn not at current position.  */
1981              || pending_exact + *pending_exact + 1 != b
1982
1983              /* We have only one byte following the exactn for the count.  */
1984              || *pending_exact == (1 << BYTEWIDTH) - 1
1985
1986              /* If followed by a repetition operator.  */
1987              || *p == '*' || *p == '^'
1988              || ((syntax & RE_BK_PLUS_QM)
1989                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
1990                  : (*p == '+' || *p == '?'))
1991              || ((syntax & RE_INTERVALS)
1992                  && ((syntax & RE_NO_BK_BRACES)
1993                      ? *p == '{'
1994                      : (p[0] == '\\' && p[1] == '{'))))
1995            {
1996              /* Start building a new exactn.  */
1997
1998              laststart = b;
1999
2000              BUF_PUSH_2 (exactn, 0);
2001              pending_exact = b - 1;
2002            }
2003
2004          BUF_PUSH (c);
2005          (*pending_exact)++;
2006          break;
2007        } /* switch (c) */
2008    } /* while p != pend */
2009
2010
2011  /* Through the pattern now.  */
2012
2013  if (fixup_alt_jump)
2014    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2015
2016  if (!COMPILE_STACK_EMPTY)
2017    return REG_EPAREN;
2018
2019  free (compile_stack.stack);
2020
2021  /* We have succeeded; set the length of the buffer.  */
2022  bufp->used = b - bufp->buffer;
2023
2024#ifdef DEBUG
2025  if (debug)
2026    {
2027      DEBUG_PRINT1 ("\nCompiled pattern: ");
2028      print_compiled_pattern (bufp);
2029    }
2030#endif /* DEBUG */
2031
2032  return REG_NOERROR;
2033} /* regex_compile */
2034\f
2035/* Subroutines for `regex_compile'.  */
2036
2037/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2038
2039static void
2040store_op1 (op, loc, arg)
2041    re_opcode_t op;
2042    unsigned char *loc;
2043    int arg;
2044{
2045  *loc = (unsigned char) op;
2046  STORE_NUMBER (loc + 1, arg);
2047}
2048
2049
2050/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2051
2052static void
2053store_op2 (op, loc, arg1, arg2)
2054    re_opcode_t op;
2055    unsigned char *loc;
2056    int arg1, arg2;
2057{
2058  *loc = (unsigned char) op;
2059  STORE_NUMBER (loc + 1, arg1);
2060  STORE_NUMBER (loc + 3, arg2);
2061}
2062
2063
2064/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2065   for OP followed by two-byte integer parameter ARG.  */
2066
2067static void
2068insert_op1 (op, loc, arg, end)
2069    re_opcode_t op;
2070    unsigned char *loc;
2071    int arg;
2072    unsigned char *end;
2073{
2074  register unsigned char *pfrom = end;
2075  register unsigned char *pto = end + 3;
2076
2077  while (pfrom != loc)
2078    *--pto = *--pfrom;
2079
2080  store_op1 (op, loc, arg);
2081}
2082
2083
2084/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2085
2086static void
2087insert_op2 (op, loc, arg1, arg2, end)
2088    re_opcode_t op;
2089    unsigned char *loc;
2090    int arg1, arg2;
2091    unsigned char *end;
2092{
2093  register unsigned char *pfrom = end;
2094  register unsigned char *pto = end + 5;
2095
2096  while (pfrom != loc)
2097    *--pto = *--pfrom;
2098
2099  store_op2 (op, loc, arg1, arg2);
2100}
2101
2102
2103/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2104   after an alternative or a begin-subexpression.  We assume there is at
2105   least one character before the ^.  */
2106
2107static boolean
2108at_begline_loc_p (pattern, p, syntax)
2109    const char *pattern, *p;
2110    reg_syntax_t syntax;
2111{
2112  const char *prev = p - 2;
2113  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2114
2115  return
2116       /* After a subexpression?  */
2117       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2118       /* After an alternative?  */
2119    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2120}
2121
2122
2123/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2124   at least one character after the $, i.e., `P < PEND'.  */
2125
2126static boolean
2127at_endline_loc_p (p, pend, syntax)
2128    const char *p, *pend;
2129    int syntax;
2130{
2131  const char *next = p;
2132  boolean next_backslash = *next == '\\';
2133  const char *next_next = p + 1 < pend ? p + 1 : NULL;
2134
2135  return
2136       /* Before a subexpression?  */
2137       (syntax & RE_NO_BK_PARENS ? *next == ')'
2138        : next_backslash && next_next && *next_next == ')')
2139       /* Before an alternative?  */
2140    || (syntax & RE_NO_BK_VBAR ? *next == '|'
2141        : next_backslash && next_next && *next_next == '|');
2142}
2143
2144
2145/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2146   false if it's not.  */
2147
2148static boolean
2149group_in_compile_stack (compile_stack, regnum)
2150    compile_stack_type compile_stack;
2151    regnum_t regnum;
2152{
2153  int this_element;
2154
2155  for (this_element = compile_stack.avail - 1;
2156       this_element >= 0;
2157       this_element--)
2158    if (compile_stack.stack[this_element].regnum == regnum)
2159      return true;
2160
2161  return false;
2162}
2163
2164
2165/* Read the ending character of a range (in a bracket expression) from the
2166   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2167   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2168   Then we set the translation of all bits between the starting and
2169   ending characters (inclusive) in the compiled pattern B.
2170
2171   Return an error code.
2172
2173   We use these short variable names so we can use the same macros as
2174   `regex_compile' itself.  */
2175
2176static reg_errcode_t
2177compile_range (p_ptr, pend, translate, syntax, b)
2178    const char **p_ptr, *pend;
2179    char *translate;
2180    reg_syntax_t syntax;
2181    unsigned char *b;
2182{
2183  unsigned this_char;
2184
2185  const char *p = *p_ptr;
2186  int range_start, range_end;
2187
2188  if (p == pend)
2189    return REG_ERANGE;
2190
2191  /* Even though the pattern is a signed `char *', we need to fetch
2192     with unsigned char *'s; if the high bit of the pattern character
2193     is set, the range endpoints will be negative if we fetch using a
2194     signed char *.
2195
2196     We also want to fetch the endpoints without translating them; the
2197     appropriate translation is done in the bit-setting loop below.  */
2198  range_start = ((unsigned char *) p)[-2];
2199  range_end   = ((unsigned char *) p)[0];
2200
2201  /* Have to increment the pointer into the pattern string, so the
2202     caller isn't still at the ending character.  */
2203  (*p_ptr)++;
2204
2205  /* If the start is after the end, the range is empty.  */
2206  if (range_start > range_end)
2207    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2208
2209  /* Here we see why `this_char' has to be larger than an `unsigned
2210     char' -- the range is inclusive, so if `range_end' == 0xff
2211     (assuming 8-bit characters), we would otherwise go into an infinite
2212     loop, since all characters <= 0xff.  */
2213  for (this_char = range_start; this_char <= range_end; this_char++)
2214    {
2215      SET_LIST_BIT (TRANSLATE (this_char));
2216    }
2217
2218  return REG_NOERROR;
2219}
2220\f
2221/* Failure stack declarations and macros; both re_compile_fastmap and
2222   re_match_2 use a failure stack.  These have to be macros because of
2223   REGEX_ALLOCATE.  */
2224
2225
2226/* Number of failure points for which to initially allocate space
2227   when matching.  If this number is exceeded, we allocate more
2228   space, so it is not a hard limit.  */
2229#ifndef INIT_FAILURE_ALLOC
2230#define INIT_FAILURE_ALLOC 5
2231#endif
2232
2233/* Roughly the maximum number of failure points on the stack.  Would be
2234   exactly that if always used MAX_FAILURE_SPACE each time we failed.
2235   This is a variable only so users of regex can assign to it; we never
2236   change it ourselves.  */
2237int re_max_failures = 2000;
2238
2239typedef const unsigned char *fail_stack_elt_t;
2240
2241typedef struct
2242{
2243  fail_stack_elt_t *stack;
2244  unsigned size;
2245  unsigned avail;                       /* Offset of next open position.  */
2246} fail_stack_type;
2247
2248#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2249#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2250#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2251#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2252
2253
2254/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2255
2256#define INIT_FAIL_STACK()                                               \
2257  do {                                                                  \
2258    fail_stack.stack = (fail_stack_elt_t *)                             \
2259      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
2260                                                                        \
2261    if (fail_stack.stack == NULL)                                       \
2262      return -2;                                                        \
2263                                                                        \
2264    fail_stack.size = INIT_FAILURE_ALLOC;                               \
2265    fail_stack.avail = 0;                                               \
2266  } while (0)
2267
2268
2269/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2270
2271   Return 1 if succeeds, and 0 if either ran out of memory
2272   allocating space for it or it was already too large.
2273
2274   REGEX_REALLOCATE requires `destination' be declared.   */
2275
2276#define DOUBLE_FAIL_STACK(fail_stack)                                   \
2277  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS              \
2278   ? 0                                                                  \
2279   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
2280        REGEX_REALLOCATE ((fail_stack).stack,                           \
2281          (fail_stack).size * sizeof (fail_stack_elt_t),                \
2282          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
2283                                                                        \
2284      (fail_stack).stack == NULL                                        \
2285      ? 0                                                               \
2286      : ((fail_stack).size <<= 1,                                       \
2287         1)))
2288
2289
2290/* Push PATTERN_OP on FAIL_STACK.
2291
2292   Return 1 if was able to do so and 0 if ran out of memory allocating
2293   space to do so.  */
2294#define PUSH_PATTERN_OP(pattern_op, fail_stack)                         \
2295  ((FAIL_STACK_FULL ()                                                  \
2296    && !DOUBLE_FAIL_STACK (fail_stack))                                 \
2297    ? 0                                                                 \
2298    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,           \
2299       1))
2300
2301/* This pushes an item onto the failure stack.  Must be a four-byte
2302   value.  Assumes the variable `fail_stack'.  Probably should only
2303   be called from within `PUSH_FAILURE_POINT'.  */
2304#define PUSH_FAILURE_ITEM(item)                                         \
2305  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2306
2307/* The complement operation.  Assumes `fail_stack' is nonempty.  */
2308#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2309
2310/* Used to omit pushing failure point id's when we're not debugging.  */
2311#ifdef DEBUG
2312#define DEBUG_PUSH PUSH_FAILURE_ITEM
2313#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2314#else
2315#define DEBUG_PUSH(item)
2316#define DEBUG_POP(item_addr)
2317#endif
2318
2319
2320/* Push the information about the state we will need
2321   if we ever fail back to it.
2322
2323   Requires variables fail_stack, regstart, regend, reg_info, and
2324   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2325   declared.
2326
2327   Does `return FAILURE_CODE' if runs out of memory.  */
2328
2329#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
2330  do {                                                                  \
2331    char *destination;                                                  \
2332    /* Must be int, so when we don't save any registers, the arithmetic \
2333       of 0 + -1 isn't done as unsigned.  */                            \
2334    int this_reg;                                                       \
2335                                                                        \
2336    DEBUG_STATEMENT (failure_id++);                                     \
2337    DEBUG_STATEMENT (nfailure_points_pushed++);                         \
2338    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
2339    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2340    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2341                                                                        \
2342    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
2343    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
2344                                                                        \
2345    /* Ensure we have enough space allocated for what we will push.  */ \
2346    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
2347      {                                                                 \
2348        if (!DOUBLE_FAIL_STACK (fail_stack))                    \
2349          return failure_code;                                          \
2350                                                                        \
2351        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
2352                       (fail_stack).size);                              \
2353        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2354      }                                                                 \
2355                                                                        \
2356    /* Push the info, starting with the registers.  */                  \
2357    DEBUG_PRINT1 ("\n");                                                \
2358                                                                        \
2359    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
2360         this_reg++)                                                    \
2361      {                                                                 \
2362        DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
2363        DEBUG_STATEMENT (num_regs_pushed++);                            \
2364                                                                        \
2365        DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
2366        PUSH_FAILURE_ITEM (regstart[this_reg]);                         \
2367                                                                        \
2368        DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
2369        PUSH_FAILURE_ITEM (regend[this_reg]);                           \
2370                                                                        \
2371        DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
2372        DEBUG_PRINT2 (" match_null=%d",                                 \
2373                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
2374        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
2375        DEBUG_PRINT2 (" matched_something=%d",                          \
2376                      MATCHED_SOMETHING (reg_info[this_reg]));          \
2377        DEBUG_PRINT2 (" ever_matched=%d",                               \
2378                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
2379        DEBUG_PRINT1 ("\n");                                            \
2380        PUSH_FAILURE_ITEM (reg_info[this_reg].word);                    \
2381      }                                                                 \
2382                                                                        \
2383    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2384    PUSH_FAILURE_ITEM (lowest_active_reg);                              \
2385                                                                        \
2386    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2387    PUSH_FAILURE_ITEM (highest_active_reg);                             \
2388                                                                        \
2389    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);           \
2390    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
2391    PUSH_FAILURE_ITEM (pattern_place);                                  \
2392                                                                        \
2393    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
2394    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2395                                 size2);                                \
2396    DEBUG_PRINT1 ("'\n");                                               \
2397    PUSH_FAILURE_ITEM (string_place);                                   \
2398                                                                        \
2399    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
2400    DEBUG_PUSH (failure_id);                                            \
2401  } while (0)
2402
2403/* This is the number of items that are pushed and popped on the stack
2404   for each register.  */
2405#define NUM_REG_ITEMS  3
2406
2407/* Individual items aside from the registers.  */
2408#ifdef DEBUG
2409#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2410#else
2411#define NUM_NONREG_ITEMS 4
2412#endif
2413
2414/* We push at most this many items on the stack.  */
2415#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2416
2417/* We actually push this many items.  */
2418#define NUM_FAILURE_ITEMS                                               \
2419  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS         \
2420    + NUM_NONREG_ITEMS)
2421
2422/* How many items can still be added to the stack without overflowing it.  */
2423#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2424
2425
2426/* Pops what PUSH_FAIL_STACK pushes.
2427
2428   We restore into the parameters, all of which should be lvalues:
2429     STR -- the saved data position.
2430     PAT -- the saved pattern position.
2431     LOW_REG, HIGH_REG -- the highest and lowest active registers.
2432     REGSTART, REGEND -- arrays of string positions.
2433     REG_INFO -- array of information about each subexpression.
2434
2435   Also assumes the variables `fail_stack' and (if debugging), `bufp',
2436   `pend', `string1', `size1', `string2', and `size2'.  */
2437
2438#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2439{                                                                       \
2440  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
2441  int this_reg;                                                         \
2442  const unsigned char *string_temp;                                     \
2443                                                                        \
2444  assert (!FAIL_STACK_EMPTY ());                                        \
2445                                                                        \
2446  /* Remove failure points and point to how many regs pushed.  */       \
2447  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
2448  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
2449  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
2450                                                                        \
2451  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
2452                                                                        \
2453  DEBUG_POP (&failure_id);                                              \
2454  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
2455                                                                        \
2456  /* If the saved string location is NULL, it came from an              \
2457     on_failure_keep_string_jump opcode, and we want to throw away the  \
2458     saved NULL, thus retaining our current position in the string.  */ \
2459  string_temp = POP_FAILURE_ITEM ();                                    \
2460  if (string_temp != NULL)                                              \
2461    str = (const char *) string_temp;                                   \
2462                                                                        \
2463  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
2464  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
2465  DEBUG_PRINT1 ("'\n");                                                 \
2466                                                                        \
2467  pat = (unsigned char *) POP_FAILURE_ITEM ();                          \
2468  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                       \
2469  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
2470                                                                        \
2471  /* Restore register info.  */                                         \
2472  high_reg = (unsigned) POP_FAILURE_ITEM ();                            \
2473  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
2474                                                                        \
2475  low_reg = (unsigned) POP_FAILURE_ITEM ();                             \
2476  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
2477                                                                        \
2478  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
2479    {                                                                   \
2480      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                 \
2481                                                                        \
2482      reg_info[this_reg].word = POP_FAILURE_ITEM ();                    \
2483      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);          \
2484                                                                        \
2485      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();            \
2486      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);             \
2487                                                                        \
2488      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();          \
2489      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);         \
2490    }                                                                   \
2491                                                                        \
2492  DEBUG_STATEMENT (nfailure_points_popped++);                           \
2493} /* POP_FAILURE_POINT */
2494\f
2495/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2496   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2497   characters can start a string that matches the pattern.  This fastmap
2498   is used by re_search to skip quickly over impossible starting points.
2499
2500   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2501   area as BUFP->fastmap.
2502
2503   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2504   the pattern buffer.
2505
2506   Returns 0 if we succeed, -2 if an internal error.   */
2507
2508int
2509re_compile_fastmap (bufp)
2510     struct re_pattern_buffer *bufp;
2511{
2512  int j, k;
2513  fail_stack_type fail_stack;
2514#ifndef REGEX_MALLOC
2515  char *destination;
2516#endif
2517  /* We don't push any register information onto the failure stack.  */
2518  unsigned num_regs = 0;
2519
2520  register char *fastmap = bufp->fastmap;
2521  unsigned char *pattern = bufp->buffer;
2522  unsigned long size = bufp->used;
2523  const unsigned char *p = pattern;
2524  register unsigned char *pend = pattern + size;
2525
2526  /* Assume that each path through the pattern can be null until
2527     proven otherwise.  We set this false at the bottom of switch
2528     statement, to which we get only if a particular path doesn't
2529     match the empty string.  */
2530  boolean path_can_be_null = true;
2531
2532  /* We aren't doing a `succeed_n' to begin with.  */
2533  boolean succeed_n_p = false;
2534
2535  assert (fastmap != NULL && p != NULL);
2536
2537  INIT_FAIL_STACK ();
2538  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2539  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
2540  bufp->can_be_null = 0;
2541
2542  while (p != pend || !FAIL_STACK_EMPTY ())
2543    {
2544      if (p == pend)
2545        {
2546          bufp->can_be_null |= path_can_be_null;
2547
2548          /* Reset for next path.  */
2549          path_can_be_null = true;
2550
2551          p = fail_stack.stack[--fail_stack.avail];
2552        }
2553
2554      /* We should never be about to go beyond the end of the pattern.  */
2555      assert (p < pend);
2556
2557#ifdef SWITCH_ENUM_BUG
2558      switch ((int) ((re_opcode_t) *p++))
2559#else
2560      switch ((re_opcode_t) *p++)
2561#endif
2562        {
2563
2564        /* I guess the idea here is to simply not bother with a fastmap
2565           if a backreference is used, since it's too hard to figure out
2566           the fastmap for the corresponding group.  Setting
2567           `can_be_null' stops `re_search_2' from using the fastmap, so
2568           that is all we do.  */
2569        case duplicate:
2570          bufp->can_be_null = 1;
2571          return 0;
2572
2573
2574      /* Following are the cases which match a character.  These end
2575         with `break'.  */
2576
2577        case exactn:
2578          fastmap[p[1]] = 1;
2579          break;
2580
2581
2582        case charset:
2583          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2584            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2585              fastmap[j] = 1;
2586          break;
2587
2588
2589        case charset_not:
2590          /* Chars beyond end of map must be allowed.  */
2591          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2592            fastmap[j] = 1;
2593
2594          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2595            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2596              fastmap[j] = 1;
2597          break;
2598
2599
2600        case wordchar:
2601          for (j = 0; j < (1 << BYTEWIDTH); j++)
2602            if (SYNTAX (j) == Sword)
2603              fastmap[j] = 1;
2604          break;
2605
2606
2607        case notwordchar:
2608          for (j = 0; j < (1 << BYTEWIDTH); j++)
2609            if (SYNTAX (j) != Sword)
2610              fastmap[j] = 1;
2611          break;
2612
2613
2614        case anychar:
2615          /* `.' matches anything ...  */
2616          for (j = 0; j < (1 << BYTEWIDTH); j++)
2617            fastmap[j] = 1;
2618
2619          /* ... except perhaps newline.  */
2620          if (!(bufp->syntax & RE_DOT_NEWLINE))
2621            fastmap['\n'] = 0;
2622
2623          /* Return if we have already set `can_be_null'; if we have,
2624             then the fastmap is irrelevant.  Something's wrong here.  */
2625          else if (bufp->can_be_null)
2626            return 0;
2627
2628          /* Otherwise, have to check alternative paths.  */
2629          break;
2630
2631
2632#ifdef emacs
2633        case syntaxspec:
2634          k = *p++;
2635          for (j = 0; j < (1 << BYTEWIDTH); j++)
2636            if (SYNTAX (j) == (enum syntaxcode) k)
2637              fastmap[j] = 1;
2638          break;
2639
2640
2641        case notsyntaxspec:
2642          k = *p++;
2643          for (j = 0; j < (1 << BYTEWIDTH); j++)
2644            if (SYNTAX (j) != (enum syntaxcode) k)
2645              fastmap[j] = 1;
2646          break;
2647
2648
2649      /* All cases after this match the empty string.  These end with
2650         `continue'.  */
2651
2652
2653        case before_dot:
2654        case at_dot:
2655        case after_dot:
2656          continue;
2657#endif /* not emacs */
2658
2659
2660        case no_op:
2661        case begline:
2662        case endline:
2663        case begbuf:
2664        case endbuf:
2665        case wordbound:
2666        case notwordbound:
2667        case wordbeg:
2668        case wordend:
2669        case push_dummy_failure:
2670          continue;
2671
2672
2673        case jump_n:
2674        case pop_failure_jump:
2675        case maybe_pop_jump:
2676        case jump:
2677        case jump_past_alt:
2678        case dummy_failure_jump:
2679          EXTRACT_NUMBER_AND_INCR (j, p);
2680          p += j;
2681          if (j > 0)
2682            continue;
2683
2684          /* Jump backward implies we just went through the body of a
2685             loop and matched nothing.  Opcode jumped to should be
2686             `on_failure_jump' or `succeed_n'.  Just treat it like an
2687             ordinary jump.  For a * loop, it has pushed its failure
2688             point already; if so, discard that as redundant.  */
2689          if ((re_opcode_t) *p != on_failure_jump
2690              && (re_opcode_t) *p != succeed_n)
2691            continue;
2692
2693          p++;
2694          EXTRACT_NUMBER_AND_INCR (j, p);
2695          p += j;
2696
2697          /* If what's on the stack is where we are now, pop it.  */
2698          if (!FAIL_STACK_EMPTY ()
2699              && fail_stack.stack[fail_stack.avail - 1] == p)
2700            fail_stack.avail--;
2701
2702          continue;
2703
2704
2705        case on_failure_jump:
2706        case on_failure_keep_string_jump:
2707        handle_on_failure_jump:
2708          EXTRACT_NUMBER_AND_INCR (j, p);
2709
2710          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2711             end of the pattern.  We don't want to push such a point,
2712             since when we restore it above, entering the switch will
2713             increment `p' past the end of the pattern.  We don't need
2714             to push such a point since we obviously won't find any more
2715             fastmap entries beyond `pend'.  Such a pattern can match
2716             the null string, though.  */
2717          if (p + j < pend)
2718            {
2719              if (!PUSH_PATTERN_OP (p + j, fail_stack))
2720                return -2;
2721            }
2722          else
2723            bufp->can_be_null = 1;
2724
2725          if (succeed_n_p)
2726            {
2727              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
2728              succeed_n_p = false;
2729            }
2730
2731          continue;
2732
2733
2734        case succeed_n:
2735          /* Get to the number of times to succeed.  */
2736          p += 2;
2737
2738          /* Increment p past the n for when k != 0.  */
2739          EXTRACT_NUMBER_AND_INCR (k, p);
2740          if (k == 0)
2741            {
2742              p -= 4;
2743              succeed_n_p = true;  /* Spaghetti code alert.  */
2744              goto handle_on_failure_jump;
2745            }
2746          continue;
2747
2748
2749        case set_number_at:
2750          p += 4;
2751          continue;
2752
2753
2754        case start_memory:
2755        case stop_memory:
2756          p += 2;
2757          continue;
2758
2759
2760        default:
2761          abort (); /* We have listed all the cases.  */
2762        } /* switch *p++ */
2763
2764      /* Getting here means we have found the possible starting
2765         characters for one path of the pattern -- and that the empty
2766         string does not match.  We need not follow this path further.
2767         Instead, look at the next alternative (remembered on the
2768         stack), or quit if no more.  The test at the top of the loop
2769         does these things.  */
2770      path_can_be_null = false;
2771      p = pend;
2772    } /* while p */
2773
2774  /* Set `can_be_null' for the last path (also the first path, if the
2775     pattern is empty).  */
2776  bufp->can_be_null |= path_can_be_null;
2777  return 0;
2778} /* re_compile_fastmap */
2779\f
2780/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2781   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2782   this memory for recording register information.  STARTS and ENDS
2783   must be allocated using the malloc library routine, and must each
2784   be at least NUM_REGS * sizeof (regoff_t) bytes long.
2785
2786   If NUM_REGS == 0, then subsequent matches should allocate their own
2787   register data.
2788
2789   Unless this function is called, the first search or match using
2790   PATTERN_BUFFER will allocate its own register data, without
2791   freeing the old data.  */
2792
2793void
2794re_set_registers (bufp, regs, num_regs, starts, ends)
2795    struct re_pattern_buffer *bufp;
2796    struct re_registers *regs;
2797    unsigned num_regs;
2798    regoff_t *starts, *ends;
2799{
2800  if (num_regs)
2801    {
2802      bufp->regs_allocated = REGS_REALLOCATE;
2803      regs->num_regs = num_regs;
2804      regs->start = starts;
2805      regs->end = ends;
2806    }
2807  else
2808    {
2809      bufp->regs_allocated = REGS_UNALLOCATED;
2810      regs->num_regs = 0;
2811      regs->start = regs->end = (regoff_t) 0;
2812    }
2813}
2814\f
2815/* Searching routines.  */
2816
2817/* Like re_search_2, below, but only one string is specified, and
2818   doesn't let you say where to stop matching. */
2819
2820int
2821re_search (bufp, string, size, startpos, range, regs)
2822     struct re_pattern_buffer *bufp;
2823     const char *string;
2824     int size, startpos, range;
2825     struct re_registers *regs;
2826{
2827  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2828                      regs, size);
2829}
2830
2831
2832/* Using the compiled pattern in BUFP->buffer, first tries to match the
2833   virtual concatenation of STRING1 and STRING2, starting first at index
2834   STARTPOS, then at STARTPOS + 1, and so on.
2835
2836   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2837
2838   RANGE is how far to scan while trying to match.  RANGE = 0 means try
2839   only at STARTPOS; in general, the last start tried is STARTPOS +
2840   RANGE.
2841
2842   In REGS, return the indices of the virtual concatenation of STRING1
2843   and STRING2 that matched the entire BUFP->buffer and its contained
2844   subexpressions.
2845
2846   Do not consider matching one past the index STOP in the virtual
2847   concatenation of STRING1 and STRING2.
2848
2849   We return either the position in the strings at which the match was
2850   found, -1 if no match, or -2 if error (such as failure
2851   stack overflow).  */
2852
2853int
2854re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2855     struct re_pattern_buffer *bufp;
2856     const char *string1, *string2;
2857     int size1, size2;
2858     int startpos;
2859     int range;
2860     struct re_registers *regs;
2861     int stop;
2862{
2863  int val;
2864  register char *fastmap = bufp->fastmap;
2865  register char *translate = bufp->translate;
2866  int total_size = size1 + size2;
2867  int endpos = startpos + range;
2868
2869  /* Check for out-of-range STARTPOS.  */
2870  if (startpos < 0 || startpos > total_size)
2871    return -1;
2872
2873  /* Fix up RANGE if it might eventually take us outside
2874     the virtual concatenation of STRING1 and STRING2.  */
2875  if (endpos < -1)
2876    range = -1 - startpos;
2877  else if (endpos > total_size)
2878    range = total_size - startpos;
2879
2880  /* If the search isn't to be a backwards one, don't waste time in a
2881     search for a pattern that must be anchored.  */
2882  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2883    {
2884      if (startpos > 0)
2885        return -1;
2886      else
2887        range = 1;
2888    }
2889
2890  /* Update the fastmap now if not correct already.  */
2891  if (fastmap && !bufp->fastmap_accurate)
2892    if (re_compile_fastmap (bufp) == -2)
2893      return -2;
2894
2895  /* Loop through the string, looking for a place to start matching.  */
2896  for (;;)
2897    {
2898      /* If a fastmap is supplied, skip quickly over characters that
2899         cannot be the start of a match.  If the pattern can match the
2900         null string, however, we don't need to skip characters; we want
2901         the first null string.  */
2902      if (fastmap && startpos < total_size && !bufp->can_be_null)
2903        {
2904          if (range > 0)        /* Searching forwards.  */
2905            {
2906              register const char *d;
2907              register int lim = 0;
2908              int irange = range;
2909
2910              if (startpos < size1 && startpos + range >= size1)
2911                lim = range - (size1 - startpos);
2912
2913              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2914
2915              /* Written out as an if-else to avoid testing `translate'
2916                 inside the loop.  */
2917              if (translate)
2918                while (range > lim
2919                       && !fastmap[(unsigned char)
2920                                   translate[(unsigned char) *d++]])
2921                  range--;
2922              else
2923                while (range > lim && !fastmap[(unsigned char) *d++])
2924                  range--;
2925
2926              startpos += irange - range;
2927            }
2928          else                          /* Searching backwards.  */
2929            {
2930              register char c = (size1 == 0 || startpos >= size1
2931                                 ? string2[startpos - size1]
2932                                 : string1[startpos]);
2933
2934              if (!fastmap[(unsigned char) TRANSLATE (c)])
2935                goto advance;
2936            }
2937        }
2938
2939      /* If can't match the null string, and that's all we have left, fail.  */
2940      if (range >= 0 && startpos == total_size && fastmap
2941          && !bufp->can_be_null)
2942        return -1;
2943
2944      val = re_match_2 (bufp, string1, size1, string2, size2,
2945                        startpos, regs, stop);
2946      if (val >= 0)
2947        return startpos;
2948
2949      if (val == -2)
2950        return -2;
2951
2952    advance:
2953      if (!range)
2954        break;
2955      else if (range > 0)
2956        {
2957          range--;
2958          startpos++;
2959        }
2960      else
2961        {
2962          range++;
2963          startpos--;
2964        }
2965    }
2966  return -1;
2967} /* re_search_2 */
2968\f
2969/* Declarations and macros for re_match_2.  */
2970
2971static int bcmp_translate ();
2972static boolean alt_match_null_string_p (),
2973               common_op_match_null_string_p (),
2974               group_match_null_string_p ();
2975
2976/* Structure for per-register (a.k.a. per-group) information.
2977   This must not be longer than one word, because we push this value
2978   onto the failure stack.  Other register information, such as the
2979   starting and ending positions (which are addresses), and the list of
2980   inner groups (which is a bits list) are maintained in separate
2981   variables.
2982
2983   We are making a (strictly speaking) nonportable assumption here: that
2984   the compiler will pack our bit fields into something that fits into
2985   the type of `word', i.e., is something that fits into one item on the
2986   failure stack.  */
2987typedef union
2988{
2989  fail_stack_elt_t word;
2990  struct
2991  {
2992      /* This field is one if this group can match the empty string,
2993         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
2994#define MATCH_NULL_UNSET_VALUE 3
2995    unsigned match_null_string_p : 2;
2996    unsigned is_active : 1;
2997    unsigned matched_something : 1;
2998    unsigned ever_matched_something : 1;
2999  } bits;
3000} register_info_type;
3001
3002#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3003#define IS_ACTIVE(R)  ((R).bits.is_active)
3004#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3005#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3006
3007
3008/* Call this when have matched a real character; it sets `matched' flags
3009   for the subexpressions which we are currently inside.  Also records
3010   that those subexprs have matched.  */
3011#define SET_REGS_MATCHED()                                              \
3012  do                                                                    \
3013    {                                                                   \
3014      unsigned r;                                                       \
3015      for (r = lowest_active_reg; r <= highest_active_reg; r++)         \
3016        {                                                               \
3017          MATCHED_SOMETHING (reg_info[r])                               \
3018            = EVER_MATCHED_SOMETHING (reg_info[r])                      \
3019            = 1;                                                        \
3020        }                                                               \
3021    }                                                                   \
3022  while (0)
3023
3024
3025/* This converts PTR, a pointer into one of the search strings `string1'
3026   and `string2' into an offset from the beginning of that string.  */
3027#define POINTER_TO_OFFSET(ptr)                                          \
3028  (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3029
3030/* Registers are set to a sentinel when they haven't yet matched.  */
3031#define REG_UNSET_VALUE ((char *) -1)
3032#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3033
3034
3035/* Macros for dealing with the split strings in re_match_2.  */
3036
3037#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3038
3039/* Call before fetching a character with *d.  This switches over to
3040   string2 if necessary.  */
3041#define PREFETCH()                                                      \
3042  while (d == dend)                                                     \
3043    {                                                                   \
3044      /* End of string2 => fail.  */                                    \
3045      if (dend == end_match_2)                                          \
3046        goto fail;                                                      \
3047      /* End of string1 => advance to string2.  */                      \
3048      d = string2;                                                      \
3049      dend = end_match_2;                                               \
3050    }
3051
3052
3053/* Test if at very beginning or at very end of the virtual concatenation
3054   of `string1' and `string2'.  If only one string, it's `string2'.  */
3055#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3056#define AT_STRINGS_END(d) ((d) == end2)
3057
3058
3059/* Test if D points to a character which is word-constituent.  We have
3060   two special cases to check for: if past the end of string1, look at
3061   the first character in string2; and if before the beginning of
3062   string2, look at the last character in string1.  */
3063#define WORDCHAR_P(d)                                                   \
3064  (SYNTAX ((d) == end1 ? *string2                                       \
3065           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3066   == Sword)
3067
3068/* Test if the character before D and the one at D differ with respect
3069   to being word-constituent.  */
3070#define AT_WORD_BOUNDARY(d)                                             \
3071  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3072   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3073
3074
3075/* Free everything we malloc.  */
3076#ifdef REGEX_MALLOC
3077#define FREE_VAR(var) if (var) free (var); var = NULL
3078#define FREE_VARIABLES()                                                \
3079  do {                                                                  \
3080    FREE_VAR (fail_stack.stack);                                        \
3081    FREE_VAR (regstart);                                                \
3082    FREE_VAR (regend);                                                  \
3083    FREE_VAR (old_regstart);                                            \
3084    FREE_VAR (old_regend);                                              \
3085    FREE_VAR (best_regstart);                                           \
3086    FREE_VAR (best_regend);                                             \
3087    FREE_VAR (reg_info);                                                \
3088    FREE_VAR (reg_dummy);                                               \
3089    FREE_VAR (reg_info_dummy);                                          \
3090  } while (0)
3091#else /* not REGEX_MALLOC */
3092/* Some MIPS systems (at least) want this to free alloca'd storage.  */
3093#define FREE_VARIABLES() alloca (0)
3094#endif /* not REGEX_MALLOC */
3095
3096
3097/* These values must meet several constraints.  They must not be valid
3098   register values; since we have a limit of 255 registers (because
3099   we use only one byte in the pattern for the register number), we can
3100   use numbers larger than 255.  They must differ by 1, because of
3101   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3102   be larger than the value for the highest register, so we do not try
3103   to actually save any registers when none are active.  */
3104#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3105#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3106\f
3107/* Matching routines.  */
3108
3109#ifndef emacs   /* Emacs never uses this.  */
3110/* re_match is like re_match_2 except it takes only a single string.  */
3111
3112int
3113re_match (bufp, string, size, pos, regs)
3114     struct re_pattern_buffer *bufp;
3115     const char *string;
3116     int size, pos;
3117     struct re_registers *regs;
3118 {
3119  return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3120}
3121#endif /* not emacs */
3122
3123
3124/* re_match_2 matches the compiled pattern in BUFP against the
3125   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3126   and SIZE2, respectively).  We start matching at POS, and stop
3127   matching at STOP.
3128
3129   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3130   store offsets for the substring each group matched in REGS.  See the
3131   documentation for exactly how many groups we fill.
3132
3133   We return -1 if no match, -2 if an internal error (such as the
3134   failure stack overflowing).  Otherwise, we return the length of the
3135   matched substring.  */
3136
3137int
3138re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3139     struct re_pattern_buffer *bufp;
3140     const char *string1, *string2;
3141     int size1, size2;
3142     int pos;
3143     struct re_registers *regs;
3144     int stop;
3145{
3146  /* General temporaries.  */
3147  int mcnt;
3148  unsigned char *p1;
3149
3150  /* Just past the end of the corresponding string.  */
3151  const char *end1, *end2;
3152
3153  /* Pointers into string1 and string2, just past the last characters in
3154     each to consider matching.  */
3155  const char *end_match_1, *end_match_2;
3156
3157  /* Where we are in the data, and the end of the current string.  */
3158  const char *d, *dend;
3159
3160  /* Where we are in the pattern, and the end of the pattern.  */
3161  unsigned char *p = bufp->buffer;
3162  register unsigned char *pend = p + bufp->used;
3163
3164  /* We use this to map every character in the string.  */
3165  char *translate = bufp->translate;
3166
3167  /* Failure point stack.  Each place that can handle a failure further
3168     down the line pushes a failure point on this stack.  It consists of
3169     restart, regend, and reg_info for all registers corresponding to
3170     the subexpressions we're currently inside, plus the number of such
3171     registers, and, finally, two char *'s.  The first char * is where
3172     to resume scanning the pattern; the second one is where to resume
3173     scanning the strings.  If the latter is zero, the failure point is
3174     a ``dummy''; if a failure happens and the failure point is a dummy,
3175     it gets discarded and the next next one is tried.  */
3176  fail_stack_type fail_stack;
3177#ifdef DEBUG
3178  static unsigned failure_id = 0;
3179  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3180#endif
3181
3182  /* We fill all the registers internally, independent of what we
3183     return, for use in backreferences.  The number here includes
3184     an element for register zero.  */
3185  unsigned num_regs = bufp->re_nsub + 1;
3186
3187  /* The currently active registers.  */
3188  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3189  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3190
3191  /* Information on the contents of registers. These are pointers into
3192     the input strings; they record just what was matched (on this
3193     attempt) by a subexpression part of the pattern, that is, the
3194     regnum-th regstart pointer points to where in the pattern we began
3195     matching and the regnum-th regend points to right after where we
3196     stopped matching the regnum-th subexpression.  (The zeroth register
3197     keeps track of what the whole pattern matches.)  */
3198  const char **regstart = NULL, **regend = NULL;
3199
3200  /* If a group that's operated upon by a repetition operator fails to
3201     match anything, then the register for its start will need to be
3202     restored because it will have been set to wherever in the string we
3203     are when we last see its open-group operator.  Similarly for a
3204     register's end.  */
3205  const char **old_regstart = NULL, **old_regend = NULL;
3206
3207  /* The is_active field of reg_info helps us keep track of which (possibly
3208     nested) subexpressions we are currently in. The matched_something
3209     field of reg_info[reg_num] helps us tell whether or not we have
3210     matched any of the pattern so far this time through the reg_num-th
3211     subexpression.  These two fields get reset each time through any
3212     loop their register is in.  */
3213  register_info_type *reg_info = NULL;
3214
3215  /* The following record the register info as found in the above
3216     variables when we find a match better than any we've seen before.
3217     This happens as we backtrack through the failure points, which in
3218     turn happens only if we have not yet matched the entire string. */
3219  unsigned best_regs_set = false;
3220  const char **best_regstart = NULL, **best_regend = NULL;
3221
3222  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3223     allocate space for that if we're not allocating space for anything
3224     else (see below).  Also, we never need info about register 0 for
3225     any of the other register vectors, and it seems rather a kludge to
3226     treat `best_regend' differently than the rest.  So we keep track of
3227     the end of the best match so far in a separate variable.  We
3228     initialize this to NULL so that when we backtrack the first time
3229     and need to test it, it's not garbage.  */
3230  const char *match_end = NULL;
3231
3232  /* Used when we pop values we don't care about.  */
3233  const char **reg_dummy = NULL;
3234  register_info_type *reg_info_dummy = NULL;
3235
3236#ifdef DEBUG
3237  /* Counts the total number of registers pushed.  */
3238  unsigned num_regs_pushed = 0;
3239#endif
3240
3241  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3242
3243  INIT_FAIL_STACK ();
3244
3245  /* Do not bother to initialize all the register variables if there are
3246     no groups in the pattern, as it takes a fair amount of time.  If
3247     there are groups, we include space for register 0 (the whole
3248     pattern), even though we never use it, since it simplifies the
3249     array indexing.  We should fix this.  */
3250  if (bufp->re_nsub)
3251    {
3252      regstart = REGEX_TALLOC (num_regs, const char *);
3253      regend = REGEX_TALLOC (num_regs, const char *);
3254      old_regstart = REGEX_TALLOC (num_regs, const char *);
3255      old_regend = REGEX_TALLOC (num_regs, const char *);
3256      best_regstart = REGEX_TALLOC (num_regs, const char *);
3257      best_regend = REGEX_TALLOC (num_regs, const char *);
3258      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3259      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3260      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3261
3262      if (!(regstart && regend && old_regstart && old_regend && reg_info
3263            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3264        {
3265          FREE_VARIABLES ();
3266          return -2;
3267        }
3268    }
3269#ifdef REGEX_MALLOC
3270  else
3271    {
3272      /* We must initialize all our variables to NULL, so that
3273         `FREE_VARIABLES' doesn't try to free them.  */
3274      regstart = regend = old_regstart = old_regend = best_regstart
3275        = best_regend = reg_dummy = NULL;
3276      reg_info = reg_info_dummy = (register_info_type *) NULL;
3277    }
3278#endif /* REGEX_MALLOC */
3279
3280  /* The starting position is bogus.  */
3281  if (pos < 0 || pos > size1 + size2)
3282    {
3283      FREE_VARIABLES ();
3284      return -1;
3285    }
3286
3287  /* Initialize subexpression text positions to -1 to mark ones that no
3288     start_memory/stop_memory has been seen for. Also initialize the
3289     register information struct.  */
3290  for (mcnt = 1; mcnt < num_regs; mcnt++)
3291    {
3292      regstart[mcnt] = regend[mcnt]
3293        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3294
3295      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3296      IS_ACTIVE (reg_info[mcnt]) = 0;
3297      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3298      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3299    }
3300
3301  /* We move `string1' into `string2' if the latter's empty -- but not if
3302     `string1' is null.  */
3303  if (size2 == 0 && string1 != NULL)
3304    {
3305      string2 = string1;
3306      size2 = size1;
3307      string1 = 0;
3308      size1 = 0;
3309    }
3310  end1 = string1 + size1;
3311  end2 = string2 + size2;
3312
3313  /* Compute where to stop matching, within the two strings.  */
3314  if (stop <= size1)
3315    {
3316      end_match_1 = string1 + stop;
3317      end_match_2 = string2;
3318    }
3319  else
3320    {
3321      end_match_1 = end1;
3322      end_match_2 = string2 + stop - size1;
3323    }
3324
3325  /* `p' scans through the pattern as `d' scans through the data.
3326     `dend' is the end of the input string that `d' points within.  `d'
3327     is advanced into the following input string whenever necessary, but
3328     this happens before fetching; therefore, at the beginning of the
3329     loop, `d' can be pointing at the end of a string, but it cannot
3330     equal `string2'.  */
3331  if (size1 > 0 && pos <= size1)
3332    {
3333      d = string1 + pos;
3334      dend = end_match_1;
3335    }
3336  else
3337    {
3338      d = string2 + pos - size1;
3339      dend = end_match_2;
3340    }
3341
3342  DEBUG_PRINT1 ("The compiled pattern is: ");
3343  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3344  DEBUG_PRINT1 ("The string to match is: `");
3345  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3346  DEBUG_PRINT1 ("'\n");
3347
3348  /* This loops over pattern commands.  It exits by returning from the
3349     function if the match is complete, or it drops through if the match
3350     fails at this starting point in the input data.  */
3351  for (;;)
3352    {
3353      DEBUG_PRINT2 ("\n0x%x: ", p);
3354
3355      if (p == pend)
3356        { /* End of pattern means we might have succeeded.  */
3357          DEBUG_PRINT1 ("end of pattern ... ");
3358
3359          /* If we haven't matched the entire string, and we want the
3360             longest match, try backtracking.  */
3361          if (d != end_match_2)
3362            {
3363              DEBUG_PRINT1 ("backtracking.\n");
3364
3365              if (!FAIL_STACK_EMPTY ())
3366                { /* More failure points to try.  */
3367                  boolean same_str_p = (FIRST_STRING_P (match_end)
3368                                        == MATCHING_IN_FIRST_STRING);
3369
3370                  /* If exceeds best match so far, save it.  */
3371                  if (!best_regs_set
3372                      || (same_str_p && d > match_end)
3373                      || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3374                    {
3375                      best_regs_set = true;
3376                      match_end = d;
3377
3378                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3379
3380                      for (mcnt = 1; mcnt < num_regs; mcnt++)
3381                        {
3382                          best_regstart[mcnt] = regstart[mcnt];
3383                          best_regend[mcnt] = regend[mcnt];
3384                        }
3385                    }
3386                  goto fail;
3387                }
3388
3389              /* If no failure points, don't restore garbage.  */
3390              else if (best_regs_set)
3391                {
3392                restore_best_regs:
3393                  /* Restore best match.  It may happen that `dend ==
3394                     end_match_1' while the restored d is in string2.
3395                     For example, the pattern `x.*y.*z' against the
3396                     strings `x-' and `y-z-', if the two strings are
3397                     not consecutive in memory.  */
3398                  DEBUG_PRINT1 ("Restoring best registers.\n");
3399
3400                  d = match_end;
3401                  dend = ((d >= string1 && d <= end1)
3402                           ? end_match_1 : end_match_2);
3403
3404                  for (mcnt = 1; mcnt < num_regs; mcnt++)
3405                    {
3406                      regstart[mcnt] = best_regstart[mcnt];
3407                      regend[mcnt] = best_regend[mcnt];
3408                    }
3409                }
3410            } /* d != end_match_2 */
3411
3412          DEBUG_PRINT1 ("Accepting match.\n");
3413
3414          /* If caller wants register contents data back, do it.  */
3415          if (regs && !bufp->no_sub)
3416            {
3417              /* Have the register data arrays been allocated?  */
3418              if (bufp->regs_allocated == REGS_UNALLOCATED)
3419                { /* No.  So allocate them with malloc.  We need one
3420                     extra element beyond `num_regs' for the `-1' marker
3421                     GNU code uses.  */
3422                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3423                  regs->start = TALLOC (regs->num_regs, regoff_t);
3424                  regs->end = TALLOC (regs->num_regs, regoff_t);
3425                  if (regs->start == NULL || regs->end == NULL)
3426                    return -2;
3427                  bufp->regs_allocated = REGS_REALLOCATE;
3428                }
3429              else if (bufp->regs_allocated == REGS_REALLOCATE)
3430                { /* Yes.  If we need more elements than were already
3431                     allocated, reallocate them.  If we need fewer, just
3432                     leave it alone.  */
3433                  if (regs->num_regs < num_regs + 1)
3434                    {
3435                      regs->num_regs = num_regs + 1;
3436                      RETALLOC (regs->start, regs->num_regs, regoff_t);
3437                      RETALLOC (regs->end, regs->num_regs, regoff_t);
3438                      if (regs->start == NULL || regs->end == NULL)
3439                        return -2;
3440                    }
3441                }
3442              else
3443                assert (bufp->regs_allocated == REGS_FIXED);
3444
3445              /* Convert the pointer data in `regstart' and `regend' to
3446                 indices.  Register zero has to be set differently,
3447                 since we haven't kept track of any info for it.  */
3448              if (regs->num_regs > 0)
3449                {
3450                  regs->start[0] = pos;
3451                  regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3452                                  : d - string2 + size1);
3453                }
3454
3455              /* Go through the first `min (num_regs, regs->num_regs)'
3456                 registers, since that is all we initialized.  */
3457              for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3458                {
3459                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3460                    regs->start[mcnt] = regs->end[mcnt] = -1;
3461                  else
3462                    {
3463                      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3464                      regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3465                    }
3466                }
3467
3468              /* If the regs structure we return has more elements than
3469                 were in the pattern, set the extra elements to -1.  If
3470                 we (re)allocated the registers, this is the case,
3471                 because we always allocate enough to have at least one
3472                 -1 at the end.  */
3473              for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3474                regs->start[mcnt] = regs->end[mcnt] = -1;
3475            } /* regs && !bufp->no_sub */
3476
3477          FREE_VARIABLES ();
3478          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3479                        nfailure_points_pushed, nfailure_points_popped,
3480                        nfailure_points_pushed - nfailure_points_popped);
3481          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3482
3483          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3484                            ? string1
3485                            : string2 - size1);
3486
3487          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3488
3489          return mcnt;
3490        }
3491
3492      /* Otherwise match next pattern command.  */
3493#ifdef SWITCH_ENUM_BUG
3494      switch ((int) ((re_opcode_t) *p++))
3495#else
3496      switch ((re_opcode_t) *p++)
3497#endif
3498        {
3499        /* Ignore these.  Used to ignore the n of succeed_n's which
3500           currently have n == 0.  */
3501        case no_op:
3502          DEBUG_PRINT1 ("EXECUTING no_op.\n");
3503          break;
3504
3505
3506        /* Match the next n pattern characters exactly.  The following
3507           byte in the pattern defines n, and the n bytes after that
3508           are the characters to match.  */
3509        case exactn:
3510          mcnt = *p++;
3511          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3512
3513          /* This is written out as an if-else so we don't waste time
3514             testing `translate' inside the loop.  */
3515          if (translate)
3516            {
3517              do
3518                {
3519                  PREFETCH ();
3520                  if (translate[(unsigned char) *d++] != (char) *p++)
3521                    goto fail;
3522                }
3523              while (--mcnt);
3524            }
3525          else
3526            {
3527              do
3528                {
3529                  PREFETCH ();
3530                  if (*d++ != (char) *p++) goto fail;
3531                }
3532              while (--mcnt);
3533            }
3534          SET_REGS_MATCHED ();
3535          break;
3536
3537
3538        /* Match any character except possibly a newline or a null.  */
3539        case anychar:
3540          DEBUG_PRINT1 ("EXECUTING anychar.\n");
3541
3542          PREFETCH ();
3543
3544          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3545              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3546            goto fail;
3547
3548          SET_REGS_MATCHED ();
3549          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3550          d++;
3551          break;
3552
3553
3554        case charset:
3555        case charset_not:
3556          {
3557            register unsigned char c;
3558            boolean not = (re_opcode_t) *(p - 1) == charset_not;
3559
3560            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3561
3562            PREFETCH ();
3563            c = TRANSLATE (*d); /* The character to match.  */
3564
3565            /* Cast to `unsigned' instead of `unsigned char' in case the
3566               bit list is a full 32 bytes long.  */
3567            if (c < (unsigned) (*p * BYTEWIDTH)
3568                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3569              not = !not;
3570
3571            p += 1 + *p;
3572
3573            if (!not) goto fail;
3574
3575            SET_REGS_MATCHED ();
3576            d++;
3577            break;
3578          }
3579
3580
3581        /* The beginning of a group is represented by start_memory.
3582           The arguments are the register number in the next byte, and the
3583           number of groups inner to this one in the next.  The text
3584           matched within the group is recorded (in the internal
3585           registers data structure) under the register number.  */
3586        case start_memory:
3587          DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3588
3589          /* Find out if this group can match the empty string.  */
3590          p1 = p;               /* To send to group_match_null_string_p.  */
3591
3592          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3593            REG_MATCH_NULL_STRING_P (reg_info[*p])
3594              = group_match_null_string_p (&p1, pend, reg_info);
3595
3596          /* Save the position in the string where we were the last time
3597             we were at this open-group operator in case the group is
3598             operated upon by a repetition operator, e.g., with `(a*)*b'
3599             against `ab'; then we want to ignore where we are now in
3600             the string in case this attempt to match fails.  */
3601          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3602                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3603                             : regstart[*p];
3604          DEBUG_PRINT2 ("  old_regstart: %d\n",
3605                         POINTER_TO_OFFSET (old_regstart[*p]));
3606
3607          regstart[*p] = d;
3608          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3609
3610          IS_ACTIVE (reg_info[*p]) = 1;
3611          MATCHED_SOMETHING (reg_info[*p]) = 0;
3612
3613          /* This is the new highest active register.  */
3614          highest_active_reg = *p;
3615
3616          /* If nothing was active before, this is the new lowest active
3617             register.  */
3618          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3619            lowest_active_reg = *p;
3620
3621          /* Move past the register number and inner group count.  */
3622          p += 2;
3623          break;
3624
3625
3626        /* The stop_memory opcode represents the end of a group.  Its
3627           arguments are the same as start_memory's: the register
3628           number, and the number of inner groups.  */
3629        case stop_memory:
3630          DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3631
3632          /* We need to save the string position the last time we were at
3633             this close-group operator in case the group is operated
3634             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3635             against `aba'; then we want to ignore where we are now in
3636             the string in case this attempt to match fails.  */
3637          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3638                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
3639                           : regend[*p];
3640          DEBUG_PRINT2 ("      old_regend: %d\n",
3641                         POINTER_TO_OFFSET (old_regend[*p]));
3642
3643          regend[*p] = d;
3644          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3645
3646          /* This register isn't active anymore.  */
3647          IS_ACTIVE (reg_info[*p]) = 0;
3648
3649          /* If this was the only register active, nothing is active
3650             anymore.  */
3651          if (lowest_active_reg == highest_active_reg)
3652            {
3653              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3654              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3655            }
3656          else
3657            { /* We must scan for the new highest active register, since
3658                 it isn't necessarily one less than now: consider
3659                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3660                 new highest active register is 1.  */
3661              unsigned char r = *p - 1;
3662              while (r > 0 && !IS_ACTIVE (reg_info[r]))
3663                r--;
3664
3665              /* If we end up at register zero, that means that we saved
3666                 the registers as the result of an `on_failure_jump', not
3667                 a `start_memory', and we jumped to past the innermost
3668                 `stop_memory'.  For example, in ((.)*) we save
3669                 registers 1 and 2 as a result of the *, but when we pop
3670                 back to the second ), we are at the stop_memory 1.
3671                 Thus, nothing is active.  */
3672              if (r == 0)
3673                {
3674                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3675                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3676                }
3677              else
3678                highest_active_reg = r;
3679            }
3680
3681          /* If just failed to match something this time around with a
3682             group that's operated on by a repetition operator, try to
3683             force exit from the ``loop'', and restore the register
3684             information for this group that we had before trying this
3685             last match.  */
3686          if ((!MATCHED_SOMETHING (reg_info[*p])
3687               || (re_opcode_t) p[-3] == start_memory)
3688              && (p + 2) < pend)
3689            {
3690              boolean is_a_jump_n = false;
3691
3692              p1 = p + 2;
3693              mcnt = 0;
3694              switch ((re_opcode_t) *p1++)
3695                {
3696                  case jump_n:
3697                    is_a_jump_n = true;
3698                  case pop_failure_jump:
3699                  case maybe_pop_jump:
3700                  case jump:
3701                  case dummy_failure_jump:
3702                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3703                    if (is_a_jump_n)
3704                      p1 += 2;
3705                    break;
3706
3707                  default:
3708                    /* do nothing */ ;
3709                }
3710              p1 += mcnt;
3711
3712              /* If the next operation is a jump backwards in the pattern
3713                 to an on_failure_jump right before the start_memory
3714                 corresponding to this stop_memory, exit from the loop
3715                 by forcing a failure after pushing on the stack the
3716                 on_failure_jump's jump in the pattern, and d.  */
3717              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3718                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3719                {
3720                  /* If this group ever matched anything, then restore
3721                     what its registers were before trying this last
3722                     failed match, e.g., with `(a*)*b' against `ab' for
3723                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
3724                     against `aba' for regend[3].
3725
3726                     Also restore the registers for inner groups for,
3727                     e.g., `((a*)(b*))*' against `aba' (register 3 would
3728                     otherwise get trashed).  */
3729
3730                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3731                    {
3732                      unsigned r;
3733
3734                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3735
3736                      /* Restore this and inner groups' (if any) registers.  */
3737                      for (r = *p; r < *p + *(p + 1); r++)
3738                        {
3739                          regstart[r] = old_regstart[r];
3740
3741                          /* xx why this test?  */
3742                          if ((int) old_regend[r] >= (int) regstart[r])
3743                            regend[r] = old_regend[r];
3744                        }
3745                    }
3746                  p1++;
3747                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3748                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3749
3750                  goto fail;
3751                }
3752            }
3753
3754          /* Move past the register number and the inner group count.  */
3755          p += 2;
3756          break;
3757
3758
3759        /* \<digit> has been turned into a `duplicate' command which is
3760           followed by the numeric value of <digit> as the register number.  */
3761        case duplicate:
3762          {
3763            register const char *d2, *dend2;
3764            int regno = *p++;   /* Get which register to match against.  */
3765            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3766
3767            /* Can't back reference a group which we've never matched.  */
3768            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3769              goto fail;
3770
3771            /* Where in input to try to start matching.  */
3772            d2 = regstart[regno];
3773
3774            /* Where to stop matching; if both the place to start and
3775               the place to stop matching are in the same string, then
3776               set to the place to stop, otherwise, for now have to use
3777               the end of the first string.  */
3778
3779            dend2 = ((FIRST_STRING_P (regstart[regno])
3780                      == FIRST_STRING_P (regend[regno]))
3781                     ? regend[regno] : end_match_1);
3782            for (;;)
3783              {
3784                /* If necessary, advance to next segment in register
3785                   contents.  */
3786                while (d2 == dend2)
3787                  {
3788                    if (dend2 == end_match_2) break;
3789                    if (dend2 == regend[regno]) break;
3790
3791                    /* End of string1 => advance to string2. */
3792                    d2 = string2;
3793                    dend2 = regend[regno];
3794                  }
3795                /* At end of register contents => success */
3796                if (d2 == dend2) break;
3797
3798                /* If necessary, advance to next segment in data.  */
3799                PREFETCH ();
3800
3801                /* How many characters left in this segment to match.  */
3802                mcnt = dend - d;
3803
3804                /* Want how many consecutive characters we can match in
3805                   one shot, so, if necessary, adjust the count.  */
3806                if (mcnt > dend2 - d2)
3807                  mcnt = dend2 - d2;
3808
3809                /* Compare that many; failure if mismatch, else move
3810                   past them.  */
3811                if (translate
3812                    ? bcmp_translate (d, d2, mcnt, translate)
3813                    : bcmp (d, d2, mcnt))
3814                  goto fail;
3815                d += mcnt, d2 += mcnt;
3816              }
3817          }
3818          break;
3819
3820
3821        /* begline matches the empty string at the beginning of the string
3822           (unless `not_bol' is set in `bufp'), and, if
3823           `newline_anchor' is set, after newlines.  */
3824        case begline:
3825          DEBUG_PRINT1 ("EXECUTING begline.\n");
3826
3827          if (AT_STRINGS_BEG (d))
3828            {
3829              if (!bufp->not_bol) break;
3830            }
3831          else if (d[-1] == '\n' && bufp->newline_anchor)
3832            {
3833              break;
3834            }
3835          /* In all other cases, we fail.  */
3836          goto fail;
3837
3838
3839        /* endline is the dual of begline.  */
3840        case endline:
3841          DEBUG_PRINT1 ("EXECUTING endline.\n");
3842
3843          if (AT_STRINGS_END (d))
3844            {
3845              if (!bufp->not_eol) break;
3846            }
3847
3848          /* We have to ``prefetch'' the next character.  */
3849          else if ((d == end1 ? *string2 : *d) == '\n'
3850                   && bufp->newline_anchor)
3851            {
3852              break;
3853            }
3854          goto fail;
3855
3856
3857        /* Match at the very beginning of the data.  */
3858        case begbuf:
3859          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3860          if (AT_STRINGS_BEG (d))
3861            break;
3862          goto fail;
3863
3864
3865        /* Match at the very end of the data.  */
3866        case endbuf:
3867          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3868          if (AT_STRINGS_END (d))
3869            break;
3870          goto fail;
3871
3872
3873        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3874           pushes NULL as the value for the string on the stack.  Then
3875           `pop_failure_point' will keep the current value for the
3876           string, instead of restoring it.  To see why, consider
3877           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3878           then the . fails against the \n.  But the next thing we want
3879           to do is match the \n against the \n; if we restored the
3880           string value, we would be back at the foo.
3881
3882           Because this is used only in specific cases, we don't need to
3883           check all the things that `on_failure_jump' does, to make
3884           sure the right things get saved on the stack.  Hence we don't
3885           share its code.  The only reason to push anything on the
3886           stack at all is that otherwise we would have to change
3887           `anychar's code to do something besides goto fail in this
3888           case; that seems worse than this.  */
3889        case on_failure_keep_string_jump:
3890          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3891
3892          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3893          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3894
3895          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3896          break;
3897
3898
3899        /* Uses of on_failure_jump:
3900
3901           Each alternative starts with an on_failure_jump that points
3902           to the beginning of the next alternative.  Each alternative
3903           except the last ends with a jump that in effect jumps past
3904           the rest of the alternatives.  (They really jump to the
3905           ending jump of the following alternative, because tensioning
3906           these jumps is a hassle.)
3907
3908           Repeats start with an on_failure_jump that points past both
3909           the repetition text and either the following jump or
3910           pop_failure_jump back to this on_failure_jump.  */
3911        case on_failure_jump:
3912        on_failure:
3913          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3914
3915          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3916          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3917
3918          /* If this on_failure_jump comes right before a group (i.e.,
3919             the original * applied to a group), save the information
3920             for that group and all inner ones, so that if we fail back
3921             to this point, the group's information will be correct.
3922             For example, in \(a*\)*\1, we need the preceding group,
3923             and in \(\(a*\)b*\)\2, we need the inner group.  */
3924
3925          /* We can't use `p' to check ahead because we push
3926             a failure point to `p + mcnt' after we do this.  */
3927          p1 = p;
3928
3929          /* We need to skip no_op's before we look for the
3930             start_memory in case this on_failure_jump is happening as
3931             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3932             against aba.  */
3933          while (p1 < pend && (re_opcode_t) *p1 == no_op)
3934            p1++;
3935
3936          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3937            {
3938              /* We have a new highest active register now.  This will
3939                 get reset at the start_memory we are about to get to,
3940                 but we will have saved all the registers relevant to
3941                 this repetition op, as described above.  */
3942              highest_active_reg = *(p1 + 1) + *(p1 + 2);
3943              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3944                lowest_active_reg = *(p1 + 1);
3945            }
3946
3947          DEBUG_PRINT1 (":\n");
3948          PUSH_FAILURE_POINT (p + mcnt, d, -2);
3949          break;
3950
3951
3952        /* A smart repeat ends with `maybe_pop_jump'.
3953           We change it to either `pop_failure_jump' or `jump'.  */
3954        case maybe_pop_jump:
3955          EXTRACT_NUMBER_AND_INCR (mcnt, p);
3956          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3957          {
3958            register unsigned char *p2 = p;
3959
3960            /* Compare the beginning of the repeat with what in the
3961               pattern follows its end. If we can establish that there
3962               is nothing that they would both match, i.e., that we
3963               would have to backtrack because of (as in, e.g., `a*a')
3964               then we can change to pop_failure_jump, because we'll
3965               never have to backtrack.
3966
3967               This is not true in the case of alternatives: in
3968               `(a|ab)*' we do need to backtrack to the `ab' alternative
3969               (e.g., if the string was `ab').  But instead of trying to
3970               detect that here, the alternative has put on a dummy
3971               failure point which is what we will end up popping.  */
3972
3973            /* Skip over open/close-group commands.  */
3974            while (p2 + 2 < pend
3975                   && ((re_opcode_t) *p2 == stop_memory
3976                       || (re_opcode_t) *p2 == start_memory))
3977              p2 += 3;                  /* Skip over args, too.  */
3978
3979            /* If we're at the end of the pattern, we can change.  */
3980            if (p2 == pend)
3981              {
3982                /* Consider what happens when matching ":\(.*\)"
3983                   against ":/".  I don't really understand this code
3984                   yet.  */
3985                p[-3] = (unsigned char) pop_failure_jump;
3986                DEBUG_PRINT1
3987                  ("  End of pattern: change to `pop_failure_jump'.\n");
3988              }
3989
3990            else if ((re_opcode_t) *p2 == exactn
3991                     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
3992              {
3993                register unsigned char c
3994                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
3995                p1 = p + mcnt;
3996
3997                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
3998                   to the `maybe_finalize_jump' of this case.  Examine what
3999                   follows.  */
4000                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4001                  {
4002                    p[-3] = (unsigned char) pop_failure_jump;
4003                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4004                                  c, p1[5]);
4005                  }
4006
4007                else if ((re_opcode_t) p1[3] == charset
4008                         || (re_opcode_t) p1[3] == charset_not)
4009                  {
4010                    int not = (re_opcode_t) p1[3] == charset_not;
4011
4012                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4013                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4014                      not = !not;
4015
4016                    /* `not' is equal to 1 if c would match, which means
4017                        that we can't change to pop_failure_jump.  */
4018                    if (!not)
4019                      {
4020                        p[-3] = (unsigned char) pop_failure_jump;
4021                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4022                      }
4023                  }
4024              }
4025          }
4026          p -= 2;               /* Point at relative address again.  */
4027          if ((re_opcode_t) p[-1] != pop_failure_jump)
4028            {
4029              p[-1] = (unsigned char) jump;
4030              DEBUG_PRINT1 ("  Match => jump.\n");
4031              goto unconditional_jump;
4032            }
4033        /* Note fall through.  */
4034
4035
4036        /* The end of a simple repeat has a pop_failure_jump back to
4037           its matching on_failure_jump, where the latter will push a
4038           failure point.  The pop_failure_jump takes off failure
4039           points put on by this pop_failure_jump's matching
4040           on_failure_jump; we got through the pattern to here from the
4041           matching on_failure_jump, so didn't fail.  */
4042        case pop_failure_jump:
4043          {
4044            /* We need to pass separate storage for the lowest and
4045               highest registers, even though we don't care about the
4046               actual values.  Otherwise, we will restore only one
4047               register from the stack, since lowest will == highest in
4048               `pop_failure_point'.  */
4049            unsigned dummy_low_reg, dummy_high_reg;
4050            unsigned char *pdummy;
4051            const char *sdummy;
4052
4053            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4054            POP_FAILURE_POINT (sdummy, pdummy,
4055                               dummy_low_reg, dummy_high_reg,
4056                               reg_dummy, reg_dummy, reg_info_dummy);
4057          }
4058          /* Note fall through.  */
4059
4060
4061        /* Unconditionally jump (without popping any failure points).  */
4062        case jump:
4063        unconditional_jump:
4064          EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4065          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4066          p += mcnt;                            /* Do the jump.  */
4067          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4068          break;
4069
4070
4071        /* We need this opcode so we can detect where alternatives end
4072           in `group_match_null_string_p' et al.  */
4073        case jump_past_alt:
4074          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4075          goto unconditional_jump;
4076
4077
4078        /* Normally, the on_failure_jump pushes a failure point, which
4079           then gets popped at pop_failure_jump.  We will end up at
4080           pop_failure_jump, also, and with a pattern of, say, `a+', we
4081           are skipping over the on_failure_jump, so we have to push
4082           something meaningless for pop_failure_jump to pop.  */
4083        case dummy_failure_jump:
4084          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4085          /* It doesn't matter what we push for the string here.  What
4086             the code at `fail' tests is the value for the pattern.  */
4087          PUSH_FAILURE_POINT (0, 0, -2);
4088          goto unconditional_jump;
4089
4090
4091        /* At the end of an alternative, we need to push a dummy failure
4092           point in case we are followed by a `pop_failure_jump', because
4093           we don't want the failure point for the alternative to be
4094           popped.  For example, matching `(a|ab)*' against `aab'
4095           requires that we match the `ab' alternative.  */
4096        case push_dummy_failure:
4097          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4098          /* See comments just above at `dummy_failure_jump' about the
4099             two zeroes.  */
4100          PUSH_FAILURE_POINT (0, 0, -2);
4101          break;
4102
4103        /* Have to succeed matching what follows at least n times.
4104           After that, handle like `on_failure_jump'.  */
4105        case succeed_n:
4106          EXTRACT_NUMBER (mcnt, p + 2);
4107          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4108
4109          assert (mcnt >= 0);
4110          /* Originally, this is how many times we HAVE to succeed.  */
4111          if (mcnt > 0)
4112            {
4113               mcnt--;
4114               p += 2;
4115               STORE_NUMBER_AND_INCR (p, mcnt);
4116               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4117            }
4118          else if (mcnt == 0)
4119            {
4120              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4121              p[2] = (unsigned char) no_op;
4122              p[3] = (unsigned char) no_op;
4123              goto on_failure;
4124            }
4125          break;
4126
4127        case jump_n:
4128          EXTRACT_NUMBER (mcnt, p + 2);
4129          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4130
4131          /* Originally, this is how many times we CAN jump.  */
4132          if (mcnt)
4133            {
4134               mcnt--;
4135               STORE_NUMBER (p + 2, mcnt);
4136               goto unconditional_jump;
4137            }
4138          /* If don't have to jump any more, skip over the rest of command.  */
4139          else
4140            p += 4;
4141          break;
4142
4143        case set_number_at:
4144          {
4145            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4146
4147            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4148            p1 = p + mcnt;
4149            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4150            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4151            STORE_NUMBER (p1, mcnt);
4152            break;
4153          }
4154
4155        case wordbound:
4156          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4157          if (AT_WORD_BOUNDARY (d))
4158            break;
4159          goto fail;
4160
4161        case notwordbound:
4162          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4163          if (AT_WORD_BOUNDARY (d))
4164            goto fail;
4165          break;
4166
4167        case wordbeg:
4168          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4169          if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4170            break;
4171          goto fail;
4172
4173        case wordend:
4174          DEBUG_PRINT1 ("EXECUTING wordend.\n");
4175          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4176              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4177            break;
4178          goto fail;
4179
4180#ifdef emacs
4181#ifdef emacs19
4182        case before_dot:
4183          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4184          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4185            goto fail;
4186          break;
4187
4188        case at_dot:
4189          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4190          if (PTR_CHAR_POS ((unsigned char *) d) != point)
4191            goto fail;
4192          break;
4193
4194        case after_dot:
4195          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4196          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4197            goto fail;
4198          break;
4199#else /* not emacs19 */
4200        case at_dot:
4201          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4202          if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4203            goto fail;
4204          break;
4205#endif /* not emacs19 */
4206
4207        case syntaxspec:
4208          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4209          mcnt = *p++;
4210          goto matchsyntax;
4211
4212        case wordchar:
4213          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4214          mcnt = (int) Sword;
4215        matchsyntax:
4216          PREFETCH ();
4217          if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4218            goto fail;
4219          SET_REGS_MATCHED ();
4220          break;
4221
4222        case notsyntaxspec:
4223          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4224          mcnt = *p++;
4225          goto matchnotsyntax;
4226
4227        case notwordchar:
4228          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4229          mcnt = (int) Sword;
4230        matchnotsyntax:
4231          PREFETCH ();
4232          if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4233            goto fail;
4234          SET_REGS_MATCHED ();
4235          break;
4236
4237#else /* not emacs */
4238        case wordchar:
4239          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4240          PREFETCH ();
4241          if (!WORDCHAR_P (d))
4242            goto fail;
4243          SET_REGS_MATCHED ();
4244          d++;
4245          break;
4246
4247        case notwordchar:
4248          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4249          PREFETCH ();
4250          if (WORDCHAR_P (d))
4251            goto fail;
4252          SET_REGS_MATCHED ();
4253          d++;
4254          break;
4255#endif /* not emacs */
4256
4257        default:
4258          abort ();
4259        }
4260      continue;  /* Successfully executed one pattern command; keep going.  */
4261
4262
4263    /* We goto here if a matching operation fails. */
4264    fail:
4265      if (!FAIL_STACK_EMPTY ())
4266        { /* A restart point is known.  Restore to that state.  */
4267          DEBUG_PRINT1 ("\nFAIL:\n");
4268          POP_FAILURE_POINT (d, p,
4269                             lowest_active_reg, highest_active_reg,
4270                             regstart, regend, reg_info);
4271
4272          /* If this failure point is a dummy, try the next one.  */
4273          if (!p)
4274            goto fail;
4275
4276          /* If we failed to the end of the pattern, don't examine *p.  */
4277          assert (p <= pend);
4278          if (p < pend)
4279            {
4280              boolean is_a_jump_n = false;
4281
4282              /* If failed to a backwards jump that's part of a repetition
4283                 loop, need to pop this failure point and use the next one.  */
4284              switch ((re_opcode_t) *p)
4285                {
4286                case jump_n:
4287                  is_a_jump_n = true;
4288                case maybe_pop_jump:
4289                case pop_failure_jump:
4290                case jump:
4291                  p1 = p + 1;
4292                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4293                  p1 += mcnt;
4294
4295                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4296                      || (!is_a_jump_n
4297                          && (re_opcode_t) *p1 == on_failure_jump))
4298                    goto fail;
4299                  break;
4300                default:
4301                  /* do nothing */ ;
4302                }
4303            }
4304
4305          if (d >= string1 && d <= end1)
4306            dend = end_match_1;
4307        }
4308      else
4309        break;   /* Matching at this starting point really fails.  */
4310    } /* for (;;) */
4311
4312  if (best_regs_set)
4313    goto restore_best_regs;
4314
4315  FREE_VARIABLES ();
4316
4317  return -1;                            /* Failure to match.  */
4318} /* re_match_2 */
4319\f
4320/* Subroutine definitions for re_match_2.  */
4321
4322
4323/* We are passed P pointing to a register number after a start_memory.
4324
4325   Return true if the pattern up to the corresponding stop_memory can
4326   match the empty string, and false otherwise.
4327
4328   If we find the matching stop_memory, sets P to point to one past its number.
4329   Otherwise, sets P to an undefined byte less than or equal to END.
4330
4331   We don't handle duplicates properly (yet).  */
4332
4333static boolean
4334group_match_null_string_p (p, end, reg_info)
4335    unsigned char **p, *end;
4336    register_info_type *reg_info;
4337{
4338  int mcnt;
4339  /* Point to after the args to the start_memory.  */
4340  unsigned char *p1 = *p + 2;
4341
4342  while (p1 < end)
4343    {
4344      /* Skip over opcodes that can match nothing, and return true or
4345         false, as appropriate, when we get to one that can't, or to the
4346         matching stop_memory.  */
4347
4348      switch ((re_opcode_t) *p1)
4349        {
4350        /* Could be either a loop or a series of alternatives.  */
4351        case on_failure_jump:
4352          p1++;
4353          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4354
4355          /* If the next operation is not a jump backwards in the
4356             pattern.  */
4357
4358          if (mcnt >= 0)
4359            {
4360              /* Go through the on_failure_jumps of the alternatives,
4361                 seeing if any of the alternatives cannot match nothing.
4362                 The last alternative starts with only a jump,
4363                 whereas the rest start with on_failure_jump and end
4364                 with a jump, e.g., here is the pattern for `a|b|c':
4365
4366                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4367                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4368                 /exactn/1/c
4369
4370                 So, we have to first go through the first (n-1)
4371                 alternatives and then deal with the last one separately.  */
4372
4373
4374              /* Deal with the first (n-1) alternatives, which start
4375                 with an on_failure_jump (see above) that jumps to right
4376                 past a jump_past_alt.  */
4377
4378              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4379                {
4380                  /* `mcnt' holds how many bytes long the alternative
4381                     is, including the ending `jump_past_alt' and
4382                     its number.  */
4383
4384                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4385                                                      reg_info))
4386                    return false;
4387
4388                  /* Move to right after this alternative, including the
4389                     jump_past_alt.  */
4390                  p1 += mcnt;
4391
4392                  /* Break if it's the beginning of an n-th alternative
4393                     that doesn't begin with an on_failure_jump.  */
4394                  if ((re_opcode_t) *p1 != on_failure_jump)
4395                    break;
4396
4397                  /* Still have to check that it's not an n-th
4398                     alternative that starts with an on_failure_jump.  */
4399                  p1++;
4400                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4401                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4402                    {
4403                      /* Get to the beginning of the n-th alternative.  */
4404                      p1 -= 3;
4405                      break;
4406                    }
4407                }
4408
4409              /* Deal with the last alternative: go back and get number
4410                 of the `jump_past_alt' just before it.  `mcnt' contains
4411                 the length of the alternative.  */
4412              EXTRACT_NUMBER (mcnt, p1 - 2);
4413
4414              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4415                return false;
4416
4417              p1 += mcnt;       /* Get past the n-th alternative.  */
4418            } /* if mcnt > 0 */
4419          break;
4420
4421
4422        case stop_memory:
4423          assert (p1[1] == **p);
4424          *p = p1 + 2;
4425          return true;
4426
4427
4428        default:
4429          if (!common_op_match_null_string_p (&p1, end, reg_info))
4430            return false;
4431        }
4432    } /* while p1 < end */
4433
4434  return false;
4435} /* group_match_null_string_p */
4436
4437
4438/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4439   It expects P to be the first byte of a single alternative and END one
4440   byte past the last. The alternative can contain groups.  */
4441
4442static boolean
4443alt_match_null_string_p (p, end, reg_info)
4444    unsigned char *p, *end;
4445    register_info_type *reg_info;
4446{
4447  int mcnt;
4448  unsigned char *p1 = p;
4449
4450  while (p1 < end)
4451    {
4452      /* Skip over opcodes that can match nothing, and break when we get
4453         to one that can't.  */
4454
4455      switch ((re_opcode_t) *p1)
4456        {
4457        /* It's a loop.  */
4458        case on_failure_jump:
4459          p1++;
4460          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4461          p1 += mcnt;
4462          break;
4463
4464        default:
4465          if (!common_op_match_null_string_p (&p1, end, reg_info))
4466            return false;
4467        }
4468    }  /* while p1 < end */
4469
4470  return true;
4471} /* alt_match_null_string_p */
4472
4473
4474/* Deals with the ops common to group_match_null_string_p and
4475   alt_match_null_string_p.
4476
4477   Sets P to one after the op and its arguments, if any.  */
4478
4479static boolean
4480common_op_match_null_string_p (p, end, reg_info)
4481    unsigned char **p, *end;
4482    register_info_type *reg_info;
4483{
4484  int mcnt;
4485  boolean ret;
4486  int reg_no;
4487  unsigned char *p1 = *p;
4488
4489  switch ((re_opcode_t) *p1++)
4490    {
4491    case no_op:
4492    case begline:
4493    case endline:
4494    case begbuf:
4495    case endbuf:
4496    case wordbeg:
4497    case wordend:
4498    case wordbound:
4499    case notwordbound:
4500#ifdef emacs
4501    case before_dot:
4502    case at_dot:
4503    case after_dot:
4504#endif
4505      break;
4506
4507    case start_memory:
4508      reg_no = *p1;
4509      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4510      ret = group_match_null_string_p (&p1, end, reg_info);
4511
4512      /* Have to set this here in case we're checking a group which
4513         contains a group and a back reference to it.  */
4514
4515      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4516        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4517
4518      if (!ret)
4519        return false;
4520      break;
4521
4522    /* If this is an optimized succeed_n for zero times, make the jump.  */
4523    case jump:
4524      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4525      if (mcnt >= 0)
4526        p1 += mcnt;
4527      else
4528        return false;
4529      break;
4530
4531    case succeed_n:
4532      /* Get to the number of times to succeed.  */
4533      p1 += 2;
4534      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4535
4536      if (mcnt == 0)
4537        {
4538          p1 -= 4;
4539          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4540          p1 += mcnt;
4541        }
4542      else
4543        return false;
4544      break;
4545
4546    case duplicate:
4547      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4548        return false;
4549      break;
4550
4551    case set_number_at:
4552      p1 += 4;
4553
4554    default:
4555      /* All other opcodes mean we cannot match the empty string.  */
4556      return false;
4557  }
4558
4559  *p = p1;
4560  return true;
4561} /* common_op_match_null_string_p */
4562
4563
4564/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4565   bytes; nonzero otherwise.  */
4566
4567static int
4568bcmp_translate(
4569     unsigned char *s1,
4570     unsigned char *s2,
4571     int len,
4572     char *translate
4573)
4574{
4575  register unsigned char *p1 = s1, *p2 = s2;
4576  while (len)
4577    {
4578      if (translate[*p1++] != translate[*p2++]) return 1;
4579      len--;
4580    }
4581  return 0;
4582}
4583\f
4584/* Entry points for GNU code.  */
4585
4586/* re_compile_pattern is the GNU regular expression compiler: it
4587   compiles PATTERN (of length SIZE) and puts the result in BUFP.
4588   Returns 0 if the pattern was valid, otherwise an error string.
4589
4590   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4591   are set in BUFP on entry.
4592
4593   We call regex_compile to do the actual compilation.  */
4594
4595const char *
4596re_compile_pattern (pattern, length, bufp)
4597     const char *pattern;
4598     int length;
4599     struct re_pattern_buffer *bufp;
4600{
4601  reg_errcode_t ret;
4602
4603  /* GNU code is written to assume at least RE_NREGS registers will be set
4604     (and at least one extra will be -1).  */
4605  bufp->regs_allocated = REGS_UNALLOCATED;
4606
4607  /* And GNU code determines whether or not to get register information
4608     by passing null for the REGS argument to re_match, etc., not by
4609     setting no_sub.  */
4610  bufp->no_sub = 0;
4611
4612  /* Match anchors at newline.  */
4613  bufp->newline_anchor = 1;
4614
4615  ret = regex_compile (pattern, length, re_syntax_options, bufp);
4616
4617  return re_error_msg[(int) ret];
4618}
4619\f
4620/* Entry points compatible with 4.2 BSD regex library.  We don't define
4621   them if this is an Emacs or POSIX compilation.  */
4622
4623#if !defined (emacs) && !defined (_POSIX_SOURCE)
4624
4625/* BSD has one and only one pattern buffer.  */
4626static struct re_pattern_buffer re_comp_buf;
4627
4628char *
4629re_comp (s)
4630    const char *s;
4631{
4632  reg_errcode_t ret;
4633
4634  if (!s)
4635    {
4636      if (!re_comp_buf.buffer)
4637        return "No previous regular expression";
4638      return 0;
4639    }
4640
4641  if (!re_comp_buf.buffer)
4642    {
4643      re_comp_buf.buffer = (unsigned char *) malloc (200);
4644      if (re_comp_buf.buffer == NULL)
4645        return "Memory exhausted";
4646      re_comp_buf.allocated = 200;
4647
4648      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4649      if (re_comp_buf.fastmap == NULL)
4650        return "Memory exhausted";
4651    }
4652
4653  /* Since `re_exec' always passes NULL for the `regs' argument, we
4654     don't need to initialize the pattern buffer fields which affect it.  */
4655
4656  /* Match anchors at newlines.  */
4657  re_comp_buf.newline_anchor = 1;
4658
4659  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4660
4661  /* Yes, we're discarding `const' here.  */
4662  return (char *) re_error_msg[(int) ret];
4663}
4664
4665
4666int
4667re_exec (s)
4668    const char *s;
4669{
4670  const int len = strlen (s);
4671  return
4672    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4673}
4674#endif /* not emacs and not _POSIX_SOURCE */
4675\f
4676/* POSIX.2 functions.  Don't define these for Emacs.  */
4677
4678#ifndef emacs
4679
4680/* regcomp takes a regular expression as a string and compiles it.
4681
4682   PREG is a regex_t *.  We do not expect any fields to be initialized,
4683   since POSIX says we shouldn't.  Thus, we set
4684
4685     `buffer' to the compiled pattern;
4686     `used' to the length of the compiled pattern;
4687     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4688       REG_EXTENDED bit in CFLAGS is set; otherwise, to
4689       RE_SYNTAX_POSIX_BASIC;
4690     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4691     `fastmap' and `fastmap_accurate' to zero;
4692     `re_nsub' to the number of subexpressions in PATTERN.
4693
4694   PATTERN is the address of the pattern string.
4695
4696   CFLAGS is a series of bits which affect compilation.
4697
4698     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4699     use POSIX basic syntax.
4700
4701     If REG_NEWLINE is set, then . and [^...] don't match newline.
4702     Also, regexec will try a match beginning after every newline.
4703
4704     If REG_ICASE is set, then we considers upper- and lowercase
4705     versions of letters to be equivalent when matching.
4706
4707     If REG_NOSUB is set, then when PREG is passed to regexec, that
4708     routine will report only success or failure, and nothing about the
4709     registers.
4710
4711   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4712   the return codes and their meanings.)  */
4713
4714int
4715regcomp (preg, pattern, cflags)
4716    regex_t *preg;
4717    const char *pattern;
4718    int cflags;
4719{
4720  reg_errcode_t ret;
4721  unsigned syntax
4722    = (cflags & REG_EXTENDED) ?
4723      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4724
4725  /* regex_compile will allocate the space for the compiled pattern.  */
4726  preg->buffer = 0;
4727  preg->allocated = 0;
4728
4729  /* Don't bother to use a fastmap when searching.  This simplifies the
4730     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4731     characters after newlines into the fastmap.  This way, we just try
4732     every character.  */
4733  preg->fastmap = 0;
4734
4735  if (cflags & REG_ICASE)
4736    {
4737      unsigned i;
4738
4739      preg->translate = (char *) malloc (CHAR_SET_SIZE);
4740      if (preg->translate == NULL)
4741        return (int) REG_ESPACE;
4742
4743      /* Map uppercase characters to corresponding lowercase ones.  */
4744      for (i = 0; i < CHAR_SET_SIZE; i++)
4745        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4746    }
4747  else
4748    preg->translate = NULL;
4749
4750  /* If REG_NEWLINE is set, newlines are treated differently.  */
4751  if (cflags & REG_NEWLINE)
4752    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4753      syntax &= ~RE_DOT_NEWLINE;
4754      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4755      /* It also changes the matching behavior.  */
4756      preg->newline_anchor = 1;
4757    }
4758  else
4759    preg->newline_anchor = 0;
4760
4761  preg->no_sub = !!(cflags & REG_NOSUB);
4762
4763  /* POSIX says a null character in the pattern terminates it, so we
4764     can use strlen here in compiling the pattern.  */
4765  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4766
4767  /* POSIX doesn't distinguish between an unmatched open-group and an
4768     unmatched close-group: both are REG_EPAREN.  */
4769  if (ret == REG_ERPAREN) ret = REG_EPAREN;
4770
4771  return (int) ret;
4772}
4773
4774
4775/* regexec searches for a given pattern, specified by PREG, in the
4776   string STRING.
4777
4778   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4779   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4780   least NMATCH elements, and we set them to the offsets of the
4781   corresponding matched substrings.
4782
4783   EFLAGS specifies `execution flags' which affect matching: if
4784   REG_NOTBOL is set, then ^ does not match at the beginning of the
4785   string; if REG_NOTEOL is set, then $ does not match at the end.
4786
4787   We return 0 if we find a match and REG_NOMATCH if not.  */
4788
4789int
4790regexec (preg, string, nmatch, pmatch, eflags)
4791    const regex_t *preg;
4792    const char *string;
4793    size_t nmatch;
4794    regmatch_t pmatch[];
4795    int eflags;
4796{
4797  int ret;
4798  struct re_registers regs;
4799  regex_t private_preg;
4800  int len = strlen (string);
4801  boolean want_reg_info = !preg->no_sub && nmatch > 0;
4802
4803  private_preg = *preg;
4804
4805  private_preg.not_bol = !!(eflags & REG_NOTBOL);
4806  private_preg.not_eol = !!(eflags & REG_NOTEOL);
4807
4808  /* The user has told us exactly how many registers to return
4809     information about, via `nmatch'.  We have to pass that on to the
4810     matching routines.  */
4811  private_preg.regs_allocated = REGS_FIXED;
4812
4813  if (want_reg_info)
4814    {
4815      regs.num_regs = nmatch;
4816      regs.start = TALLOC (nmatch, regoff_t);
4817      regs.end = TALLOC (nmatch, regoff_t);
4818      if (regs.start == NULL || regs.end == NULL)
4819        return (int) REG_NOMATCH;
4820    }
4821
4822  /* Perform the searching operation.  */
4823  ret = re_search (&private_preg, string, len,
4824                   /* start: */ 0, /* range: */ len,
4825                   want_reg_info ? &regs : (struct re_registers *) 0);
4826
4827  /* Copy the register information to the POSIX structure.  */
4828  if (want_reg_info)
4829    {
4830      if (ret >= 0)
4831        {
4832          unsigned r;
4833
4834          for (r = 0; r < nmatch; r++)
4835            {
4836              pmatch[r].rm_so = regs.start[r];
4837              pmatch[r].rm_eo = regs.end[r];
4838            }
4839        }
4840
4841      /* If we needed the temporary register info, free the space now.  */
4842      free (regs.start);
4843      free (regs.end);
4844    }
4845
4846  /* We want zero return to mean success, unlike `re_search'.  */
4847  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4848}
4849
4850
4851/* Returns a message corresponding to an error code, ERRCODE, returned
4852   from either regcomp or regexec.   We don't use PREG here.  */
4853
4854size_t
4855regerror (errcode, preg, errbuf, errbuf_size)
4856    int errcode;
4857    const regex_t *preg;
4858    char *errbuf;
4859    size_t errbuf_size;
4860{
4861  const char *msg;
4862  size_t msg_size;
4863
4864  if (errcode < 0
4865      || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4866    /* Only error codes returned by the rest of the code should be passed
4867       to this routine.  If we are given anything else, or if other regex
4868       code generates an invalid error code, then the program has a bug.
4869       Dump core so we can fix it.  */
4870    abort ();
4871
4872  msg = re_error_msg[errcode];
4873
4874  /* POSIX doesn't require that we do anything in this case, but why
4875     not be nice.  */
4876  if (! msg)
4877    msg = "Success";
4878
4879  msg_size = strlen (msg) + 1; /* Includes the null.  */
4880
4881  if (errbuf_size != 0)
4882    {
4883      if (msg_size > errbuf_size)
4884        {
4885          strncpy (errbuf, msg, errbuf_size - 1);
4886          errbuf[errbuf_size - 1] = 0;
4887        }
4888      else
4889        strcpy (errbuf, msg);
4890    }
4891
4892  return msg_size;
4893}
4894
4895
4896/* Free dynamically allocated space used by PREG.  */
4897
4898void
4899regfree (preg)
4900    regex_t *preg;
4901{
4902  if (preg->buffer != NULL)
4903    free (preg->buffer);
4904  preg->buffer = NULL;
4905
4906  preg->allocated = 0;
4907  preg->used = 0;
4908
4909  if (preg->fastmap != NULL)
4910    free (preg->fastmap);
4911  preg->fastmap = NULL;
4912  preg->fastmap_accurate = 0;
4913
4914  if (preg->translate != NULL)
4915    free (preg->translate);
4916  preg->translate = NULL;
4917}
4918
4919#endif /* not emacs  */
4920\f
4921/*
4922Local variables:
4923make-backup-files: t
4924version-control: t
4925trim-versions-without-asking: nil
4926End:
4927*/