kwset.con commit Add string search routines from GNU grep (05f3dbb)
   1/* kwset.c - search for any of a set of keywords.
   2   Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc.
   3
   4   This program is free software; you can redistribute it and/or modify
   5   it under the terms of the GNU General Public License as published by
   6   the Free Software Foundation; either version 2, or (at your option)
   7   any later version.
   8
   9   This program is distributed in the hope that it will be useful,
  10   but WITHOUT ANY WARRANTY; without even the implied warranty of
  11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12   GNU General Public License for more details.
  13
  14   You should have received a copy of the GNU General Public License
  15   along with this program; if not, write to the Free Software
  16   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  17   02110-1301, USA.  */
  18
  19/* Written August 1989 by Mike Haertel.
  20   The author may be reached (Email) at the address mike@ai.mit.edu,
  21   or (US mail) as Mike Haertel c/o Free Software Foundation. */
  22
  23/* The algorithm implemented by these routines bears a startling resemblence
  24   to one discovered by Beate Commentz-Walter, although it is not identical.
  25   See "A String Matching Algorithm Fast on the Average," Technical Report,
  26   IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
  27   Heidelberg, Germany.  See also Aho, A.V., and M. Corasick, "Efficient
  28   String Matching:  An Aid to Bibliographic Search," CACM June 1975,
  29   Vol. 18, No. 6, which describes the failure function used below. */
  30
  31#ifdef HAVE_CONFIG_H
  32# include <config.h>
  33#endif
  34#include <sys/types.h>
  35#include "system.h"
  36#include "kwset.h"
  37#include "obstack.h"
  38
  39#ifdef GREP
  40extern char *xmalloc();
  41# undef malloc
  42# define malloc xmalloc
  43#endif
  44
  45#define NCHAR (UCHAR_MAX + 1)
  46#define obstack_chunk_alloc malloc
  47#define obstack_chunk_free free
  48
  49#define U(c) ((unsigned char) (c))
  50
  51/* Balanced tree of edges and labels leaving a given trie node. */
  52struct tree
  53{
  54  struct tree *llink;           /* Left link; MUST be first field. */
  55  struct tree *rlink;           /* Right link (to larger labels). */
  56  struct trie *trie;            /* Trie node pointed to by this edge. */
  57  unsigned char label;          /* Label on this edge. */
  58  char balance;                 /* Difference in depths of subtrees. */
  59};
  60
  61/* Node of a trie representing a set of reversed keywords. */
  62struct trie
  63{
  64  unsigned int accepting;       /* Word index of accepted word, or zero. */
  65  struct tree *links;           /* Tree of edges leaving this node. */
  66  struct trie *parent;          /* Parent of this node. */
  67  struct trie *next;            /* List of all trie nodes in level order. */
  68  struct trie *fail;            /* Aho-Corasick failure function. */
  69  int depth;                    /* Depth of this node from the root. */
  70  int shift;                    /* Shift function for search failures. */
  71  int maxshift;                 /* Max shift of self and descendents. */
  72};
  73
  74/* Structure returned opaquely to the caller, containing everything. */
  75struct kwset
  76{
  77  struct obstack obstack;       /* Obstack for node allocation. */
  78  int words;                    /* Number of words in the trie. */
  79  struct trie *trie;            /* The trie itself. */
  80  int mind;                     /* Minimum depth of an accepting node. */
  81  int maxd;                     /* Maximum depth of any node. */
  82  unsigned char delta[NCHAR];   /* Delta table for rapid search. */
  83  struct trie *next[NCHAR];     /* Table of children of the root. */
  84  char *target;                 /* Target string if there's only one. */
  85  int mind2;                    /* Used in Boyer-Moore search for one string. */
  86  char const *trans;            /* Character translation table. */
  87};
  88
  89/* Allocate and initialize a keyword set object, returning an opaque
  90   pointer to it.  Return NULL if memory is not available. */
  91kwset_t
  92kwsalloc (char const *trans)
  93{
  94  struct kwset *kwset;
  95
  96  kwset = (struct kwset *) malloc(sizeof (struct kwset));
  97  if (!kwset)
  98    return NULL;
  99
 100  obstack_init(&kwset->obstack);
 101  kwset->words = 0;
 102  kwset->trie
 103    = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
 104  if (!kwset->trie)
 105    {
 106      kwsfree((kwset_t) kwset);
 107      return NULL;
 108    }
 109  kwset->trie->accepting = 0;
 110  kwset->trie->links = NULL;
 111  kwset->trie->parent = NULL;
 112  kwset->trie->next = NULL;
 113  kwset->trie->fail = NULL;
 114  kwset->trie->depth = 0;
 115  kwset->trie->shift = 0;
 116  kwset->mind = INT_MAX;
 117  kwset->maxd = -1;
 118  kwset->target = NULL;
 119  kwset->trans = trans;
 120
 121  return (kwset_t) kwset;
 122}
 123
 124/* This upper bound is valid for CHAR_BIT >= 4 and
 125   exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
 126#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
 127
 128/* Add the given string to the contents of the keyword set.  Return NULL
 129   for success, an error message otherwise. */
 130const char *
 131kwsincr (kwset_t kws, char const *text, size_t len)
 132{
 133  struct kwset *kwset;
 134  register struct trie *trie;
 135  register unsigned char label;
 136  register struct tree *link;
 137  register int depth;
 138  struct tree *links[DEPTH_SIZE];
 139  enum { L, R } dirs[DEPTH_SIZE];
 140  struct tree *t, *r, *l, *rl, *lr;
 141
 142  kwset = (struct kwset *) kws;
 143  trie = kwset->trie;
 144  text += len;
 145
 146  /* Descend the trie (built of reversed keywords) character-by-character,
 147     installing new nodes when necessary. */
 148  while (len--)
 149    {
 150      label = kwset->trans ? kwset->trans[U(*--text)] : *--text;
 151
 152      /* Descend the tree of outgoing links for this trie node,
 153         looking for the current character and keeping track
 154         of the path followed. */
 155      link = trie->links;
 156      links[0] = (struct tree *) &trie->links;
 157      dirs[0] = L;
 158      depth = 1;
 159
 160      while (link && label != link->label)
 161        {
 162          links[depth] = link;
 163          if (label < link->label)
 164            dirs[depth++] = L, link = link->llink;
 165          else
 166            dirs[depth++] = R, link = link->rlink;
 167        }
 168
 169      /* The current character doesn't have an outgoing link at
 170         this trie node, so build a new trie node and install
 171         a link in the current trie node's tree. */
 172      if (!link)
 173        {
 174          link = (struct tree *) obstack_alloc(&kwset->obstack,
 175                                               sizeof (struct tree));
 176          if (!link)
 177            return _("memory exhausted");
 178          link->llink = NULL;
 179          link->rlink = NULL;
 180          link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
 181                                                     sizeof (struct trie));
 182          if (!link->trie)
 183            {
 184              obstack_free(&kwset->obstack, link);
 185              return _("memory exhausted");
 186            }
 187          link->trie->accepting = 0;
 188          link->trie->links = NULL;
 189          link->trie->parent = trie;
 190          link->trie->next = NULL;
 191          link->trie->fail = NULL;
 192          link->trie->depth = trie->depth + 1;
 193          link->trie->shift = 0;
 194          link->label = label;
 195          link->balance = 0;
 196
 197          /* Install the new tree node in its parent. */
 198          if (dirs[--depth] == L)
 199            links[depth]->llink = link;
 200          else
 201            links[depth]->rlink = link;
 202
 203          /* Back up the tree fixing the balance flags. */
 204          while (depth && !links[depth]->balance)
 205            {
 206              if (dirs[depth] == L)
 207                --links[depth]->balance;
 208              else
 209                ++links[depth]->balance;
 210              --depth;
 211            }
 212
 213          /* Rebalance the tree by pointer rotations if necessary. */
 214          if (depth && ((dirs[depth] == L && --links[depth]->balance)
 215                        || (dirs[depth] == R && ++links[depth]->balance)))
 216            {
 217              switch (links[depth]->balance)
 218                {
 219                case (char) -2:
 220                  switch (dirs[depth + 1])
 221                    {
 222                    case L:
 223                      r = links[depth], t = r->llink, rl = t->rlink;
 224                      t->rlink = r, r->llink = rl;
 225                      t->balance = r->balance = 0;
 226                      break;
 227                    case R:
 228                      r = links[depth], l = r->llink, t = l->rlink;
 229                      rl = t->rlink, lr = t->llink;
 230                      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
 231                      l->balance = t->balance != 1 ? 0 : -1;
 232                      r->balance = t->balance != (char) -1 ? 0 : 1;
 233                      t->balance = 0;
 234                      break;
 235                    default:
 236                      abort ();
 237                    }
 238                  break;
 239                case 2:
 240                  switch (dirs[depth + 1])
 241                    {
 242                    case R:
 243                      l = links[depth], t = l->rlink, lr = t->llink;
 244                      t->llink = l, l->rlink = lr;
 245                      t->balance = l->balance = 0;
 246                      break;
 247                    case L:
 248                      l = links[depth], r = l->rlink, t = r->llink;
 249                      lr = t->llink, rl = t->rlink;
 250                      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
 251                      l->balance = t->balance != 1 ? 0 : -1;
 252                      r->balance = t->balance != (char) -1 ? 0 : 1;
 253                      t->balance = 0;
 254                      break;
 255                    default:
 256                      abort ();
 257                    }
 258                  break;
 259                default:
 260                  abort ();
 261                }
 262
 263              if (dirs[depth - 1] == L)
 264                links[depth - 1]->llink = t;
 265              else
 266                links[depth - 1]->rlink = t;
 267            }
 268        }
 269
 270      trie = link->trie;
 271    }
 272
 273  /* Mark the node we finally reached as accepting, encoding the
 274     index number of this word in the keyword set so far. */
 275  if (!trie->accepting)
 276    trie->accepting = 1 + 2 * kwset->words;
 277  ++kwset->words;
 278
 279  /* Keep track of the longest and shortest string of the keyword set. */
 280  if (trie->depth < kwset->mind)
 281    kwset->mind = trie->depth;
 282  if (trie->depth > kwset->maxd)
 283    kwset->maxd = trie->depth;
 284
 285  return NULL;
 286}
 287
 288/* Enqueue the trie nodes referenced from the given tree in the
 289   given queue. */
 290static void
 291enqueue (struct tree *tree, struct trie **last)
 292{
 293  if (!tree)
 294    return;
 295  enqueue(tree->llink, last);
 296  enqueue(tree->rlink, last);
 297  (*last) = (*last)->next = tree->trie;
 298}
 299
 300/* Compute the Aho-Corasick failure function for the trie nodes referenced
 301   from the given tree, given the failure function for their parent as
 302   well as a last resort failure node. */
 303static void
 304treefails (register struct tree const *tree, struct trie const *fail,
 305           struct trie *recourse)
 306{
 307  register struct tree *link;
 308
 309  if (!tree)
 310    return;
 311
 312  treefails(tree->llink, fail, recourse);
 313  treefails(tree->rlink, fail, recourse);
 314
 315  /* Find, in the chain of fails going back to the root, the first
 316     node that has a descendent on the current label. */
 317  while (fail)
 318    {
 319      link = fail->links;
 320      while (link && tree->label != link->label)
 321        if (tree->label < link->label)
 322          link = link->llink;
 323        else
 324          link = link->rlink;
 325      if (link)
 326        {
 327          tree->trie->fail = link->trie;
 328          return;
 329        }
 330      fail = fail->fail;
 331    }
 332
 333  tree->trie->fail = recourse;
 334}
 335
 336/* Set delta entries for the links of the given tree such that
 337   the preexisting delta value is larger than the current depth. */
 338static void
 339treedelta (register struct tree const *tree,
 340           register unsigned int depth,
 341           unsigned char delta[])
 342{
 343  if (!tree)
 344    return;
 345  treedelta(tree->llink, depth, delta);
 346  treedelta(tree->rlink, depth, delta);
 347  if (depth < delta[tree->label])
 348    delta[tree->label] = depth;
 349}
 350
 351/* Return true if A has every label in B. */
 352static int
 353hasevery (register struct tree const *a, register struct tree const *b)
 354{
 355  if (!b)
 356    return 1;
 357  if (!hasevery(a, b->llink))
 358    return 0;
 359  if (!hasevery(a, b->rlink))
 360    return 0;
 361  while (a && b->label != a->label)
 362    if (b->label < a->label)
 363      a = a->llink;
 364    else
 365      a = a->rlink;
 366  return !!a;
 367}
 368
 369/* Compute a vector, indexed by character code, of the trie nodes
 370   referenced from the given tree. */
 371static void
 372treenext (struct tree const *tree, struct trie *next[])
 373{
 374  if (!tree)
 375    return;
 376  treenext(tree->llink, next);
 377  treenext(tree->rlink, next);
 378  next[tree->label] = tree->trie;
 379}
 380
 381/* Compute the shift for each trie node, as well as the delta
 382   table and next cache for the given keyword set. */
 383const char *
 384kwsprep (kwset_t kws)
 385{
 386  register struct kwset *kwset;
 387  register int i;
 388  register struct trie *curr;
 389  register char const *trans;
 390  unsigned char delta[NCHAR];
 391
 392  kwset = (struct kwset *) kws;
 393
 394  /* Initial values for the delta table; will be changed later.  The
 395     delta entry for a given character is the smallest depth of any
 396     node at which an outgoing edge is labeled by that character. */
 397  memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR);
 398
 399  /* Check if we can use the simple boyer-moore algorithm, instead
 400     of the hairy commentz-walter algorithm. */
 401  if (kwset->words == 1 && kwset->trans == NULL)
 402    {
 403      char c;
 404
 405      /* Looking for just one string.  Extract it from the trie. */
 406      kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
 407      if (!kwset->target)
 408        return _("memory exhausted");
 409      for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
 410        {
 411          kwset->target[i] = curr->links->label;
 412          curr = curr->links->trie;
 413        }
 414      /* Build the Boyer Moore delta.  Boy that's easy compared to CW. */
 415      for (i = 0; i < kwset->mind; ++i)
 416        delta[U(kwset->target[i])] = kwset->mind - (i + 1);
 417      /* Find the minimal delta2 shift that we might make after
 418         a backwards match has failed. */
 419      c = kwset->target[kwset->mind - 1];
 420      for (i = kwset->mind - 2; i >= 0; --i)
 421        if (kwset->target[i] == c)
 422          break;
 423      kwset->mind2 = kwset->mind - (i + 1);
 424    }
 425  else
 426    {
 427      register struct trie *fail;
 428      struct trie *last, *next[NCHAR];
 429
 430      /* Traverse the nodes of the trie in level order, simultaneously
 431         computing the delta table, failure function, and shift function. */
 432      for (curr = last = kwset->trie; curr; curr = curr->next)
 433        {
 434          /* Enqueue the immediate descendents in the level order queue. */
 435          enqueue(curr->links, &last);
 436
 437          curr->shift = kwset->mind;
 438          curr->maxshift = kwset->mind;
 439
 440          /* Update the delta table for the descendents of this node. */
 441          treedelta(curr->links, curr->depth, delta);
 442
 443          /* Compute the failure function for the decendents of this node. */
 444          treefails(curr->links, curr->fail, kwset->trie);
 445
 446          /* Update the shifts at each node in the current node's chain
 447             of fails back to the root. */
 448          for (fail = curr->fail; fail; fail = fail->fail)
 449            {
 450              /* If the current node has some outgoing edge that the fail
 451                 doesn't, then the shift at the fail should be no larger
 452                 than the difference of their depths. */
 453              if (!hasevery(fail->links, curr->links))
 454                if (curr->depth - fail->depth < fail->shift)
 455                  fail->shift = curr->depth - fail->depth;
 456
 457              /* If the current node is accepting then the shift at the
 458                 fail and its descendents should be no larger than the
 459                 difference of their depths. */
 460              if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
 461                fail->maxshift = curr->depth - fail->depth;
 462            }
 463        }
 464
 465      /* Traverse the trie in level order again, fixing up all nodes whose
 466         shift exceeds their inherited maxshift. */
 467      for (curr = kwset->trie->next; curr; curr = curr->next)
 468        {
 469          if (curr->maxshift > curr->parent->maxshift)
 470            curr->maxshift = curr->parent->maxshift;
 471          if (curr->shift > curr->maxshift)
 472            curr->shift = curr->maxshift;
 473        }
 474
 475      /* Create a vector, indexed by character code, of the outgoing links
 476         from the root node. */
 477      for (i = 0; i < NCHAR; ++i)
 478        next[i] = NULL;
 479      treenext(kwset->trie->links, next);
 480
 481      if ((trans = kwset->trans) != NULL)
 482        for (i = 0; i < NCHAR; ++i)
 483          kwset->next[i] = next[U(trans[i])];
 484      else
 485        memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
 486    }
 487
 488  /* Fix things up for any translation table. */
 489  if ((trans = kwset->trans) != NULL)
 490    for (i = 0; i < NCHAR; ++i)
 491      kwset->delta[i] = delta[U(trans[i])];
 492  else
 493    memcpy(kwset->delta, delta, NCHAR);
 494
 495  return NULL;
 496}
 497
 498/* Fast boyer-moore search. */
 499static size_t
 500bmexec (kwset_t kws, char const *text, size_t size)
 501{
 502  struct kwset const *kwset;
 503  register unsigned char const *d1;
 504  register char const *ep, *sp, *tp;
 505  register int d, gc, i, len, md2;
 506
 507  kwset = (struct kwset const *) kws;
 508  len = kwset->mind;
 509
 510  if (len == 0)
 511    return 0;
 512  if (len > size)
 513    return -1;
 514  if (len == 1)
 515    {
 516      tp = memchr (text, kwset->target[0], size);
 517      return tp ? tp - text : -1;
 518    }
 519
 520  d1 = kwset->delta;
 521  sp = kwset->target + len;
 522  gc = U(sp[-2]);
 523  md2 = kwset->mind2;
 524  tp = text + len;
 525
 526  /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
 527  if (size > 12 * len)
 528    /* 11 is not a bug, the initial offset happens only once. */
 529    for (ep = text + size - 11 * len;;)
 530      {
 531        while (tp <= ep)
 532          {
 533            d = d1[U(tp[-1])], tp += d;
 534            d = d1[U(tp[-1])], tp += d;
 535            if (d == 0)
 536              goto found;
 537            d = d1[U(tp[-1])], tp += d;
 538            d = d1[U(tp[-1])], tp += d;
 539            d = d1[U(tp[-1])], tp += d;
 540            if (d == 0)
 541              goto found;
 542            d = d1[U(tp[-1])], tp += d;
 543            d = d1[U(tp[-1])], tp += d;
 544            d = d1[U(tp[-1])], tp += d;
 545            if (d == 0)
 546              goto found;
 547            d = d1[U(tp[-1])], tp += d;
 548            d = d1[U(tp[-1])], tp += d;
 549          }
 550        break;
 551      found:
 552        if (U(tp[-2]) == gc)
 553          {
 554            for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
 555              ;
 556            if (i > len)
 557              return tp - len - text;
 558          }
 559        tp += md2;
 560      }
 561
 562  /* Now we have only a few characters left to search.  We
 563     carefully avoid ever producing an out-of-bounds pointer. */
 564  ep = text + size;
 565  d = d1[U(tp[-1])];
 566  while (d <= ep - tp)
 567    {
 568      d = d1[U((tp += d)[-1])];
 569      if (d != 0)
 570        continue;
 571      if (U(tp[-2]) == gc)
 572        {
 573          for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
 574            ;
 575          if (i > len)
 576            return tp - len - text;
 577        }
 578      d = md2;
 579    }
 580
 581  return -1;
 582}
 583
 584/* Hairy multiple string search. */
 585static size_t
 586cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
 587{
 588  struct kwset const *kwset;
 589  struct trie * const *next;
 590  struct trie const *trie;
 591  struct trie const *accept;
 592  char const *beg, *lim, *mch, *lmch;
 593  register unsigned char c;
 594  register unsigned char const *delta;
 595  register int d;
 596  register char const *end, *qlim;
 597  register struct tree const *tree;
 598  register char const *trans;
 599
 600#ifdef lint
 601  accept = NULL;
 602#endif
 603
 604  /* Initialize register copies and look for easy ways out. */
 605  kwset = (struct kwset *) kws;
 606  if (len < kwset->mind)
 607    return -1;
 608  next = kwset->next;
 609  delta = kwset->delta;
 610  trans = kwset->trans;
 611  lim = text + len;
 612  end = text;
 613  if ((d = kwset->mind) != 0)
 614    mch = NULL;
 615  else
 616    {
 617      mch = text, accept = kwset->trie;
 618      goto match;
 619    }
 620
 621  if (len >= 4 * kwset->mind)
 622    qlim = lim - 4 * kwset->mind;
 623  else
 624    qlim = NULL;
 625
 626  while (lim - end >= d)
 627    {
 628      if (qlim && end <= qlim)
 629        {
 630          end += d - 1;
 631          while ((d = delta[c = *end]) && end < qlim)
 632            {
 633              end += d;
 634              end += delta[U(*end)];
 635              end += delta[U(*end)];
 636            }
 637          ++end;
 638        }
 639      else
 640        d = delta[c = (end += d)[-1]];
 641      if (d)
 642        continue;
 643      beg = end - 1;
 644      trie = next[c];
 645      if (trie->accepting)
 646        {
 647          mch = beg;
 648          accept = trie;
 649        }
 650      d = trie->shift;
 651      while (beg > text)
 652        {
 653          c = trans ? trans[U(*--beg)] : *--beg;
 654          tree = trie->links;
 655          while (tree && c != tree->label)
 656            if (c < tree->label)
 657              tree = tree->llink;
 658            else
 659              tree = tree->rlink;
 660          if (tree)
 661            {
 662              trie = tree->trie;
 663              if (trie->accepting)
 664                {
 665                  mch = beg;
 666                  accept = trie;
 667                }
 668            }
 669          else
 670            break;
 671          d = trie->shift;
 672        }
 673      if (mch)
 674        goto match;
 675    }
 676  return -1;
 677
 678 match:
 679  /* Given a known match, find the longest possible match anchored
 680     at or before its starting point.  This is nearly a verbatim
 681     copy of the preceding main search loops. */
 682  if (lim - mch > kwset->maxd)
 683    lim = mch + kwset->maxd;
 684  lmch = 0;
 685  d = 1;
 686  while (lim - end >= d)
 687    {
 688      if ((d = delta[c = (end += d)[-1]]) != 0)
 689        continue;
 690      beg = end - 1;
 691      if (!(trie = next[c]))
 692        {
 693          d = 1;
 694          continue;
 695        }
 696      if (trie->accepting && beg <= mch)
 697        {
 698          lmch = beg;
 699          accept = trie;
 700        }
 701      d = trie->shift;
 702      while (beg > text)
 703        {
 704          c = trans ? trans[U(*--beg)] : *--beg;
 705          tree = trie->links;
 706          while (tree && c != tree->label)
 707            if (c < tree->label)
 708              tree = tree->llink;
 709            else
 710              tree = tree->rlink;
 711          if (tree)
 712            {
 713              trie = tree->trie;
 714              if (trie->accepting && beg <= mch)
 715                {
 716                  lmch = beg;
 717                  accept = trie;
 718                }
 719            }
 720          else
 721            break;
 722          d = trie->shift;
 723        }
 724      if (lmch)
 725        {
 726          mch = lmch;
 727          goto match;
 728        }
 729      if (!d)
 730        d = 1;
 731    }
 732
 733  if (kwsmatch)
 734    {
 735      kwsmatch->index = accept->accepting / 2;
 736      kwsmatch->offset[0] = mch - text;
 737      kwsmatch->size[0] = accept->depth;
 738    }
 739  return mch - text;
 740}
 741
 742/* Search through the given text for a match of any member of the
 743   given keyword set.  Return a pointer to the first character of
 744   the matching substring, or NULL if no match is found.  If FOUNDLEN
 745   is non-NULL store in the referenced location the length of the
 746   matching substring.  Similarly, if FOUNDIDX is non-NULL, store
 747   in the referenced location the index number of the particular
 748   keyword matched. */
 749size_t
 750kwsexec (kwset_t kws, char const *text, size_t size,
 751         struct kwsmatch *kwsmatch)
 752{
 753  struct kwset const *kwset = (struct kwset *) kws;
 754  if (kwset->words == 1 && kwset->trans == NULL)
 755    {
 756      size_t ret = bmexec (kws, text, size);
 757      if (kwsmatch != NULL && ret != (size_t) -1)
 758        {
 759          kwsmatch->index = 0;
 760          kwsmatch->offset[0] = ret;
 761          kwsmatch->size[0] = kwset->mind;
 762        }
 763      return ret;
 764    }
 765  else
 766    return cwexec(kws, text, size, kwsmatch);
 767}
 768
 769/* Free the components of the given keyword set. */
 770void
 771kwsfree (kwset_t kws)
 772{
 773  struct kwset *kwset;
 774
 775  kwset = (struct kwset *) kws;
 776  obstack_free(&kwset->obstack, NULL);
 777  free(kws);
 778}