Latest from Jeffrey Ollie.

Infinite failure stack, some bugs fixed (fastmap, star_jump, register bug).

Latest from Jeffrey Ollie.
Infinite failure stack, some bugs fixed (fastmap, star_jump, register bug).
004c1e1d · Guido van Rossum · 1681429b · 004c1e1d · 004c1e1d
Commit 004c1e1d authored May 09, 1997 by Guido van Rossum
Show whitespace changes
Inline Side-by-side

Showing with 1680 additions and 1473 deletions

Modules/regexpr.c Modules/regexpr.c +1679 -1454

Modules/regexpr.h Modules/regexpr.h +1 -19

No files found.
--- a/Modules/regexpr.c
+++ b/Modules/regexpr.c
-/*
-
-regexpr.c
-
-Author: Tatu Ylonen <ylo@ngs.fi>
-
-Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
-
-Permission to use, copy, modify, distribute, and sell this software
-and its documentation for any purpose is hereby granted without fee,
-provided that the above copyright notice appear in all copies.  This
-software is provided "as is" without express or implied warranty.
-
-Created: Thu Sep 26 17:14:05 1991 ylo
-Last modified: Mon Nov  4 17:06:48 1991 ylo
-Ported to Think C: 19 Jan 1992 guido@cwi.nl
-
-This code draws many ideas from the regular expression packages by
-Henry Spencer of the University of Toronto and Richard Stallman of the
-Free Software Foundation.
-
-Emacs-specific code and syntax table code is almost directly borrowed
-from GNU regexp.
-
-*/
+/* regexpr.c
+ *
+ * Author: Tatu Ylonen <ylo@ngs.fi>
+ *
+ * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without
+ * fee, provided that the above copyright notice appear in all copies.
+ * This software is provided "as is" without express or implied
+ * warranty.
+ *
+ * Created: Thu Sep 26 17:14:05 1991 ylo
+ * Last modified: Mon Nov  4 17:06:48 1991 ylo
+ * Ported to Think C: 19 Jan 1992 guido@cwi.nl
+ *
+ * This code draws many ideas from the regular expression packages by
+ * Henry Spencer of the University of Toronto and Richard Stallman of
+ * the Free Software Foundation.
+ *
+ * Emacs-specific code and syntax table code is almost directly borrowed
+ * from GNU regexp.
+ *
+ * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April
+ * 1997.  Thanks for bug reports and ideas from Andrew Kuchling, Tim
+ * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and
+ * probably one or two others that I'm forgetting.
+ *
+ * $Id$ */

 #include "config.h" /* For Win* specific redefinition of printf c.s. */

-#include "myproto.h" /* For Py_PROTO macro --Guido */
+#include "myproto.h" /* For PROTO macro --Guido */

 #include <stdio.h>
+
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+
 #include <assert.h>
 #include "regexpr.h"

@@ -48,8 +57,316 @@ char *realloc();
 #endif /* __STDC__ */
 #endif /* THINK_C */

-#define MACRO_BEGIN do {
-#define MACRO_END } while (0)
+/* The stack implementation is taken from an idea by Andrew Kuchling.
+ * It's a doubly linked list of arrays. The advantage of this over a
+ * simple linked list is that the number of mallocs required is
+ * reduced. It also makes it possible to statically allocate enough
+ * space so that small patterns don't ever need to call malloc.
+ *
+ * The advantage over a single array that is periodically realloced
+ * when more space is needed is that we avoid ever copying the
+ * stack. */
+
+/* item_t is the basic stack element.  Defined as a union of
+ * structures so that registers, failure points, and counters can all
+ * be pushed/popped from the stack.  There's nothing built into the
+ * item to keep track of whether a certain stack item is a register, a
+ * failure point, or a counter; the code popping an item has to know
+ * which member was pushed. */
+
+typedef union item_t
+{
+      struct
+      {
+	    int num;	/* index of the saved register */
+	    int level;	/* the register's 'changed' value when it was saved */
+	    char *start;	/* saved register start */
+	    char *end;	/* saved register end */
+      } reg;
+      struct
+      {
+	    int count;	/* state.count at the time the point was pushed */
+	    int level;	/* state.level at the time the point was pushed */
+	    int phantom;	/* 0 from PUSH_FAILURE, 1 when pushed by UPDATE_FAILURE */
+	    char *code;	/* code pointer handed back by POP_FAILURE */
+	    char *text;	/* text pointer handed back by POP_FAILURE */
+      } fail;
+      struct
+      {
+	    int num;
+	    int level;
+	    int count;
+      } cntr;	/* NOTE(review): not referenced by the stack macros visible here */
+} item_t;
+
+#define STACK_PAGE_SIZE 256
+#define NUM_REGISTERS 256
+
+/* A 'page' of stack items: STACK_PAGE_SIZE items plus the links to
+ * the neighbouring pages of the doubly linked list. */
+
+typedef struct item_page_t
+{
+      item_t items[STACK_PAGE_SIZE];
+      struct item_page_t *prev;	/* NULL on the first page (see STACK_EMPTY) */
+      struct item_page_t *next;	/* NULL until STACK_NEXT links in another page */
+} item_page_t;
+
+
+/* Working state of one match attempt: the backtracking stack, the
+ * current register contents, and the counters maintained by the
+ * register and failure-point macros. */
+typedef struct match_state
+{
+      /* Structure to encapsulate the stack. */
+      struct
+      {
+	    /* Index into the current page; it names the slot the
+	     * next pushed item will occupy.  To pop an item when
+	     * index == 0, move to the previous page and set index =
+	     * STACK_PAGE_SIZE - 1; otherwise decrement index.  To
+	     * push an item when index == STACK_PAGE_SIZE, move to
+	     * the next page (allocating and linking in a new page if
+	     * there is none) and set index = 0.  In either case the
+	     * item is then stored and index is incremented. */
+	    int index;
+	    item_page_t *current; /* Pointer to the current page. */
+	    item_page_t first; /* First page is statically allocated. */
+      } stack;
+      /* Current start and end text pointers of each register. */
+      char *start[NUM_REGISTERS];
+      char *end[NUM_REGISTERS];
+
+      /* changed[reg] is the value of 'level' at which register reg
+       * was last saved on the stack (see SET_REG_START/SET_REG_END). */
+      int changed[NUM_REGISTERS];
+      /* The number of registers that have been pushed onto the stack
+       * since the last failure point. */
+      int count;
+      /* Used to control when registers need to be pushed onto the
+       * stack. */
+      int level;
+      /* The number of failure points on the stack. */
+      int point;
+} match_state;
+
+/* Discard the top 'count' stack items without looking at them.
+ * 'on_error' is executed if the first page is reached while items
+ * still remain to be discarded; it is not followed by any recovery
+ * here, so it presumably has to transfer control (goto/return).
+ * Expands to two statements and uses its arguments textually, so use
+ * it only as a complete statement inside braces. */
+
+#define STACK_DISCARD(stack, count, on_error) \
+stack.index -= count; \
+while (stack.index < 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   stack.current = stack.current->prev; \
+   stack.index += STACK_PAGE_SIZE; \
+}
+
+/* Store a pointer to the previous item on the stack. Used to pop an
+ * item off of the stack: the stack position is moved back by one and
+ * 'top' is set to the item that was on top.  'on_error' is executed
+ * when the stack is empty (index 0 on the first page).  Expands to an
+ * if/else followed by a separate assignment, so it must not be used
+ * as the unbraced body of another statement. */
+
+#define STACK_PREV(stack, top, on_error) \
+if (stack.index == 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   stack.current = stack.current->prev; \
+   stack.index = STACK_PAGE_SIZE - 1; \
+} \
+else \
+   stack.index--; \
+top = &(stack.current->items[stack.index])
+
+/* Store a pointer to the next item on the stack. Used to push an item
+ * on to the stack: 'top' is set to the next free slot and the stack
+ * position is advanced past it; the caller then fills in the item.
+ * When the current page is full the next page is used, and one is
+ * malloc'ed and linked in if none exists yet.  'on_error' is executed
+ * if that malloc fails.  Expands to an if statement followed by a
+ * separate assignment, so it must not be used as the unbraced body
+ * of another statement. */
+
+#define STACK_NEXT(stack, top, on_error) \
+if (stack.index == STACK_PAGE_SIZE) \
+{ \
+   if (stack.current->next == NULL) \
+   { \
+      stack.current->next = malloc(sizeof(item_page_t)); \
+      if (stack.current->next == NULL) \
+         on_error; \
+      stack.current->next->prev = stack.current; \
+      stack.current->next->next = NULL; \
+   } \
+   stack.current = stack.current->next; \
+   stack.index = 0; \
+} \
+top = &(stack.current->items[stack.index++])
+
+/* Store a pointer to the item that is 'count' items back in the
+ * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to
+ * STACK_TOP(stack, top, on_error).  The stack itself is not modified:
+ * the walk uses local copies of the page pointer and index.
+ * 'on_error' is executed if fewer than 'count' items are on the
+ * stack. */
+
+#define STACK_BACK(stack, top, count, on_error) \
+{ \
+   int index; \
+   item_page_t *current; \
+   current = stack.current; \
+   index = stack.index - (count); \
+   while (index < 0) \
+   { \
+      if (current->prev == NULL) \
+	 on_error; \
+      current = current->prev; \
+      index += STACK_PAGE_SIZE; \
+   } \
+   top = &(current->items[index]); \
+}
+
+/* Store a pointer to the top item on the stack. Execute the
+ * 'on_error' code if there are no items on the stack.  The stack
+ * position is left unchanged.  When index is 0 the top item is the
+ * last slot of the previous page. */
+
+#define STACK_TOP(stack, top, on_error) \
+if (stack.index == 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \
+} \
+else \
+   top = &(stack.current->items[stack.index - 1])
+
+/* Test to see if the stack is empty, i.e. the position is at slot 0
+ * of the first page (the page that has no predecessor). */
+
+#define STACK_EMPTY(stack) ((stack.index == 0) && \
+			    (stack.current->prev == NULL))
+
+
+/* Initialize a state object: zero every field, point the stack at
+ * the page embedded in the state, and start at level 1 (so that
+ * registers, whose 'changed' value starts at 0, get saved on their
+ * first modification).  Expands to three statements. */
+
+#define NEW_STATE(state) \
+memset(&state, 0, sizeof(match_state)); \
+state.stack.current = &state.stack.first; \
+state.level = 1
+
+/* Free any memory that might have been malloc'd: every page linked
+ * after the embedded first page is unlinked and freed.  stack.current
+ * is used as the scratch pointer, so if any page was freed it is left
+ * pointing at freed memory; the state must be set up again with
+ * NEW_STATE before it is reused. */
+
+#define FREE_STATE(state) \
+while(state.stack.first.next != NULL) \
+{ \
+   state.stack.current = state.stack.first.next; \
+   state.stack.first.next = state.stack.current->next; \
+   free(state.stack.current); \
+}
+
+/* Return the start of register 'reg' (the current value; nothing is
+ * read from the stack). */
+
+#define GET_REG_START(state, reg) (state.start[reg])
+
+/* Return the end of register 'reg' (the current value; nothing is
+ * read from the stack). */
+
+#define GET_REG_END(state, reg) (state.end[reg])
+
+/* Set the start of register 'reg'. If the state of the register needs
+ * saving, push it on the stack.  Saving is needed when changed[reg]
+ * is below the current level: the old start, end and changed value
+ * are pushed as a 'reg' item, the register is marked as changed at
+ * the current level, and state.count is incremented.  'on_error' is
+ * executed if the push cannot allocate a stack page.  'state' and
+ * 'reg' are evaluated several times, and the final assignment lies
+ * outside the if, so do not use this as an unbraced statement body. */
+
+#define SET_REG_START(state, reg, text, on_error) \
+if(state.changed[reg] < state.level) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->reg.num = reg; \
+   item->reg.start = state.start[reg]; \
+   item->reg.end = state.end[reg]; \
+   item->reg.level = state.changed[reg]; \
+   state.changed[reg] = state.level; \
+   state.count++; \
+} \
+state.start[reg] = text
+
+/* Set the end of register 'reg'. If the state of the register needs
+ * saving, push it on the stack.  The save logic is the same as in
+ * SET_REG_START: when changed[reg] is below the current level, the
+ * old start, end and changed value are pushed, the register is
+ * marked as changed at the current level, and state.count is
+ * incremented.  'on_error' is executed if the push cannot allocate a
+ * stack page.  The final assignment lies outside the if, so do not
+ * use this as an unbraced statement body. */
+
+#define SET_REG_END(state, reg, text, on_error) \
+if(state.changed[reg] < state.level) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->reg.num = reg; \
+   item->reg.start = state.start[reg]; \
+   item->reg.end = state.end[reg]; \
+   item->reg.level = state.changed[reg]; \
+   state.changed[reg] = state.level; \
+   state.count++; \
+} \
+state.end[reg] = text
+
+/* Push a failure point holding 'xcode' and 'xtext' together with the
+ * current count and level, marked as non-phantom.  Afterwards count
+ * is reset to 0 and level and point are incremented.  'on_error' is
+ * executed if the push cannot allocate a stack page. */
+#define PUSH_FAILURE(state, xcode, xtext, on_error) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->fail.code = xcode; \
+   item->fail.text = xtext; \
+   item->fail.count = state.count; \
+   item->fail.level = state.level; \
+   item->fail.phantom = 0; \
+   state.count = 0; \
+   state.level++; \
+   state.point++; \
+}
+
+/* Update the last failure point with a new position in the text.
+ *
+ * The last failure point is the item lying state.count + 1 items
+ * back (below the registers saved since it was pushed).  If it is a
+ * real (non-phantom) point it is left alone and a new phantom point
+ * with the same code pointer and the new text position is pushed on
+ * top.  If it is already a phantom, the register items above it are
+ * discarded and its text position is overwritten in place.  In both
+ * cases count is reset to 0 and level is incremented; point is only
+ * incremented when a new item is pushed.
+ *
+ * The two commented-out definitions below are earlier variants of
+ * this macro. */
+
+/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
+/* { \ */
+/*    item_t *item; \ */
+/*    STACK_DISCARD(state.stack, state.count, on_error); \ */
+/*    STACK_TOP(state.stack, item, on_error); \ */
+/*    item->fail.text = xtext; \ */
+/*    state.count = 0; \ */
+/* } */
+
+/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
+/* { \ */
+/*    item_t *item; \ */
+/*    STACK_BACK(state.stack, item, state.count + 1, on_error); \ */
+/*    item->fail.text = xtext; \ */
+/* } */
+
+#define UPDATE_FAILURE(state, xtext, on_error) \
+{ \
+   item_t *item; \
+   STACK_BACK(state.stack, item, state.count + 1, on_error); \
+   if (!item->fail.phantom) \
+   { \
+      item_t *item2; \
+      STACK_NEXT(state.stack, item2, on_error); \
+      item2->fail.code = item->fail.code; \
+      item2->fail.text = xtext; \
+      item2->fail.count = state.count; \
+      item2->fail.level = state.level; \
+      item2->fail.phantom = 1; \
+      state.count = 0; \
+      state.level++; \
+      state.point++; \
+   } \
+   else \
+   { \
+      STACK_DISCARD(state.stack, state.count, on_error); \
+      STACK_TOP(state.stack, item, on_error); \
+      item->fail.text = xtext; \
+      state.count = 0; \
+      state.level++; \
+   } \
+}
+
+/* Pop the most recent failure point into 'xcode' and 'xtext'.
+ * First every register saved since that point (state.count of them)
+ * is popped and restored; then the failure point itself is popped,
+ * count and level are restored from it, and point is decremented.
+ * The whole step is repeated while the popped text pointer is NULL.
+ * 'on_empty' is executed when no failure point is left to pop;
+ * 'on_error' when the stack runs out while registers are being
+ * restored. */
+#define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \
+{ \
+   item_t *item; \
+   do \
+   { \
+      while(state.count > 0) \
+      { \
+         STACK_PREV(state.stack, item, on_error); \
+         state.start[item->reg.num] = item->reg.start; \
+         state.end[item->reg.num] = item->reg.end; \
+         state.changed[item->reg.num] = item->reg.level; \
+         state.count--; \
+      } \
+      STACK_PREV(state.stack, item, on_empty); \
+      xcode = item->fail.code; \
+      xtext = item->fail.text; \
+      state.count = item->fail.count; \
+      state.level = item->fail.level; \
+      state.point--; \
+   } \
+   while (item->fail.text == NULL); \
+}

 enum regexp_compiled_ops /* opcodes for compiled regexp */
 {
@@ -73,9 +390,6 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
  Cwordend,		/* match at end of word */
  Cwordbound,		/* match if at word boundary */
  Cnotwordbound,	/* match if not at word boundary */
-#ifdef emacs
-  Cemacs_at_dot,	/* emacs only: matches at dot */
-#endif /* emacs */
  Csyntaxspec,		/* matches syntax code (1 byte follows) */
  Cnotsyntaxspec	/* matches if syntax code does not match (1 byte foll)*/
 };
@@ -106,11 +420,6 @@ enum regexp_syntax_op	/* syntax codes for plain and quoted characters */
  Rwordend,		/* end of word */
  Rwordbound,		/* word bound */
  Rnotwordbound,	/* not word bound */
-#ifdef emacs
-  Remacs_at_dot,	/* emacs: at dot */
-  Remacs_syntaxspec,	/* syntaxspec */
-  Remacs_notsyntaxspec,	/* notsyntaxspec */
-#endif /* emacs */
  Rnum_ops
 };

@@ -126,38 +435,15 @@ static int regexp_ansi_sequences;
 #define NUM_LEVELS  5    /* number of precedence levels in use */
 #define MAX_NESTING 100  /* max nesting level of operators */

-#ifdef emacs
-
-/* This code is for emacs compatibility only. */
-
-#include "config.h"
-#include "lisp.h"
-#include "buffer.h"
-#include "syntax.h"
-
-/* emacs defines NULL in some strange way? */
-#undef NULL
-#define NULL 0
-
-#else /* emacs */
-
 #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
 #define Sword 1

-#ifdef SYNTAX_TABLE
-char *re_syntax_table;
-#else
 static char re_syntax_table[256];
-#endif /* SYNTAX_TABLE */
-
-#endif /* emacs */

-static void re_compile_initialize Py_PROTO((void));
-static void re_compile_initialize()
+static void re_compile_initialize(void)
 {
   int a;
  
-#if !defined(emacs) && !defined(SYNTAX_TABLE)
   static int syntax_table_inited = 0;
   
   if (!syntax_table_inited)
@@ -171,7 +457,6 @@ static void re_compile_initialize()
      for (a = '0'; a <= '9'; a++)
 	 re_syntax_table[a] = Sword;
   }
-#endif /* !emacs && !SYNTAX_TABLE */
   re_compile_initialized = 1;
   for (a = 0; a < 256; a++)
   {
@@ -214,11 +499,6 @@ static void re_compile_initialize()
   regexp_plain_ops['.'] = Ranychar;
   if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
   {
-#ifdef emacs
-      regexp_quoted_ops['='] = Remacs_at_dot;
-      regexp_quoted_ops['s'] = Remacs_syntaxspec;
-      regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
-#endif /* emacs */
      regexp_quoted_ops['w'] = Rwordchar;
      regexp_quoted_ops['W'] = Rnotwordchar;
      regexp_quoted_ops['<'] = Rwordbeg;
@@ -250,8 +530,7 @@ static void re_compile_initialize()
   regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
 }

-int re_set_syntax(syntax)
-int syntax;
+int re_set_syntax(int syntax)
 {
   int ret;
   
@@ -262,9 +541,7 @@ int syntax;
   return ret;
 }

-static int hex_char_to_decimal Py_PROTO((int));
-static int hex_char_to_decimal(ch)
-int ch;
+static int hex_char_to_decimal(int ch)
 {
   if (ch >= '0' && ch <= '9')
      return ch - '0';
@@ -275,224 +552,659 @@ int ch;
   return 16;
 }

-char *re_compile_pattern(regex, size, bufp)
-char *regex;
-int size;
-regexp_t bufp;
+static void re_compile_fastmap_aux(char *code,
+				   int pos,
+				   char *visited,
+				   char *can_be_null,
+				   char *fastmap)
 {
-  int a, pos, op, current_level, level, opcode;
-  int pattern_offset = 0, alloc;
-  int starts[NUM_LEVELS * MAX_NESTING], starts_base;
-  int future_jumps[MAX_NESTING], num_jumps;
-  unsigned char ch = '\0';
-  char *pattern, *translate;
-  int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
-  int beginning_context;
-
-#define NEXTCHAR(var)			\
-  MACRO_BEGIN				\
-    if (pos >= size)			\
-      goto ends_prematurely;		\
-    (var) = regex[pos];			\
-    pos++;				\
-  MACRO_END
-
-#define ALLOC(amount)				\
-  MACRO_BEGIN					\
-    if (pattern_offset+(amount) > alloc)	\
-      {						\
-	alloc += 256 + (amount);		\
-	pattern = realloc(pattern, alloc);	\
-	if (!pattern)				\
-	  goto out_of_memory;			\
-      }						\
-  MACRO_END
-
-#define STORE(ch) pattern[pattern_offset++] = (ch)
-
-#define CURRENT_LEVEL_START (starts[starts_base + current_level])
-
-#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
-
-#define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
-		            starts_base += NUM_LEVELS;			\
-                          else						\
-			    goto too_complex
-
-#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
-
-#define PUT_ADDR(offset,addr)				\
-  MACRO_BEGIN						\
-    int disp = (addr) - (offset) - 2;			\
-    pattern[(offset)] = disp & 0xff;			\
-    pattern[(offset)+1] = (disp>>8) & 0xff;		\
-  MACRO_END
-
-#define INSERT_JUMP(pos,type,addr)			\
-  MACRO_BEGIN						\
-    int a, p = (pos), t = (type), ad = (addr);		\
-    for (a = pattern_offset - 1; a >= p; a--)		\
-      pattern[a + 3] = pattern[a];			\
-    pattern[p] = t;					\
-    PUT_ADDR(p+1,ad);					\
-    pattern_offset += 3;				\
-  MACRO_END
-
-#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
-
-#define SET_FIELDS				\
-  MACRO_BEGIN					\
-    bufp->allocated = alloc;			\
-    bufp->buffer = pattern;			\
-    bufp->used = pattern_offset;		\
-  MACRO_END
-    
-#define GETHEX(var)						\
-  MACRO_BEGIN							\
-    char gethex_ch, gethex_value;				\
-    NEXTCHAR(gethex_ch);					\
-    gethex_value = hex_char_to_decimal(gethex_ch);		\
-    if (gethex_value == 16)					\
-      goto hex_error;						\
-    NEXTCHAR(gethex_ch);					\
-    gethex_ch = hex_char_to_decimal(gethex_ch);			\
-    if (gethex_ch == 16)					\
-      goto hex_error;						\
-    (var) = gethex_value * 16 + gethex_ch;			\
-  MACRO_END
-
-#define ANSI_TRANSLATE(ch)				\
-  MACRO_BEGIN						\
-    switch (ch)						\
-      {							\
-      case 'a':						\
-      case 'A':						\
-	ch = 7; /* audible bell */			\
-	break;						\
-      case 'b':						\
-      case 'B':						\
-	ch = 8; /* backspace */				\
-	break;						\
-      case 'f':						\
-      case 'F':						\
-	ch = 12; /* form feed */			\
-	break;						\
-      case 'n':						\
-      case 'N':						\
-	ch = 10; /* line feed */			\
-	break;						\
-      case 'r':						\
-      case 'R':						\
-	ch = 13; /* carriage return */			\
-	break;						\
-      case 't':						\
-      case 'T':						\
-	ch = 9; /* tab */				\
-	break;						\
-      case 'v':						\
-      case 'V':						\
-	ch = 11; /* vertical tab */			\
-	break;						\
-      case 'x': /* hex code */				\
-      case 'X':						\
-	GETHEX(ch);					\
-	break;						\
-      default:						\
-	/* other characters passed through */		\
-	if (translate)					\
-	  ch = translate[(unsigned char)ch];		\
-	break;						\
-      }							\
-  MACRO_END
+   int a;
+   int b;
+   int syntaxcode;
   
-  if (!re_compile_initialized)
-    re_compile_initialize();
-  bufp->used = 0;
-  bufp->fastmap_accurate = 0;
-  bufp->uses_registers = 0;
-  translate = bufp->translate;
-  pattern = bufp->buffer;
-  alloc = bufp->allocated;
-  if (alloc == 0 || pattern == NULL)
-    {
-      alloc = 256;
-      pattern = malloc(alloc);
-      if (!pattern)
-	goto out_of_memory;
-    }
-  pattern_offset = 0;
-  starts_base = 0;
-  num_jumps = 0;
-  current_level = 0;
-  SET_LEVEL_START;
-  num_open_registers = 0;
-  next_register = 1;
-  paren_depth = 0;
-  beginning_context = 1;
-  op = -1;
-  /* we use Rend dummy to ensure that pending jumps are updated (due to
-     low priority of Rend) before exiting the loop. */
-  pos = 0;
-  while (op != Rend)
+   if (visited[pos])
+      return;  /* we have already been here */
+   visited[pos] = 1;
+   for (;;)
+      switch (code[pos++])
      {
-      if (pos >= size)
-	op = Rend;
-      else
+	 case Cend:
 	 {
-	  NEXTCHAR(ch);
-	  if (translate)
-	    ch = translate[(unsigned char)ch];
-	  op = regexp_plain_ops[(unsigned char)ch];
-	  if (op == Rquote)
+	    *can_be_null = 1;
+	    return;
+	 }
+	 case Cbol:
+	 case Cbegbuf:
+	 case Cendbuf:
+	 case Cwordbeg:
+	 case Cwordend:
+	 case Cwordbound:
+	 case Cnotwordbound:
 	 {
-	      NEXTCHAR(ch);
-	      op = regexp_quoted_ops[(unsigned char)ch];
-	      if (op == Rnormal && regexp_ansi_sequences)
-		ANSI_TRANSLATE(ch);
+	    break;
 	 }
+	 case Csyntaxspec:
+	 {
+	    syntaxcode = code[pos++];
+	    for (a = 0; a < 256; a++)
+	       if (SYNTAX(a) == syntaxcode)
+		  fastmap[a] = 1;
+	    return;
 	 }
-      level = regexp_precedences[op];
-      /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
-	     ch, op, level, current_level, CURRENT_LEVEL_START); */
-      if (level > current_level)
+	 case Cnotsyntaxspec:
 	 {
-	  for (current_level++; current_level < level; current_level++)
-	    SET_LEVEL_START;
-	  SET_LEVEL_START;
+	    syntaxcode = code[pos++];
+	    for (a = 0; a < 256; a++)
+	       if (SYNTAX(a) != syntaxcode)
+		  fastmap[a] = 1;
+	    return;
 	 }
-      else
-	if (level < current_level)
+	 case Ceol:
 	 {
-	    current_level = level;
-	    for (;num_jumps > 0 &&
-		 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
-		 num_jumps--)
-	      PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
+	    fastmap['\n'] = 1;
+	    if (*can_be_null == 0)
+	       *can_be_null = 2; /* can match null, but only at end of buffer*/
+	    return;
 	 }
-      switch (op)
+	 case Cset:
 	 {
-	case Rend:
-	  break;
-	case Rnormal:
-	normal_char:
-	  opcode = Cexact;
+	    for (a = 0; a < 256/8; a++)
+	       if (code[pos + a] != 0)
+		  for (b = 0; b < 8; b++)
+		     if (code[pos + a] & (1 << b))
+			fastmap[(a << 3) + b] = 1;
+	    pos += 256/8;
+	    return;
+	 }
+	 case Cexact:
+	 {
+	    fastmap[(unsigned char)code[pos]] = 1;
+	    return;
+	 }
+	 case Canychar:
+	 {
+	    for (a = 0; a < 256; a++)
+	       if (a != '\n')
+		  fastmap[a] = 1;
+	    return;
+	 }
+	 case Cstart_memory:
+	 case Cend_memory:
+	 {
+	    pos++;
+	    break;
+	 }
+	 case Cmatch_memory:
+	 {
+	    for (a = 0; a < 256; a++)
+	       fastmap[a] = 1;
+	    *can_be_null = 1;
+	    return;
+	 }
+	 case Cjump:
+	 case Cdummy_failure_jump:
+	 case Cupdate_failure_jump:
+	 case Cstar_jump:
+	 {
+	    a = (unsigned char)code[pos++];
+	    a |= (unsigned char)code[pos++] << 8;
+	    pos += (int)(short)a;
+	    if (visited[pos])
+	    {
+	       /* argh... the regexp contains empty loops.  This is not
+		  good, as this may cause a failure stack overflow when
+		  matching.  Oh well. */
+	       /* this path leads nowhere; pursue other paths. */
+	       return;
+	    }
+	    visited[pos] = 1;
+	    break;
+	 }
+	 case Cfailure_jump:
+	 {
+	    a = (unsigned char)code[pos++];
+	    a |= (unsigned char)code[pos++] << 8;
+	    a = pos + (int)(short)a;
+	    re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
+	    break;
+	 }
+	 default:
+	 {
+	    abort();  /* probably some opcode is missing from this switch */
+	    /*NOTREACHED*/
+	 }
+      }
+}
+
+/* Build a fastmap for the compiled code in 'buffer' ('used' bytes
+ * long), starting the analysis at offset 'pos'.  '*can_be_null' and
+ * the 256-byte 'fastmap' are cleared and then filled in by
+ * re_compile_fastmap_aux().  Returns 1 on success, or 0 if scratch
+ * memory for the visited map could not be allocated. */
+static int re_do_compile_fastmap(char *buffer,
+				 int used,
+				 int pos,
+				 char *can_be_null,
+				 char *fastmap)
+{
+   char local_marks[512];
+   char *marks = local_marks;
+
+   /* Code that does not fit the on-stack scratch area gets a heap
+    * buffer instead. */
+   if (used > sizeof(local_marks))
+   {
+      marks = malloc(used);
+      if (marks == NULL)
+	 return 0;
+   }
+
+   *can_be_null = 0;
+   memset(fastmap, 0, 256);
+   memset(marks, 0, used);
+   re_compile_fastmap_aux(buffer, pos, marks, can_be_null, fastmap);
+
+   if (marks != local_marks)
+      free(marks);
+   return 1;
+}
+
+/* Fill in the fastmap, can_be_null and anchor fields of 'bufp' and
+ * mark the fastmap as accurate.  Nothing is done when 'bufp' has no
+ * fastmap storage or its fastmap is already accurate; if the fastmap
+ * cannot be computed, anchor and fastmap_accurate are left as they
+ * were. */
+void re_compile_fastmap(regexp_t bufp)
+{
+   int computed;
+
+   if (bufp->fastmap_accurate || !bufp->fastmap)
+      return;
+   assert(bufp->used > 0);
+
+   computed = re_do_compile_fastmap(bufp->buffer,
+				    bufp->used,
+				    0,
+				    &bufp->can_be_null,
+				    bufp->fastmap);
+   if (!computed)
+      return;
+
+   /* The anchor is derived from the first opcode of the pattern. */
+   switch (bufp->buffer[0])
+   {
+      case Cbol:
+	 bufp->anchor = 1;   /* begline */
+	 break;
+      case Cbegbuf:
+	 bufp->anchor = 2;   /* begbuf */
+	 break;
+      default:
+	 bufp->anchor = 0;   /* none */
+	 break;
+   }
+   bufp->fastmap_accurate = 1;
+}
+
+/* 
+ * star is coded as:
+ * 1: failure_jump 2
+ *    ... code for operand of star
+ *    star_jump 1
+ * 2: ... code after star
+ *
+ * We change the star_jump to update_failure_jump if we can determine
+ * that it is safe to do so; otherwise we change it to an ordinary
+ * jump.
+ *
+ * plus is coded as
+ *
+ *    jump 2
+ * 1: failure_jump 3
+ * 2: ... code for operand of plus
+ *    star_jump 1
+ * 3: ... code after plus
+ *
+ * For star_jump considerations this is processed identically to star.
+ *
+ */
+
+/* Rewrite the Cstar_jump whose two-byte displacement starts at 'code'
+ * (that is, 'code' points just past the opcode byte).
+ *
+ * The opcode becomes Cupdate_failure_jump when (a) the first
+ * character-consuming opcode of the loop body cannot match any
+ * character that may start the code following the loop, and (b) the
+ * rest of the loop body contains no jump of any kind.  In every other
+ * recognised case it becomes a plain Cjump.
+ *
+ * Returns 1 when the opcode has been rewritten, 0 when the loop body
+ * contains Cend or an unknown opcode.
+ *
+ * 'ch' is an unsigned char on purpose: it indexes map[], and with a
+ * plain char (signed on many platforms) a Cexact operand >= 0x80
+ * would produce a negative subscript. */
+static int re_optimize_star_jump(regexp_t bufp, char *code)
+{
+   char map[256];
+   char can_be_null;
+   char *p1;
+   char *p2;
+   unsigned char ch;
+   int a;
+   int b;
+
+   /* decode the signed 16-bit little-endian displacement */
+   a = (unsigned char)*code++;
+   a |= (unsigned char)*code++ << 8;
+   a = (int)(short)a;
+
+   p1 = code + a + 3; /* skip the failure_jump */
+   assert(p1[-3] == Cfailure_jump);
+   p2 = code;
+   /* p1 points inside loop, p2 points to after loop */
+   if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
+			      p2 - bufp->buffer, &can_be_null, map))
+      goto make_normal_jump;
+   
+   /* If we might introduce a new update point inside the
+    * loop, we can't optimize because then update_jump would
+    * update a wrong failure point.  Thus we have to be
+    * quite careful here.
+    */
+      
+   /* loop until we find something that consumes a character */
+  loop_p1:
+   switch (*p1++)
+   {
+      case Cbol:
+      case Ceol:
+      case Cbegbuf:
+      case Cendbuf:
+      case Cwordbeg:
+      case Cwordend:
+      case Cwordbound:
+      case Cnotwordbound:
+      {
+	 goto loop_p1;
+      }
+      case Cstart_memory:
+      case Cend_memory:
+      {
+	 p1++;
+	 goto loop_p1;
+      }
+      case Cexact:
+      {
+	 ch = (unsigned char)*p1++;
+	 if (map[ch])
+	    goto make_normal_jump;
+	 break;
+      }
+      case Canychar:
+      {
+	 for (b = 0; b < 256; b++)
+	    if (b != '\n' && map[b])
+	       goto make_normal_jump;
+	 break;
+      }
+      case Cset:
+      {
+	 for (b = 0; b < 256; b++)
+	    if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
+	       goto make_normal_jump;
+	 p1 += 256/8;
+	 break;
+      }
+      default:
+      {
+	 goto make_normal_jump;
+      }
+   }
+   /* now we know that we can't backtrack. */
+   /* scan the rest of the loop body, up to the star_jump opcode itself */
+   while (p1 != p2 - 3)
+   {
+      switch (*p1++)
+      {
+	 case Cend:
+	 {
+	    return 0;
+	 }
+	 case Cbol:
+	 case Ceol:
+	 case Canychar:
+	 case Cbegbuf:
+	 case Cendbuf:
+	 case Cwordbeg:
+	 case Cwordend:
+	 case Cwordbound:
+	 case Cnotwordbound:
+	 {
+	    break;
+	 }
+	 case Cset:
+	 {
+	    p1 += 256/8;
+	    break;
+	 }
+	 case Cexact:
+	 case Cstart_memory:
+	 case Cend_memory:
+	 case Cmatch_memory:
+	 case Csyntaxspec:
+	 case Cnotsyntaxspec:
+	 {
+	    p1++;
+	    break;
+	 }
+	 case Cjump:
+	 case Cstar_jump:
+	 case Cfailure_jump:
+	 case Cupdate_failure_jump:
+	 case Cdummy_failure_jump:
+	 {
+	    goto make_normal_jump;
+	 }
+	 default:
+	 {
+	    return 0;
+	 }
+      }
+   }
+
+  make_update_jump:
+   code -= 3;
+   a += 3;  /* jump to after the Cfailure_jump */
+   code[0] = Cupdate_failure_jump;
+   code[1] = a & 0xff;
+   code[2] = a >> 8;
+   return 1;
+
+  make_normal_jump:
+   code -= 3;
+   *code = Cjump;
+   return 1;
+}
+
+/* Walk the compiled pattern in bufp->buffer one opcode at a time and
+ * let re_optimize_star_jump() rewrite every Cstar_jump that is met.
+ * Returns 1 when Cend is reached, 0 if a star_jump could not be
+ * processed or an unknown opcode is found. */
+static int re_optimize(regexp_t bufp)
+{
+   char *code = bufp->buffer;
+
+   for (;;)
+   {
+      int operand_bytes;   /* bytes following the opcode just read */
+
+      switch (*code++)
+      {
+	 case Cend:
+	    return 1;
+
+	 /* opcodes without an operand */
+	 case Canychar:
+	 case Cbol:
+	 case Ceol:
+	 case Cbegbuf:
+	 case Cendbuf:
+	 case Cwordbeg:
+	 case Cwordend:
+	 case Cwordbound:
+	 case Cnotwordbound:
+	    operand_bytes = 0;
+	    break;
+
+	 /* a 256-bit membership bitmap follows */
+	 case Cset:
+	    operand_bytes = 256/8;
+	    break;
+
+	 /* one operand byte follows */
+	 case Cexact:
+	 case Cstart_memory:
+	 case Cend_memory:
+	 case Cmatch_memory:
+	 case Csyntaxspec:
+	 case Cnotsyntaxspec:
+	    operand_bytes = 1;
+	    break;
+
+	 /* rewrite the star_jump, then step over its displacement */
+	 case Cstar_jump:
+	    if (!re_optimize_star_jump(bufp, code))
+	       return 0;
+	    operand_bytes = 2;
+	    break;
+
+	 /* a two-byte displacement follows */
+	 case Cupdate_failure_jump:
+	 case Cjump:
+	 case Cdummy_failure_jump:
+	 case Cfailure_jump:
+	    operand_bytes = 2;
+	    break;
+
+	 default:
+	    return 0;
+      }
+      code += operand_bytes;
+   }
+}
+
+/* Fetch the next pattern character into 'var' and advance 'pos';
+ * jumps to the label 'ends_prematurely' when the pattern is
+ * exhausted.  Relies on the caller's locals 'regex', 'pos' and
+ * 'size'. */
+#define NEXTCHAR(var) \
+{ \
+   if (pos >= size) \
+      goto ends_prematurely; \
+   (var) = regex[pos]; \
+   pos++; \
+}
+
+/* Make sure 'amount' more bytes fit into 'pattern', growing the
+ * buffer by 256 + amount bytes when they do not; jumps to the label
+ * 'out_of_memory' if realloc fails.
+ * NOTE(review): 'pattern' is overwritten with the realloc result, so
+ * on failure the previous buffer pointer is no longer available in
+ * 'pattern' -- check what the out_of_memory path does with it. */
+#define ALLOC(amount) \
+{ \
+   if (pattern_offset+(amount) > alloc) \
+   { \
+      alloc += 256 + (amount); \
+      pattern = realloc(pattern, alloc); \
+      if (!pattern) \
+	 goto out_of_memory; \
+   } \
+}
+
+/* Append one byte to the compiled pattern.  No capacity check is
+ * done here; the visible callers call ALLOC first. */
+#define STORE(ch) pattern[pattern_offset++] = (ch)
+
+/* Offset in the pattern recorded for the current precedence level
+ * of the current nesting frame. */
+#define CURRENT_LEVEL_START (starts[starts_base + current_level])
+
+/* Record the current output position for the current level. */
+#define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
+
+/* Open a new frame of NUM_LEVELS slots in 'starts'; jumps to the
+ * label 'too_complex' once MAX_NESTING frames are in use.  Expands to
+ * an unterminated if/else, so it needs the trailing semicolon. */
+#define PUSH_LEVEL_STARTS \
+   if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
+      starts_base += NUM_LEVELS; \
+   else \
+      goto too_complex
+
+/* Drop back to the previous frame of 'starts' (no underflow check). */
+#define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
+
+/* Store a jump displacement at pattern[offset..offset+1].  The value
+ * written is addr - offset - 2, i.e. relative to the byte following
+ * the two-byte field, low byte first. */
+#define PUT_ADDR(offset,addr) \
+{ \
+   int disp = (addr) - (offset) - 2; \
+   pattern[(offset)] = disp & 0xff; \
+   pattern[(offset)+1] = (disp>>8) & 0xff; \
+}
+
+/* Insert a three-byte jump of opcode 'type' targeting 'addr' at
+ * position 'pos': everything from 'pos' to the end of the pattern is
+ * moved up by three bytes and pattern_offset grows by three.  No
+ * capacity check is done here, so room for the three bytes must
+ * already have been reserved with ALLOC. */
+#define INSERT_JUMP(pos,type,addr) \
+{ \
+   int a, p = (pos), t = (type), ad = (addr); \
+   for (a = pattern_offset - 1; a >= p; a--) \
+      pattern[a + 3] = pattern[a]; \
+   pattern[p] = t; \
+   PUT_ADDR(p+1,ad); \
+   pattern_offset += 3; \
+}
+/* Set bit number 'bit' in the bitmap that starts at buf[offset]
+ * (eight bits per byte, least significant bit first). */
+#define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
+
+/* Copy the compiler's local buffer bookkeeping (allocated size,
+ * buffer pointer, bytes used) back into 'bufp'. */
+#define SET_FIELDS \
+{ \
+   bufp->allocated = alloc; \
+   bufp->buffer = pattern; \
+   bufp->used = pattern_offset; \
+}
+    
+/* Read two hexadecimal digits from the pattern with NEXTCHAR and
+ * store the value they encode (first digit * 16 + second digit) in
+ * 'var'.  Jumps to the label 'hex_error' when hex_char_to_decimal()
+ * reports a non-hex character (return value 16), and, through
+ * NEXTCHAR, to 'ends_prematurely' when the pattern runs out. */
+#define GETHEX(var) \
+{ \
+   char gethex_ch, gethex_value; \
+   NEXTCHAR(gethex_ch); \
+   gethex_value = hex_char_to_decimal(gethex_ch); \
+   if (gethex_value == 16) \
+      goto hex_error; \
+   NEXTCHAR(gethex_ch); \
+   gethex_ch = hex_char_to_decimal(gethex_ch); \
+   if (gethex_ch == 16) \
+      goto hex_error; \
+   (var) = gethex_value * 16 + gethex_ch; \
+}
+
+/* Replace the escape letter in 'ch' (the character that followed the
+ * quote character) by the character it stands for.  The letters a,
+ * b, f, n, r, t and v, in either case, become the corresponding
+ * control codes; x or X reads two hex digits with GETHEX; any other
+ * character is kept, after mapping through 'translate' when a
+ * translate table is set.  'ch' is assigned to, so it must be an
+ * lvalue. */
+#define ANSI_TRANSLATE(ch)  \
+{ \
+   switch (ch) \
+   { \
+      case 'a': \
+      case 'A': \
+      { \
+	 ch = 7; /* audible bell */ \
+	 break; \
+      } \
+      case 'b': \
+      case 'B': \
+      { \
+	 ch = 8; /* backspace */ \
+	 break; \
+      } \
+      case 'f': \
+      case 'F': \
+      { \
+	 ch = 12; /* form feed */ \
+	 break; \
+      } \
+      case 'n': \
+      case 'N': \
+      { \
+	 ch = 10; /* line feed */ \
+	 break; \
+      } \
+      case 'r': \
+      case 'R': \
+      { \
+	 ch = 13; /* carriage return */ \
+	 break; \
+      } \
+      case 't': \
+      case 'T': \
+      { \
+	 ch = 9; /* tab */ \
+	 break; \
+      } \
+      case 'v': \
+      case 'V': \
+      { \
+	 ch = 11; /* vertical tab */ \
+	 break; \
+      } \
+      case 'x': /* hex code */ \
+      case 'X': \
+      { \
+	 GETHEX(ch); \
+	 break; \
+      } \
+      default: \
+      { \
+	 /* other characters passed through */ \
+	 if (translate) \
+	    ch = translate[(unsigned char)ch]; \
+	 break; \
+      } \
+   } \
+}
+
+char *re_compile_pattern(char *regex, int size, regexp_t bufp)
+{
+   int a;
+   int pos;
+   int op;
+   int current_level;
+   int level;
+   int opcode;
+   int pattern_offset, alloc;
+   int starts[NUM_LEVELS * MAX_NESTING];
+   int starts_base;
+   int future_jumps[MAX_NESTING];
+   int num_jumps;
+   unsigned char ch;
+   char *pattern;
+   char *translate;
+   int next_register;
+   int paren_depth;
+   int num_open_registers;
+   int open_registers[RE_NREGS];
+   int beginning_context;
+
+   if (!re_compile_initialized)
+      re_compile_initialize();
+   bufp->used = 0;
+   bufp->fastmap_accurate = 0;
+   bufp->uses_registers = 0;
+   translate = bufp->translate;
+   pattern = bufp->buffer;
+   alloc = bufp->allocated;
+   if (alloc == 0 || pattern == NULL)
+   {
+      alloc = 256;
+      pattern = malloc(alloc);
+      if (!pattern)
+	 goto out_of_memory;
+   }
+   pattern_offset = 0;
+   starts_base = 0;
+   num_jumps = 0;
+   current_level = 0;
+   SET_LEVEL_START;
+   num_open_registers = 0;
+   next_register = 1;
+   paren_depth = 0;
+   beginning_context = 1;
+   op = -1;
+   /* we use Rend dummy to ensure that pending jumps are updated (due to
+      low priority of Rend) before exiting the loop. */
+   pos = 0;
+   while (op != Rend)
+   {
+      if (pos >= size)
+	 op = Rend;
+      else
+      {
+	 NEXTCHAR(ch);
+	 if (translate)
+	    ch = translate[(unsigned char)ch];
+	 op = regexp_plain_ops[(unsigned char)ch];
+	 if (op == Rquote)
+	 {
+	    NEXTCHAR(ch);
+	    op = regexp_quoted_ops[(unsigned char)ch];
+	    if (op == Rnormal && regexp_ansi_sequences)
+	       ANSI_TRANSLATE(ch);
+	 }
+      }
+      level = regexp_precedences[op];
+      /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
+	 ch, op, level, current_level, CURRENT_LEVEL_START); */
+      if (level > current_level)
+      {
+	 for (current_level++; current_level < level; current_level++)
+	    SET_LEVEL_START;
+	 SET_LEVEL_START;
+      }
+      else
+	 if (level < current_level)
+	 {
+	    current_level = level;
+	    for (;num_jumps > 0 &&
+		    future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
+		 num_jumps--)
+	       PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
+	 }
+      switch (op)
+      {
+	 case Rend:
+	 {
+	    break;
+	 }
+	 case Rnormal:
+	 {
+	   normal_char:
+	    opcode = Cexact;
 	   store_opcode_and_arg: /* opcode & ch must be set */
 	    SET_LEVEL_START;
 	    ALLOC(2);
 	    STORE(opcode);
 	    STORE(ch);
 	    break;
+	 }
 	 case Ranychar:
+	 {
 	    opcode = Canychar;
 	   store_opcode:
 	    SET_LEVEL_START;
 	    ALLOC(1);
 	    STORE(opcode);
 	    break;
+	 }
 	 case Rquote:
+	 {
 	    abort();
 	    /*NOTREACHED*/
+	 }
 	 case Rbol:
+	 {
 	    if (!beginning_context)
 	       if (regexp_context_indep_ops)
 		  goto op_error;
@@ -500,7 +1212,9 @@ regexp_t bufp;
 		  goto normal_char;
 	    opcode = Cbol;
 	    goto store_opcode;
+	 }
 	 case Reol:
+	 {
 	    if (!((pos >= size) ||
 		  ((regexp_syntax & RE_NO_BK_VBAR) ?
 		   (regex[pos] == '\174') :
@@ -518,7 +1232,9 @@ regexp_t bufp;
 	    goto store_opcode;
 	    /* NOTREACHED */
 	    break;
+	 }
 	 case Roptional:
+	 {
 	    if (beginning_context)
 	       if (regexp_context_indep_ops)
 		  goto op_error;
@@ -530,8 +1246,10 @@ regexp_t bufp;
 	    INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
 			pattern_offset + 3);
 	    break;
+	 }
 	 case Rstar:
 	 case Rplus:
+	 {
 	    if (beginning_context)
 	       if (regexp_context_indep_ops)
 		  goto op_error;
@@ -547,7 +1265,9 @@ regexp_t bufp;
 	       INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
 			   CURRENT_LEVEL_START + 6);
 	    break;
+	 }
 	 case Ror:
+	 {
 	    ALLOC(6);
 	    INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
 			pattern_offset + 6);
@@ -559,7 +1279,9 @@ regexp_t bufp;
 	    STORE(0);
 	    SET_LEVEL_START;
 	    break;
+	 }
 	 case Ropenpar:
+	 {
 	    SET_LEVEL_START;
 	    if (next_register < RE_NREGS)
 	    {
@@ -575,7 +1297,9 @@ regexp_t bufp;
 	    current_level = 0;
 	    SET_LEVEL_START;
 	    break;
+	 }
 	 case Rclosepar:
+	 {
 	    if (paren_depth <= 0)
 	       goto parenthesis_error;
 	    POP_LEVEL_STARTS;
@@ -590,7 +1314,9 @@ regexp_t bufp;
 	       STORE(open_registers[num_open_registers]);
 	    }
 	    break;
+	 }
 	 case Rmemory:
+	 {
 	    if (ch == '0')
 	       goto bad_match_register;
 	    assert(ch >= '0' && ch <= '9');
@@ -598,7 +1324,9 @@ regexp_t bufp;
 	    opcode = Cmatch_memory;
 	    ch -= '0';
 	    goto store_opcode_and_arg;
+	 }
 	 case Rextended_memory:
+	 {
 	    NEXTCHAR(ch);
 	    if (ch < '0' || ch > '9')
 	       goto bad_match_register;
@@ -611,9 +1339,14 @@ regexp_t bufp;
 	    bufp->uses_registers = 1;
 	    opcode = Cmatch_memory;
 	    goto store_opcode_and_arg;
+	 }
 	 case Ropenset:
 	 {
-	    int complement,prev,offset,range,firstchar;
+	    int complement;
+	    int prev;
+	    int offset;
+	    int range;
+	    int firstchar;
 	    
 	    SET_LEVEL_START;
 	    ALLOC(1+256/8);
@@ -673,53 +1406,52 @@ regexp_t bufp;
 	    break;
 	 }
 	 case Rbegbuf:
+	 {
 	    opcode = Cbegbuf;
 	    goto store_opcode;
+	 }
 	 case Rendbuf:
+	 {
 	    opcode = Cendbuf;
 	    goto store_opcode;
+	 }
 	 case Rwordchar:
+	 {
 	    opcode = Csyntaxspec;
 	    ch = Sword;
 	    goto store_opcode_and_arg;
+	 }
 	 case Rnotwordchar:
+	 {
 	    opcode = Cnotsyntaxspec;
 	    ch = Sword;
 	    goto store_opcode_and_arg;
+	 }
 	 case Rwordbeg:
+	 {
 	    opcode = Cwordbeg;
 	    goto store_opcode;
+	 }
 	 case Rwordend:
+	 {
 	    opcode = Cwordend;
 	    goto store_opcode;
+	 }
 	 case Rwordbound:
+	 {
 	    opcode = Cwordbound;
 	    goto store_opcode;
+	 }
 	 case Rnotwordbound:
+	 {
 	    opcode = Cnotwordbound;
 	    goto store_opcode;
-#ifdef emacs
-	case Remacs_at_dot:
-	  opcode = Cemacs_at_dot;
-	  goto store_opcode;
-	case Remacs_syntaxspec:
-	  NEXTCHAR(ch);
-	  if (translate)
-	    ch = translate[(unsigned char)ch];
-	  opcode = Csyntaxspec;
-	  ch = syntax_spec_code[(unsigned char)ch];
-	  goto store_opcode_and_arg;
-	case Remacs_notsyntaxspec:
-	  NEXTCHAR(ch);
-	  if (translate)
-	    ch = translate[(unsigned char)ch];
-	  opcode = Cnotsyntaxspec;
-	  ch = syntax_spec_code[(unsigned char)ch];
-	  goto store_opcode_and_arg;
-#endif /* emacs */
+	 }
 	 default:
+	 {
 	    abort();
 	 }
+      }
      beginning_context = (op == Ropenpar || op == Ror);
   }
   if (starts_base != 0)
@@ -728,6 +1460,8 @@ regexp_t bufp;
   ALLOC(1);
   STORE(Cend);
   SET_FIELDS;
+   if(!re_optimize(bufp))
+      return "Optimization error";
   return NULL;

  op_error:
@@ -758,6 +1492,7 @@ regexp_t bufp;
   SET_FIELDS;
   return "Regular expression too complex";
 }
+
 #undef CHARAT
 #undef NEXTCHAR
 #undef GETHEX
@@ -772,643 +1507,349 @@ regexp_t bufp;
 #undef SETBIT
 #undef SET_FIELDS

-static void re_compile_fastmap_aux
-	Py_PROTO((char *, int, char *, char *, char *));
-static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
-char *code, *visited, *can_be_null, *fastmap;
-int pos;
-{
-  int a, b, syntaxcode;
-
-  if (visited[pos])
-    return;  /* we have already been here */
-  visited[pos] = 1;
-  for (;;)
-    switch (code[pos++])
-      {
-      case Cend:
-	*can_be_null = 1;
-	return;
-      case Cbol:
-      case Cbegbuf:
-      case Cendbuf:
-      case Cwordbeg:
-      case Cwordend:
-      case Cwordbound:
-      case Cnotwordbound:
-#ifdef emacs
-      case Cemacs_at_dot:
-#endif /* emacs */
-	break;
-      case Csyntaxspec:
-	syntaxcode = code[pos++];
-	for (a = 0; a < 256; a++)
-	  if (SYNTAX(a) == syntaxcode)
-	    fastmap[a] = 1;
-	return;
-      case Cnotsyntaxspec:
-	syntaxcode = code[pos++];
-	for (a = 0; a < 256; a++)
-	  if (SYNTAX(a) != syntaxcode)
-	    fastmap[a] = 1;
-	return;
-      case Ceol:
-	fastmap['\n'] = 1;
-	if (*can_be_null == 0)
-	  *can_be_null = 2;  /* can match null, but only at end of buffer*/
-	return;
-      case Cset:
-	for (a = 0; a < 256/8; a++)
-	  if (code[pos + a] != 0)
-	    for (b = 0; b < 8; b++)
-	      if (code[pos + a] & (1 << b))
-		fastmap[(a << 3) + b] = 1;
-	pos += 256/8;
-	return;
-      case Cexact:
-	fastmap[(unsigned char)code[pos]] = 1;
-	return;
-      case Canychar:
-	for (a = 0; a < 256; a++)
-	  if (a != '\n')
-	    fastmap[a] = 1;
-	return;
-      case Cstart_memory:
-      case Cend_memory:
-	pos++;
-	break;
-      case Cmatch_memory:
-	/* should this ever happen for sensible patterns??? */
-	*can_be_null = 1;
-	return;
-      case Cjump:
-      case Cdummy_failure_jump:
-      case Cupdate_failure_jump:
-      case Cstar_jump:
-	a = (unsigned char)code[pos++];
-	a |= (unsigned char)code[pos++] << 8;
-	pos += (int)(short)a;
-	if (visited[pos])
-	  {
-	    /* argh... the regexp contains empty loops.  This is not
-	       good, as this may cause a failure stack overflow when
-	       matching.  Oh well. */
-	    /* this path leads nowhere; pursue other paths. */
-	    return;
-	  }
-	visited[pos] = 1;
-	break;
-      case Cfailure_jump:
-	a = (unsigned char)code[pos++];
-	a |= (unsigned char)code[pos++] << 8;
-	a = pos + (int)(short)a;
-	re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
-	break;
-      default:
-	abort();  /* probably some opcode is missing from this switch */
-	/*NOTREACHED*/
-      }
-}
-
-static int re_do_compile_fastmap Py_PROTO((char *, int, int, char *, char *));
-static int re_do_compile_fastmap(buffer, used, pos, can_be_null, fastmap)
-char *buffer, *fastmap, *can_be_null;
-int used, pos;
-{
-  char small_visited[512], *visited;
-
-  if (used <= sizeof(small_visited))
-    visited = small_visited;
-  else
-    {
-      visited = malloc(used);
-      if (!visited)
-	return 0;
-    }
-  *can_be_null = 0;
-  memset(fastmap, 0, 256);
-  memset(visited, 0, used);
-  re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap);
-  if (visited != small_visited)
-    free(visited);
-  return 1;
-}
+#define PREFETCH if (text == textend) goto fail

-void re_compile_fastmap(bufp)
-regexp_t bufp;
+#define NEXTCHAR(var) /* fetch next text byte into var (via translate if set); goto fail at textend */ \
+PREFETCH; \
+var = (unsigned char)*text++; \
+if (translate) \
+   var = (unsigned char)translate[var] /* cast: plain char may be signed, var is used as an index */
+
+int re_match(regexp_t bufp,
+	     char *string,
+	     int size,
+	     int pos,
+	     regexp_registers_t old_regs)
 {
-  if (!bufp->fastmap || bufp->fastmap_accurate)
-    return;
-  assert(bufp->used > 0);
-  if (!re_do_compile_fastmap(bufp->buffer, bufp->used, 0, &bufp->can_be_null,
-			     bufp->fastmap))
-    return;
-  if (bufp->buffer[0] == Cbol)
-    bufp->anchor = 1;   /* begline */
-  else
-    if (bufp->buffer[0] == Cbegbuf)
-      bufp->anchor = 2; /* begbuf */
-    else
-      bufp->anchor = 0; /* none */
-  bufp->fastmap_accurate = 1;
-}
-
-#define INITIAL_FAILURES  128  /* initial # failure points to allocate */
-#define MAX_FAILURES     4100L /* max # of failure points before failing */
+  char *code;
+  char *translate;
+  char *text;
+  char *textstart;
+  char *textend;
+  int a;
+  int b;
+  int ch;
+  int reg;
+  int match_end;
+  char *regstart;
+  char *regend;
+  int regsize;
+  match_state state;
+  
+  assert(pos >= 0 && size >= 0);
+  assert(pos <= size);
+  
+  text = string + pos;
+  textstart = string;
+  textend = string + size;
  
-int re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop)
-regexp_t bufp;
-char *string1, *string2;
-int size1, size2, pos, mstop;
-regexp_registers_t regs;
-{
-  struct failure_point { char *text, *partend, *code; }
-    *failure_stack_start, *failure_sp, *failure_stack_end,
-    initial_failure_stack[INITIAL_FAILURES];
-  char *code, *translate, *text, *textend, *partend, *part_2_end;
-  char *regstart_text[RE_NREGS], *regstart_partend[RE_NREGS];
-  char *regend_text[RE_NREGS], *regend_partend[RE_NREGS];
-  int a, b, ch, reg, regch, match_end;
-  char *regtext, *regpartend, *regtextend;
-
-#define PREFETCH					\
-  MACRO_BEGIN						\
-    if (text == partend)				\
-      {							\
-	if (text == textend)				\
-	  goto fail;					\
-	text = string2;					\
-	partend = part_2_end;				\
-      }							\
-  MACRO_END
+  code = bufp->buffer;
  
-#define NEXTCHAR(var)				\
-  MACRO_BEGIN					\
-    PREFETCH;					\
-    (var) = (unsigned char)*text++;		\
-    if (translate)				\
-      (var) = (unsigned char)translate[(var)];	\
-  MACRO_END
+  translate = bufp->translate;
+/*   translated = NULL; */
+/*   if (bufp->translate) */
+/*   { */
+/*      char *t1; */
+/*      char *t2; */
     
-  assert(pos >= 0 && size1 >= 0 && size2 >= 0 && mstop >= 0);
-  assert(mstop <= size1 + size2);
-  assert(pos <= mstop);
+/*      translated = malloc(size); */
+/*      if (translated == NULL) */
+/* 	goto error; */

-  if (pos <= size1)
-    {
-      text = string1 + pos;
-      if (mstop <= size1)
-	{
-	  partend = string1 + mstop;
-	  textend = partend;
-	}
-      else
-	{
-	  partend = string1 + size1;
-	  textend = string2 + mstop - size1;
-	}
-      part_2_end = string2 + mstop - size1;
-    }
-  else
-    {
-      text = string2 + pos - size1;
-      partend = string2 + mstop - size1;
-      textend = partend;
-      part_2_end = partend;
-    }
+/*      t1 = string; */
+/*      t2 = translated; */
+/*      while(t1 < textend) */
+/* 	*t2++ = bufp->translate[*t1++]; */
     
-  if (bufp->uses_registers && regs != NULL)
-    for (a = 0; a < RE_NREGS; a++)
-      regend_text[a] = NULL;
+/*      text = translated + pos; */
+/*      textstart = translated; */
+/*      textend = translated + size; */
+/*   } */
  
-  code = bufp->buffer;
-  translate = bufp->translate;
-  failure_stack_start = failure_sp = initial_failure_stack;
-  failure_stack_end = initial_failure_stack + INITIAL_FAILURES;
-
-#if 0
-  /* re_search_2 has already done this, and otherwise we get little benefit
-     from this.  So I'll leave this out. */
-  if (bufp->fastmap_accurate && !bufp->can_be_null &&
-      text != textend &&
-      !bufp->fastmap[translate ?
-		     (unsigned char)translate[(unsigned char)*text] :
-		     (unsigned char)*text])
-    return -1;  /* it can't possibly match */
-#endif
+  NEW_STATE(state);
  
  continue_matching:
-  for (;;)
-    {
  switch (*code++)
  {
     case Cend:
-	  if (partend != part_2_end)
-	    match_end = text - string1;
-	  else
-	    match_end = text - string2 + size1;
-	  if (regs)
     {
-	      regs->start[0] = pos;
-	      regs->end[0] = match_end;
+	match_end = text - textstart;
+	if (old_regs)
+	{
+	   old_regs->start[0] = pos;
+	   old_regs->end[0] = match_end;
 	   if (!bufp->uses_registers)
 	   {
 	      for (a = 1; a < RE_NREGS; a++)
 	      {
-		      regs->start[a] = -1;
-		      regs->end[a] = -1;
+		 old_regs->start[a] = -1;
+		 old_regs->end[a] = -1;
 	      }
 	   }
 	   else
 	   {
 	      for (a = 1; a < RE_NREGS; a++)
 	      {
-		      if (regend_text[a] == NULL)
+		 if ((GET_REG_START(state, a) == NULL) ||
+		     (GET_REG_END(state, a) == NULL))
 		 {
-			  regs->start[a] = -1;
-			  regs->end[a] = -1;
+		    old_regs->start[a] = -1;
+		    old_regs->end[a] = -1;
 		    continue;
 		 }
-		      if (regstart_partend[a] != part_2_end)
-			regs->start[a] = regstart_text[a] - string1;
-		      else
-			regs->start[a] = regstart_text[a] - string2 + size1;
-		      if (regend_partend[a] != part_2_end)
-			regs->end[a] = regend_text[a] - string1;
-		      else
-			regs->end[a] = regend_text[a] - string2 + size1;
+		 old_regs->start[a] = GET_REG_START(state, a) - textstart;
+		 old_regs->end[a] = GET_REG_END(state, a) - textstart;
 	      }
 	   }
 	}
-	  if (failure_stack_start != initial_failure_stack)
-	    free((char *)failure_stack_start);
+/* 	if(translated) */
+/* 	   free(translated); */
+	FREE_STATE(state);
 	return match_end - pos;
-	case Cbol:
-	  if (text == string1 || text[-1] == '\n') /* text[-1] always valid */
-	    break;
-	  goto fail;
-	case Ceol:
-	  if (text == string2 + size2 ||
-	      (text == string1 + size1 ?
-	       (size2 == 0 || *string2 == '\n') :
-	       *text == '\n'))
-	    break;
-	  goto fail;
-	case Cset:
-	  NEXTCHAR(ch);
-	  if (code[ch/8] & (1<<(ch & 7)))
-	    {
-	      code += 256/8;
-	      break;
     }
-	  goto fail;
-	case Cexact:
-	  NEXTCHAR(ch);
-	  if (ch != (unsigned char)*code++)
-	    goto fail;
-	  break;
-	case Canychar:
-	  NEXTCHAR(ch);
-	  if (ch == '\n')
-	    goto fail;
-	  break;
-	case Cstart_memory:
-	  reg = *code++;
-	  regstart_text[reg] = text;
-	  regstart_partend[reg] = partend;
-	  break;
-	case Cend_memory:
-	  reg = *code++;
-	  regend_text[reg] = text;
-	  regend_partend[reg] = partend;
-	  break;
-	case Cmatch_memory:
-	  reg = *code++;
-	  if (regend_text[reg] == NULL)
-	    goto fail;  /* or should we just match nothing? */
-	  regtext = regstart_text[reg];
-	  regtextend = regend_text[reg];
-	  if (regstart_partend[reg] == regend_partend[reg])
-	    regpartend = regtextend;
-	  else
-	    regpartend = string1 + size1;
-	  
-	  for (;regtext != regtextend;)
+     case Cbol:
     {
-	      NEXTCHAR(ch);
-	      if (regtext == regpartend)
-		regtext = string2;
-	      regch = (unsigned char)*regtext++;
-	      if (translate)
-		regch = (unsigned char)translate[regch];
-	      if (regch != ch)
+	if (text == textstart || text[-1] == '\n')
+	   goto continue_matching;
 	goto fail;
     }
-	  break;
-	case Cstar_jump:
-	  /* star is coded as:
-	       1: failure_jump 2
-	          ... code for operand of star
-		  star_jump 1
-	       2: ... code after star
-	     We change the star_jump to update_failure_jump if we can determine
-	     that it is safe to do so; otherwise we change it to an ordinary
-	     jump.
-	     plus is coded as
-	          jump 2
-	       1: failure_jump 3
-	       2: ... code for operand of plus
-	          star_jump 1
-	       3: ... code after plus
-	     For star_jump considerations this is processed identically
-	     to star. */
-	  a = (unsigned char)*code++;
-	  a |= (unsigned char)*code++ << 8;
-	  a = (int)(short)a;
-	  {
-	    char map[256], can_be_null;
-	    char *p1, *p2;
-
-	    p1 = code + a + 3; /* skip the failure_jump */
-	    assert(p1[-3] == Cfailure_jump);
-	    p2 = code;
-	    /* p1 points inside loop, p2 points to after loop */
-	    if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
-				       p2 - bufp->buffer, &can_be_null, map))
-	      goto make_normal_jump;
-	    /* If we might introduce a new update point inside the loop,
-	       we can't optimize because then update_jump would update a
-	       wrong failure point.  Thus we have to be quite careful here. */
-	  loop_p1:
-	    /* loop until we find something that consumes a character */
-	    switch (*p1++)
-	      {
-              case Cbol:
     case Ceol:
-              case Cbegbuf:
-              case Cendbuf:
-              case Cwordbeg:
-              case Cwordend:
-              case Cwordbound:
-              case Cnotwordbound:
-#ifdef emacs
-              case Cemacs_at_dot:
-#endif /* emacs */
-                goto loop_p1;
-              case Cstart_memory:
-              case Cend_memory:
-                p1++;
-                goto loop_p1;
-	      case Cexact:
-		ch = (unsigned char)*p1++;
-		if (map[ch])
-		  goto make_normal_jump;
-		break;
-	      case Canychar:
-		for (b = 0; b < 256; b++)
-		  if (b != '\n' && map[b])
-		    goto make_normal_jump;
-		break;
-	      case Cset:
-		for (b = 0; b < 256; b++)
-		  if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
-		    goto make_normal_jump;
-		p1 += 256/8;
-		break;
-	      default:
-		goto make_normal_jump;
-	      }
-	    /* now we know that we can't backtrack. */
-	    while (p1 != p2 - 3)
-	      {
-		switch (*p1++)
     {
-		  case Cend:
-		    abort();  /* we certainly shouldn't get this inside loop */
-		    /*NOTREACHED*/
-		  case Cbol:
-		  case Ceol:
-		  case Canychar:
-		  case Cbegbuf:
-		  case Cendbuf:
-		  case Cwordbeg:
-		  case Cwordend:
-		  case Cwordbound:
-		  case Cnotwordbound:
-#ifdef emacs
-		  case Cemacs_at_dot:
-#endif /* emacs */
-		    break;
+	if (text == textend || *text == '\n')
+	   goto continue_matching;
+	goto fail;
+     }
     case Cset:
-		    p1 += 256/8;
-		    break;
+     {
+	NEXTCHAR(ch);
+	if (code[ch/8] & (1<<(ch & 7)))
+	{
+	   code += 256/8;
+	   goto continue_matching;
+	}
+	goto fail;
+     }
     case Cexact:
+     {
+	NEXTCHAR(ch);
+	if (ch != (unsigned char)*code++)
+	   goto fail;
+/* 	{ */
+/* 	   char *p1 = code - 2; */
+/* 	   ch = *(code - 1); */
+/* 	   POP_FAILURE(state, code, text, goto done_matching, goto error); */
+/* 	   while ((code == p1) && (*text != ch)) */
+/* 	      POP_FAILURE(state, code, text, goto done_matching, goto error); */
+/* 	   if ((code == p1) && (*text == ch)) */
+/* 	   { */
+/* 	      code += 2; */
+/* 	      text++; */
+/* 	   } */
+/* 	} */
+	goto continue_matching;
+     }
+     case Canychar:
+     {
+	NEXTCHAR(ch);
+	if (ch == '\n')
+	   goto fail;
+	goto continue_matching;
+     }
     case Cstart_memory:
+     {
+	reg = *code++;
+	SET_REG_START(state, reg, text, goto error);
+	goto continue_matching;
+     }
     case Cend_memory:
-		  case Cmatch_memory:
-		  case Csyntaxspec:
-		  case Cnotsyntaxspec:
-		    p1++;
-		    break;
-		  case Cjump:
-		  case Cstar_jump:
-		  case Cfailure_jump:
-		  case Cupdate_failure_jump:
-		  case Cdummy_failure_jump:
-		    goto make_normal_jump;
-		  default:
-		    printf("regexpr.c: processing star_jump: unknown op %d\n", p1[-1]);
-		    break;
+     {
+	reg = *code++;
+	SET_REG_END(state, reg, text, goto error);
+	goto continue_matching;
     }
+     case Cmatch_memory:
+     {
+	/* Back-reference: the text at the current position must repeat the
+	 * span [regstart, regend) recorded for register `reg'; unset -> fail. */
+	reg = *code++;
+	regstart = GET_REG_START(state, reg);
+	regend = GET_REG_END(state, reg);
+	if ((regstart == NULL) || (regend == NULL))
+	   goto fail;  /* or should we just match nothing? */
+	regsize = regend - regstart;
+
+	if (regsize > (textend - text))
+	   goto fail;
+	if(translate)
+	{
+	   /* index the table as unsigned char: plain char may be negative */
+	   for (; regstart < regend; regstart++, text++)
+	      if (translate[(unsigned char)*regstart] != translate[(unsigned char)*text])
+		 goto fail;
 	}
-	    goto make_update_jump;
+	else
+	   for (; regstart < regend; regstart++, text++)
+	      if (*regstart != *text)
+		 goto fail;
+	goto continue_matching;
     }
-	make_normal_jump:
-	  /* printf("changing to normal jump\n"); */
-	  code -= 3;
-	  *code = Cjump;
-	  break;
-	make_update_jump:
-	  /* printf("changing to update jump\n"); */
-	  code -= 2;
-	  a += 3;  /* jump to after the Cfailure_jump */
-	  code[-1] = Cupdate_failure_jump;
-	  code[0] = a & 0xff;
-	  code[1] = a >> 8;
-	  /* fall to next case */
     case Cupdate_failure_jump:
-	  failure_sp[-1].text = text;
-	  failure_sp[-1].partend = partend;
+     {
+	UPDATE_FAILURE(state, text, goto error);
 	/* fall to next case */
+     }
+     /* treat Cstar_jump just like Cjump if it hasn't been optimized */
+     case Cstar_jump:
     case Cjump:
+     {
 	a = (unsigned char)*code++;
 	a |= (unsigned char)*code++ << 8;
 	code += (int)(short)a;
-	  break;
+	goto continue_matching;
+     }
     case Cdummy_failure_jump:
-	case Cfailure_jump:
-	  if (failure_sp == failure_stack_end)
-	    {
-	      if (failure_stack_start != initial_failure_stack)
-		goto error;
-	      failure_stack_start = (struct failure_point *)
-		malloc(MAX_FAILURES * sizeof(*failure_stack_start));
-	      if (failure_stack_start == NULL)
     {
-		  failure_stack_start = initial_failure_stack;
-		  goto error;
-		}
-	      failure_stack_end = failure_stack_start + MAX_FAILURES;
-	      memcpy((char *)failure_stack_start, (char *)initial_failure_stack,
-		     INITIAL_FAILURES * sizeof(*failure_stack_start));
-	      failure_sp = failure_stack_start + INITIAL_FAILURES;
-	    }
 	a = (unsigned char)*code++;
 	a |= (unsigned char)*code++ << 8;
 	a = (int)(short)a;
-	  if (code[-3] == Cdummy_failure_jump)
-	    { /* this is only used in plus */
 	assert(*code == Cfailure_jump);
 	b = (unsigned char)code[1];
 	b |= (unsigned char)code[2] << 8;
-	      failure_sp->code = code + (int)(short)b + 3;
-	      failure_sp->text = NULL;
+	PUSH_FAILURE(state, code + (int)(short)b + 3, NULL, goto error);
 	code += a;
+	goto continue_matching;
     }
-	  else
+     case Cfailure_jump:
     {
-	      failure_sp->code = code + a;
-	      failure_sp->text = text;
-	      failure_sp->partend = partend;
+	a = (unsigned char)*code++;
+	a |= (unsigned char)*code++ << 8;
+	a = (int)(short)a;
+	PUSH_FAILURE(state, code + a, text, goto error);
+	goto continue_matching;
     }
-	  failure_sp++;
-	  break;
     case Cbegbuf:
-	  if (text == string1)
-	    break;
+     {
+	if (text == textstart)
+	   goto continue_matching;
 	goto fail;
+     }
     case Cendbuf:
-	  if (size2 == 0 ? text == string1 + size1 : text == string2 + size2)
-	    break;
+     {
+	if (text == textend)
+	   goto continue_matching;
 	goto fail;
+     }
     case Cwordbeg:
-	  if (text == string2 + size2)
-	    goto fail;
-	  if (size2 == 0 && text == string1 + size1)
+     {
+	if (text == textend)
 	   goto fail;
-	  if (SYNTAX(text == string1 + size1 ? *string1 : *text) != Sword)
+	if (SYNTAX(*text) != Sword)
 	   goto fail;
-	  if (text == string1)
-	    break;
+	if (text == textstart)
+	   goto continue_matching;
 	if (SYNTAX(text[-1]) != Sword)
-	    break;
+	   goto continue_matching;
 	goto fail;
+     }
     case Cwordend:
-	  if (text == string1)
+     {
+	if (text == textstart)
 	   goto fail;
 	if (SYNTAX(text[-1]) != Sword)
 	   goto fail;
-	  if (text == string2 + size2)
-	    break;
-	  if (size2 == 0 && text == string1 + size1)
-	    break;
+	if (text == textend)
+	   goto continue_matching;
 	if (SYNTAX(*text) == Sword)
 	   goto fail;
-	  break;
+	goto continue_matching;
+     }
     case Cwordbound:
+     {
 	/* Note: as in gnu regexp, this also matches at the beginning
-	     and end of buffer. */
-	  if (text == string1 || text == string2 + size2 ||
-	      (size2 == 0 && text == string1 + size1))
-	    break;
-	  if ((SYNTAX(text[-1]) == Sword) ^
-	      (SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword))
-	    break;
+	 * and end of buffer.  */
+
+	if (text == textstart || text == textend)
+	   goto continue_matching;
+	if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
+	   goto continue_matching;
 	goto fail;
+     }
     case Cnotwordbound:
+     {
 	/* Note: as in gnu regexp, this never matches at the beginning
-	     and end of buffer. */
-	  if (text == string1 || text == string2 + size2 ||
-	      (size2 == 0 && text == string1 + size1))
+	 * and end of buffer.  */
+	if (text == textstart || text == textend)
 	   goto fail;
-	  if (!((SYNTAX(text[-1]) == Sword) ^
-		(SYNTAX(text == string1 + size1 ? *string2 : *text) == Sword)))
+	if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
 	   goto fail;
-	  break;
+	goto continue_matching;
+     }
     case Csyntaxspec:
+     {
 	NEXTCHAR(ch);
 	if (SYNTAX(ch) != (unsigned char)*code++)
 	   goto fail;
-	  break;
+	   goto continue_matching;
+     }
     case Cnotsyntaxspec:
+     {
 	NEXTCHAR(ch);
-	if (SYNTAX(ch) != (unsigned char)*code++)
-	   break;
-	  goto fail;
-#ifdef emacs
-	case Cemacs_at_dot:
-	  if (PTR_CHAR_POS((unsigned char *)text) + 1 != point)
-	    goto fail;
-	  break;
-#endif /* emacs */
+	if (SYNTAX(ch) == (unsigned char)*code++) /* char is in the class: "not" fails (break would fall into fail:) */
+	   goto fail;
+	goto continue_matching;
+     }
     default:
+     {
 	abort();
 	/*NOTREACHED*/
     }
  }
+
 #if 0 /* This line is never reached --Guido */
  abort();
 #endif
-  /*NOTREACHED*/
+  /*
+   *NOTREACHED
+   */
  
  fail:
-  if (failure_sp != failure_stack_start)
-    {
-      failure_sp--;
-      text = failure_sp->text;
-      if (text == NULL)
-	goto fail;
-      partend = failure_sp->partend;
-      code = failure_sp->code;
+  POP_FAILURE(state, code, text, goto done_matching, goto error);
  goto continue_matching;
-    }
-  if (failure_stack_start != initial_failure_stack)
-    free((char *)failure_stack_start);
+  
+  done_matching:
+/*   if(translated != NULL) */
+/*      free(translated); */
+  FREE_STATE(state);
  return -1;

  error:
-  if (failure_stack_start != initial_failure_stack)
-    free((char *)failure_stack_start);
+/*   if (translated != NULL) */
+/*      free(translated); */
+  FREE_STATE(state);
  return -2;
 }

 #undef PREFETCH
 #undef NEXTCHAR
-#undef PUSH_FAILURE

-int re_match(bufp, string, size, pos, regs)
-regexp_t bufp;
-char *string;
-int size, pos;
-regexp_registers_t regs;
+int re_search(regexp_t bufp,
+	      char *string,
+	      int size,
+	      int pos,
+	      int range,
+	      regexp_registers_t regs)
 {
-  return re_match_2(bufp, string, size, (char *)NULL, 0, pos, regs, size);
-}
-
-int re_search_2(bufp, string1, size1, string2, size2, pos, range, regs,
-		mstop)
-regexp_t bufp;
-char *string1, *string2;
-int size1, size2, pos, range, mstop;
-regexp_registers_t regs;
-{
-  char *fastmap, *translate, *text, *partstart, *partend;
-  int dir, ret;
+  char *fastmap;
+  char *translate;
+  char *text;
+  char *partstart;
+  char *partend;
+  int dir;
+  int ret;
  char anchor;
  
-  assert(size1 >= 0 && size2 >= 0 && pos >= 0 && mstop >= 0);
-  assert(pos + range >= 0 && pos + range <= size1 + size2); /* Bugfix by ylo */
-  assert(pos <= mstop);
+  assert(size >= 0 && pos >= 0);
+  assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */
  
  fastmap = bufp->fastmap;
  translate = bufp->translate;
@@ -1417,6 +1858,7 @@ regexp_registers_t regs;
  anchor = bufp->anchor;
  if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
     fastmap = NULL;
+
  if (range < 0)
  {
     dir = -1;
@@ -1424,59 +1866,39 @@ regexp_registers_t regs;
  }
  else
     dir = 1;
+
  if (anchor == 2)
     if (pos != 0)
 	return -1;
     else
 	range = 0;
+
  for (; range >= 0; range--, pos += dir)
  {
     if (fastmap)
     {
 	if (dir == 1)
 	{ /* searching forwards */
-	      if (pos < size1)
-		{
-		  text = string1 + pos;
-		  if (pos + range > size1)
-		    partend = string1 + size1;
-		  else
-		    partend = string1 + pos + range;
-		}
-	      else
-		{
-		  text = string2 + pos - size1;
-		  partend = string2 + pos + range - size1;
-		}
+
+	   text = string + pos;
+	   partend = string + size;
 	   partstart = text;
 	   if (translate)
 	      while (text != partend &&
-		       !fastmap[(unsigned char)
-				translate[(unsigned char)*text]])
+		     !fastmap[(unsigned char) translate[(unsigned char)*text]])
 		 text++;
 	   else
 	      while (text != partend && !fastmap[(unsigned char)*text])
 		 text++;
 	   pos += text - partstart;
 	   range -= text - partstart;
-	      if (pos == size1 + size2 && bufp->can_be_null == 0)
+	   if (pos == size && bufp->can_be_null == 0)
 	      return -1;
 	}
 	else
 	{ /* searching backwards */
-	      if (pos <= size1)
-		{
-		  text = string1 + pos;
-		  partstart = string1 + pos - range;
-		}
-	      else
-		{
-		  text = string2 + pos - size1;
-		  if (range < pos - size1)
-		    partstart = string2 + pos - size1 - range;
-		  else
-		    partstart = string2;
-		}
+	   text = string + pos;
+	   partstart = string + pos - range;
 	   partend = text;
 	   if (translate)
 	      while (text != partstart &&
@@ -1493,13 +1915,11 @@ regexp_registers_t regs;
     }
     if (anchor == 1)
     { /* anchored to begline */
-	  if (pos > 0 &&
-	      (pos <= size1 ? string1[pos - 1] :
-	       string2[pos - size1 - 1]) != '\n')
+	if (pos > 0 && string[pos - 1] != '\n') /* try only at string start or right after a newline */
 	   continue;
     }
-      assert(pos >= 0 && pos <= size1 + size2);
-      ret = re_match_2(bufp, string1, size1, string2, size2, pos, regs, mstop);
+     assert(pos >= 0 && pos <= size);
+     ret = re_match(bufp, string, size, pos, regs);
     if (ret >= 0)
 	return pos;
     if (ret == -2)
@@ -1507,198 +1927,3 @@ regexp_registers_t regs;
  }
  return -1;
 }
-
-int re_search(bufp, string, size, startpos, range, regs)
-regexp_t bufp;
-char *string;
-int size, startpos, range;
-regexp_registers_t regs;
-{
-  return re_search_2(bufp, string, size, (char *)NULL, 0,
-		     startpos, range, regs, size);
-}
-
-#ifdef UNUSED
-
-static struct re_pattern_buffer re_comp_buf;
-
-char *re_comp(s)
-char *s;
-{
-  if (s == NULL)
-    {
-      if (!re_comp_buf.buffer)
-	return "Out of memory";
-      return NULL;
-    }
-  if (!re_comp_buf.buffer)
-    {
-      /* the buffer will be allocated automatically */
-      re_comp_buf.fastmap = malloc(256);
-      re_comp_buf.translate = NULL;
-      if (re_comp_buf.fastmap == NULL)
-	return "Out of memory";
-    }
-  return re_compile_pattern(s, strlen(s), &re_comp_buf);
-}
-
-int re_exec(s)
-char *s;
-{
-  int len = strlen(s);
-  
-  return re_search(&re_comp_buf, s, len, 0, len, (regexp_registers_t)NULL) >= 0;
-}
-
-#endif
-
-#ifdef TEST_REGEXP
-
-int main()
-{
-  char buf[500];
-  char *cp;
-  struct re_pattern_buffer exp;
-  struct re_registers regs;
-  int a,pos;
-  char fastmap[256];
-
-  exp.allocated = 0;
-  exp.buffer = 0;
-  exp.translate = NULL;
-  exp.fastmap = fastmap;
-
-  /* re_set_syntax(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_ANSI_HEX); */
-
-  while (1)
-    {
-      printf("Enter regexp:\n");
-      gets(buf);
-      cp=re_compile_pattern(buf, strlen(buf), &exp);
-      if (cp)
-	{
-	  printf("Error: %s\n", cp);
-	  continue;
-	}
-      re_compile_fastmap(&exp);
-      printf("dump:\n");
-      for (pos = 0; pos < exp.used;)
-	{
-	  printf("%d: ", pos);
-	  switch (exp.buffer[pos++])
-	    {
-	    case Cend:
-	      strcpy(buf, "end");
-	      break;
-	    case Cbol:
-	      strcpy(buf, "bol");
-	      break;
-	    case Ceol:
-	      strcpy(buf, "eol");
-	      break;
-	    case Cset:
-	      strcpy(buf, "set ");
-	      for (a = 0; a < 256/8; a++)
-		sprintf(buf+strlen(buf)," %02x",
-			(unsigned char)exp.buffer[pos++]);
-	      break;
-	    case Cexact:
-	      sprintf(buf, "exact '%c' 0x%x", exp.buffer[pos],
-		      (unsigned char)exp.buffer[pos]);
-	      pos++;
-	      break;
-	    case Canychar:
-	      strcpy(buf, "anychar");
-	      break;
-	    case Cstart_memory:
-	      sprintf(buf, "start_memory %d", exp.buffer[pos++]);
-	      break;
-	    case Cend_memory:
-	      sprintf(buf, "end_memory %d", exp.buffer[pos++]);
-	      break;
-	    case Cmatch_memory:
-	      sprintf(buf, "match_memory %d", exp.buffer[pos++]);
-	      break;
-	    case Cjump:
-	    case Cdummy_failure_jump:
-	    case Cstar_jump:
-	    case Cfailure_jump:
-	    case Cupdate_failure_jump:
-	      a = (unsigned char)exp.buffer[pos++];
-	      a += (unsigned char)exp.buffer[pos++] << 8;
-	      a = (int)(short)a;
-	      switch (exp.buffer[pos-3])
-		{
-		case Cjump:
-		  cp = "jump";
-		  break;
-		case Cstar_jump:
-		  cp = "star_jump";
-		  break;
-		case Cfailure_jump:
-		  cp = "failure_jump";
-		  break;
-		case Cupdate_failure_jump:
-		  cp = "update_failure_jump";
-		  break;
-		case Cdummy_failure_jump:
-		  cp = "dummy_failure_jump";
-		  break;
-		default:
-		  cp = "unknown jump";
-		  break;
-		}
-	      sprintf(buf, "%s %d", cp, a + pos);
-	      break;
-	    case Cbegbuf:
-	      strcpy(buf,"begbuf");
-	      break;
-	    case Cendbuf:
-	      strcpy(buf,"endbuf");
-	      break;
-	    case Cwordbeg:
-	      strcpy(buf,"wordbeg");
-	      break;
-	    case Cwordend:
-	      strcpy(buf,"wordend");
-	      break;
-	    case Cwordbound:
-	      strcpy(buf,"wordbound");
-	      break;
-	    case Cnotwordbound:
-	      strcpy(buf,"notwordbound");
-	      break;
-	    default:
-	      sprintf(buf, "unknown code %d",
-		      (unsigned char)exp.buffer[pos - 1]);
-	      break;
-	    }
-	  printf("%s\n", buf);
-	}
-      printf("can_be_null = %d uses_registers = %d anchor = %d\n",
-	     exp.can_be_null, exp.uses_registers, exp.anchor);
-      
-      printf("fastmap:");
-      for (a = 0; a < 256; a++)
-	if (exp.fastmap[a])
-	  printf(" %d", a);
-      printf("\n");
-      printf("Enter strings.  An empty line terminates.\n");
-      while (fgets(buf, sizeof(buf), stdin))
-	{
-	  if (buf[0] == '\n')
-	    break;
-	  a = re_search(&exp, buf, strlen(buf), 0, strlen(buf), &regs);
-	  printf("search returns %d\n", a);
-	  if (a != -1)
-	    {
-	      for (a = 0; a < RE_NREGS; a++)
-		{
-		  printf("buf %d: %d to %d\n", a, regs.start[a], regs.end[a]);
-		}
-	    }
-	}
-    }
-}
-
-#endif /* TEST_REGEXP */
--- a/Modules/regexpr.h
+++ b/Modules/regexpr.h
@@ -69,9 +69,7 @@ typedef struct re_registers
 #define re_set_syntax _Py_re_set_syntax
 #define re_compile_pattern _Py_re_compile_pattern
 #define re_match _Py_re_match
-#define re_match_2 _Py_re_match_2
 #define re_search _Py_re_search
-#define re_search_2 _Py_re_search_2
 #define re_compile_fastmap _Py_re_compile_fastmap
 #define re_comp _Py_re_comp
 #define re_exec _Py_re_exec
@@ -96,20 +94,12 @@ char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
   translation table, or NULL if it is not used. */

 int re_match(regexp_t compiled, char *string, int size, int pos,
-	     regexp_registers_t regs);
+	     regexp_registers_t old_regs);
 /* This tries to match the regexp against the string.  This returns the
   length of the matched portion, or -1 if the pattern could not be
   matched and -2 if an error (such as failure stack overflow) is
   encountered. */

-int re_match_2(regexp_t compiled, char *string1, int size1,
-	      char *string2, int size2, int pos, regexp_registers_t regs,
-	       int mstop);
-/* This tries to match the regexp to the concatenation of string1 and
-   string2.  This returns the length of the matched portion, or -1 if the
-   pattern could not be matched and -2 if an error (such as failure stack
-   overflow) is encountered. */
-
 int re_search(regexp_t compiled, char *string, int size, int startpos,
 	      int range, regexp_registers_t regs);
 /* This searches for a substring matching the regexp.  This returns the first
@@ -119,12 +109,6 @@ int re_search(regexp_t compiled, char *string, int size, int startpos,
   which a match must not go.  This returns -1 if no match is found, and
   -2 if an error (such as failure stack overflow) is encountered. */

-int re_search_2(regexp_t compiled, char *string1, int size1,
-		char *string2, int size2, int startpos, int range,
-		regexp_registers_t regs, int mstop);
-/* This is like re_search, but search from the concatenation of string1 and
-   string2.  */
-
 void re_compile_fastmap(regexp_t compiled);
 /* This computes the fastmap for the regexp.  For this to have any effect,
   the calling program must have initialized the fastmap field to point
@@ -146,9 +130,7 @@ extern int re_syntax;
 int re_set_syntax();
 char *re_compile_pattern();
 int re_match();
-int re_match_2();
 int re_search();
-int re_search_2();
 void re_compile_fastmap();
 char *re_comp();
 int re_exec();