Remove pcre module

5ededdd6 · Andrew M. Kuchling · f7e836e6 · f7e836e6 · f7e836e6 · f7e836e6
Commit 5ededdd6 authored Jun 02, 2004 by Andrew M. Kuchling
Showing with 0 additions and 5771 deletions

Modules/pcre-int.h Modules/pcre-int.h +0 -295

Modules/pcre.h Modules/pcre.h +0 -84

Modules/pcremodule.c Modules/pcremodule.c +0 -636

Modules/pypcre.c Modules/pypcre.c +0 -4756

No files found.
--- a/Modules/pcre-int.h
+++ b/Modules/pcre-int.h
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-
-#define PCRE_VERSION       "1.09 28-Apr-1998"
-
-
-/* This is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language. See
-the file Tech.Notes for some information on the internals.
-
-Written by: Philip Hazel <ph10@cam.ac.uk>
-
-           Copyright (c) 1998 University of Cambridge
-
-----------------------------------------------------------------------------
-Permission is granted to anyone to use this software for any purpose on any
-computer system, and to redistribute it freely, subject to the following
-restrictions:
-
-1. This software is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
-2. The origin of this software must not be misrepresented, either by
-   explicit claim or by omission.
-
-3. Altered versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-----------------------------------------------------------------------------
-*/
-
-/* This header contains definitions that are shared between the different
-modules, but which are not relevant to the outside. */
-
-
-/* Standard C headers plus the external interface definition */
-
-#include <ctype.h>
-#include <limits.h>
-#include <setjmp.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pcre.h"
-
-/* In case there is no definition of offsetof() provided - though any proper
-Standard C system should have one. */
-
-#ifndef offsetof
-#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
-#endif
-
-/* Private options flags start at the most significant end of the two bytes.
-The public options defined in pcre.h start at the least significant end. Make
-sure they don't overlap! */
-
-#define PCRE_FIRSTSET           0x8000  /* first_char is set */
-#define PCRE_STARTLINE          0x4000  /* start after \n for multiline */
-#define PCRE_COMPILED_CASELESS  0x2000  /* like it says */
-
-/* Options for the "extra" block produced by pcre_study(). */
-
-#define PCRE_STUDY_CASELESS 0x01     /* study was caseless */
-#define PCRE_STUDY_MAPPED   0x02     /* a map of starting chars exists */
-
-/* Masks for identifying the public options: all permitted at compile time,
-only some permitted at run or study time. */
-
-#ifdef FOR_PYTHON
-#define PUBLIC_OPTIONS \
-  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
-   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY| \
-   PCRE_LOCALE)
-#else
-#define PUBLIC_OPTIONS \
-  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
-   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY)
-#endif
-#define PUBLIC_EXEC_OPTIONS \
-  (PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \
-   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY)
-
-#define PUBLIC_STUDY_OPTIONS (PCRE_CASELESS)
-
-/* Magic number to provide a small check against being handed junk. */
-
-#define MAGIC_NUMBER  0x50435245   /* 'PCRE' */
-
-/* Miscellaneous definitions */
-
-typedef int BOOL;
-
-#define FALSE   0
-#define TRUE    1
-
-/* These are escaped items that aren't just an encoding of a particular data
-value such as \n. They must have non-zero values, as check_escape() returns
-their negation. Also, they must appear in the same order as in the opcode
-definitions below, up to ESC_Z. The final one must be ESC_REF as subsequent
-values are used for \1, \2, \3, etc. There is a test in the code for an escape
-greater than ESC_b and less than ESC_X to detect the types that may be
-repeated. If any new escapes are put in-between that don't consume a character,
-that code will have to change. */
-
-enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w,
-
-                    /* These are not Perl escapes, so can't appear in the */
-       ESC_X,       /* simple table-lookup because they must be conditional */
-                    /* on PCRE_EXTRA. */
-       ESC_Z,
-       ESC_REF };
-
-/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
-that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
-OP_EOD must correspond in order to the list of escapes immediately above. */
-
-enum {
-  OP_END,            /* End of pattern */
-
-  /* Values corresponding to backslashed metacharacters */
-
-  OP_SOD,            /* Start of data: \A */
-  OP_NOT_WORD_BOUNDARY,  /* \B */
-  OP_WORD_BOUNDARY,      /* \b */
-  OP_NOT_DIGIT,          /* \D */
-  OP_DIGIT,              /* \d */
-  OP_NOT_WHITESPACE,     /* \S */
-  OP_WHITESPACE,         /* \s */
-  OP_NOT_WORDCHAR,       /* \W */
-  OP_WORDCHAR,           /* \w */
-  OP_CUT,            /* The analogue of Prolog's "cut" operation (extension) */
-  OP_EOD,            /* End of data: \Z. */
-
-  OP_NOT_WORD_BOUNDARY_L,  /* localized \B */
-  OP_WORD_BOUNDARY_L,      /* localized \b */
-  OP_NOT_WORDCHAR_L,       /* localized \W */
-  OP_WORDCHAR_L,           /* localized \w */
-
-  OP_CIRC,           /* Start of line - varies with multiline switch */
-  OP_DOLL,           /* End of line - varies with multiline switch */
-  OP_ANY,            /* Match any character */
-  OP_CHARS,          /* Match string of characters */
-  OP_NOT,            /* Match anything but the following char */
-
-  OP_STAR,           /* The maximizing and minimizing versions of */
-  OP_MINSTAR,        /* all these opcodes must come in pairs, with */
-  OP_PLUS,           /* the minimizing one second. */
-  OP_MINPLUS,        /* This first set applies to single characters */
-  OP_QUERY,
-  OP_MINQUERY,
-  OP_UPTO,           /* From 0 to n matches */
-  OP_MINUPTO,
-  OP_EXACT,          /* Exactly n matches */
-
-  OP_NOTSTAR,        /* The maximizing and minimizing versions of */
-  OP_NOTMINSTAR,     /* all these opcodes must come in pairs, with */
-  OP_NOTPLUS,        /* the minimizing one second. */
-  OP_NOTMINPLUS,     /* This first set applies to "not" single characters */
-  OP_NOTQUERY,
-  OP_NOTMINQUERY,
-  OP_NOTUPTO,        /* From 0 to n matches */
-  OP_NOTMINUPTO,
-  OP_NOTEXACT,       /* Exactly n matches */
-
-  OP_TYPESTAR,       /* The maximizing and minimizing versions of */
-  OP_TYPEMINSTAR,    /* all these opcodes must come in pairs, with */
-  OP_TYPEPLUS,       /* the minimizing one second. These codes must */
-  OP_TYPEMINPLUS,    /* be in exactly the same order as those above. */
-  OP_TYPEQUERY,      /* This set applies to character types such as \d */
-  OP_TYPEMINQUERY,
-  OP_TYPEUPTO,       /* From 0 to n matches */
-  OP_TYPEMINUPTO,
-  OP_TYPEEXACT,      /* Exactly n matches */
-
-  OP_CRSTAR,         /* The maximizing and minimizing versions of */
-  OP_CRMINSTAR,      /* all these opcodes must come in pairs, with */
-  OP_CRPLUS,         /* the minimizing one second. These codes must */
-  OP_CRMINPLUS,      /* be in exactly the same order as those above. */
-  OP_CRQUERY,        /* These are for character classes and back refs */
-  OP_CRMINQUERY,
-  OP_CRRANGE,        /* These are different to the three seta above. */
-  OP_CRMINRANGE,
-
-  OP_CLASS,          /* Match a character class */
-  OP_NEGCLASS,       /* Match a character class, specified negatively */
-  OP_CLASS_L,        /* Match a character class */
-  OP_REF,            /* Match a back reference */
-
-  OP_ALT,            /* Start of alternation */
-  OP_KET,            /* End of group that doesn't have an unbounded repeat */
-  OP_KETRMAX,        /* These two must remain together and in this */
-  OP_KETRMIN,        /* order. They are for groups the repeat for ever. */
-
-  OP_ASSERT,
-  OP_ASSERT_NOT,
-  OP_ONCE,           /* Once matched, don't back up into the subpattern */
-
-  OP_BRAZERO,        /* These two must remain together and in this */
-  OP_BRAMINZERO,     /* order. */
-
-  OP_BRA             /* This and greater values are used for brackets that
-                        extract substrings. */
-};
-
-/* The highest extraction number. This is limited by the number of opcodes
-left after OP_BRA, i.e. 255 - OP_BRA. We actually set it somewhat lower. */
-
-#define EXTRACT_MAX  99
-
-/* The texts of compile-time error messages are defined as macros here so that
-they can be accessed by the POSIX wrapper and converted into error codes.  Yes,
-I could have used error codes in the first place, but didn't feel like changing
-just to accommodate the POSIX wrapper. */
-
-#define ERR1  "\\ at end of pattern"
-#define ERR2  "\\c at end of pattern"
-#define ERR3  "unrecognized character follows \\"
-#define ERR4  "numbers out of order in {} quantifier"
-#define ERR5  "number too big in {} quantifier"
-#define ERR6  "missing terminating ] for character class"
-#define ERR7  "invalid escape sequence in character class"
-#define ERR8  "range out of order in character class"
-#define ERR9  "nothing to repeat"
-#define ERR10 "operand of unlimited repeat could match the empty string"
-#define ERR11 "internal error: unexpected repeat"
-#define ERR12 "unrecognized character after (?"
-#define ERR13 "too many capturing parenthesized sub-patterns"
-#define ERR14 "missing )"
-#define ERR15 "back reference to non-existent subpattern"
-#define ERR16 "erroffset passed as NULL"
-#define ERR17 "unknown option bit(s) set"
-#define ERR18 "missing ) after comment"
-#define ERR19 "too many sets of parentheses"
-#define ERR20 "regular expression too large"
-#define ERR21 "failed to get memory"
-#define ERR22 "unmatched brackets"
-#define ERR23 "internal error: code overflow"
-
-/* All character handling must be done as unsigned characters. Otherwise there
-are problems with top-bit-set characters and functions such as isspace().
-However, we leave the interface to the outside world as char *, because that
-should make things easier for callers. We define a short type for unsigned char
-to save lots of typing. I tried "uchar", but it causes problems on Digital
-Unix, where it is defined in sys/types, so use "uschar" instead. */
-
-typedef unsigned char uschar;
-
-/* The real format of the start of the pcre block; the actual code vector
-runs on as long as necessary after the end. */
-
-typedef struct real_pcre {
-  unsigned int  magic_number;
-  unsigned short int options;
-  unsigned char top_bracket;
-  unsigned char top_backref;
-  unsigned char first_char;
-  unsigned char code[1];
-} real_pcre;
-
-/* The real format of the extra block returned by pcre_study(). */
-
-typedef struct real_pcre_extra {
-  unsigned char options;
-  unsigned char start_bits[32];
-} real_pcre_extra;
-
-/* Global tables from chartables.c */
-
-extern uschar pcre_lcc[];
-extern uschar pcre_fcc[];
-extern uschar pcre_cbits[];
-extern uschar pcre_ctypes[];
-
-/* Bit definitions for entries in pcre_ctypes[]. */
-
-#define ctype_space   0x01
-#define ctype_letter  0x02
-#define ctype_digit   0x04
-#define ctype_xdigit  0x08
-#define ctype_word    0x10   /* alphameric or '_' */
-#define ctype_odigit  0x20   /* octal digit */
-#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
-
-/* Offsets for the bitmap tables */
-
-#define cbit_digit    0
-#define cbit_letter  32
-#define cbit_word    64
-#define cbit_space   96
-#define cbit_length 128      /* Length of the cbits table */
-
-/* End of internal.h */
--- a/Modules/pcre.h
+++ b/Modules/pcre.h
-/*************************************************
-*       Perl-Compatible Regular Expressions      *
-*************************************************/
-
-/* Copyright (c) 1998 University of Cambridge */
-
-#ifndef _PCRE_H
-#define _PCRE_H
-
-#ifdef FOR_PYTHON
-#include "Python.h"
-#endif
-
-/* Have to include stdlib.h in order to ensure that size_t is defined;
-it is needed here for malloc. */
-
-#ifndef DONT_HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-#include <stdlib.h>
-
-/* Allow for C++ users */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Options */
-
-#define PCRE_CASELESS        0x0001
-#define PCRE_EXTENDED        0x0002
-#define PCRE_ANCHORED        0x0004
-#define PCRE_MULTILINE       0x0008
-#define PCRE_DOTALL          0x0010
-#define PCRE_DOLLAR_ENDONLY  0x0020
-#define PCRE_EXTRA           0x0040
-#define PCRE_NOTBOL          0x0080
-#define PCRE_NOTEOL          0x0100
-#define PCRE_UNGREEDY        0x0400
-#ifdef FOR_PYTHON
-#define PCRE_LOCALE          0x0200
-#endif
-
-/* Exec-time error codes */
-
-#define PCRE_ERROR_NOMATCH        (-1)
-#define PCRE_ERROR_BADREF         (-2)
-#define PCRE_ERROR_NULL           (-3)
-#define PCRE_ERROR_BADOPTION      (-4)
-#define PCRE_ERROR_BADMAGIC       (-5)
-#define PCRE_ERROR_UNKNOWN_NODE   (-6)
-#define PCRE_ERROR_NOMEMORY       (-7)
-
-/* Types */
-
-typedef void pcre;
-typedef void pcre_extra;
-
-/* Store get and free functions. These can be set to alternative malloc/free
-functions if required. */
-
-extern void *(*pcre_malloc)(size_t);
-extern void  (*pcre_free)(void *);
-
-/* Functions */
-
-#ifdef FOR_PYTHON
-extern pcre *pcre_compile(const char *, int, const char **, int *, PyObject *);
-extern int pcre_exec(const pcre *, const pcre_extra *, const char *,
-  int, int, int, int *, int);
-#else
-extern pcre *pcre_compile(const char *, int, const char **, int *);
-extern int pcre_exec(const pcre *, const pcre_extra *, const char *,
-  int, int, int *, int);
-#endif
-extern int pcre_info(const pcre *, int *, int *);
-extern pcre_extra *pcre_study(const pcre *, int, const char **);
-extern const char *pcre_version(void);
-
-#ifdef __cplusplus
-}  /* extern "C" */
-#endif
-
-#endif /* End of pcre.h */
--- a/Modules/pcremodule.c
+++ b/Modules/pcremodule.c
-/* Pcre objects */
-
-#include "Python.h"
-
-#ifndef Py_eval_input
-/* For Python 1.4, graminit.h has to be explicitly included */
-#include "graminit.h"
-#define Py_eval_input eval_input
-#endif
-
-#ifndef FOR_PYTHON
-#define FOR_PYTHON
-#endif
-
-#include "pcre.h"
-#include "pcre-int.h"
-
-static PyObject *ErrorObject;
-
-typedef struct {
-	PyObject_HEAD
-	pcre *regex;
-	pcre_extra *regex_extra;
-        int num_groups;
-} PcreObject;
-
-static PyTypeObject Pcre_Type;
-
-#define PcreObject_Check(v)	((v)->ob_type == &Pcre_Type)
-#define NORMAL			0
-#define CHARCLASS		1
-#define REPLACEMENT		2
-
-#define CHAR 			0
-#define MEMORY_REFERENCE 	1
-#define SYNTAX 			2
-#define NOT_SYNTAX 		3
-#define SET			4
-#define WORD_BOUNDARY		5
-#define NOT_WORD_BOUNDARY	6
-#define BEGINNING_OF_BUFFER	7
-#define END_OF_BUFFER		8
-#define STRING                  9
-
-static PcreObject *
-newPcreObject(PyObject *args)
-{
-	PcreObject *self;
-	self = PyObject_New(PcreObject, &Pcre_Type);
-	if (self == NULL)
-		return NULL;
-	self->regex = NULL;
-	self->regex_extra = NULL;
-	return self;
-}
-
-/* Pcre methods */
-
-static void
-PyPcre_dealloc(PcreObject *self)
-{
-	if (self->regex) (pcre_free)(self->regex);
-	if (self->regex_extra) (pcre_free)(self->regex_extra);
-	PyObject_Del(self);
-}
-
-
-static PyObject *
-PyPcre_exec(PcreObject *self, PyObject *args)
-{
-        char *string;
-	int stringlen, pos = 0, options=0, endpos = -1, i, count;
-	int offsets[100*2]; 
-	PyObject *list;
-
-	if (!PyArg_ParseTuple(args, "t#|iii:match", &string, &stringlen, 
-                                     &pos, &endpos, &options))
-		return NULL;
-	if (endpos == -1) {endpos = stringlen;}
-	count = pcre_exec(self->regex, self->regex_extra, 
-			  string, endpos, pos, options,
-			  offsets, sizeof(offsets)/sizeof(int) );
-	/* If an error occurred during the match, and an exception was raised,
-	   just return NULL and leave the exception alone.  The most likely
-	   problem to cause this would be running out of memory for
-	   the failure stack. */
-	if (PyErr_Occurred())
-	{
-		return NULL;
-	}
-	if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
-	if (count<0)
-	{
-		PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
-		PyErr_SetObject(ErrorObject, errval);
-		Py_XDECREF(errval);
-		return NULL;
-	}
-	
-	list=PyList_New(self->num_groups+1);
-	if (list==NULL) return NULL;
-	for(i=0; i<=self->num_groups; i++)
-	{
-		PyObject *v;
-		int start=offsets[i*2], end=offsets[i*2+1];
-		/* If the group wasn't affected by the match, return -1, -1 */
-		if (start<0 || count<=i) 
-		{start=end=-1;}
-		v=Py_BuildValue("ii", start, end);
-		if (v==NULL) {Py_DECREF(list); return NULL;}
-		PyList_SetItem(list, i, v);
-	}
-	return list;
-}
-
-static PyMethodDef Pcre_methods[] = {
-	{"match",	(PyCFunction)PyPcre_exec,	METH_VARARGS},
-	{NULL,		NULL}		/* sentinel */
-};
-
-static PyObject *
-PyPcre_getattr(PcreObject *self, char *name)
-{
-	return Py_FindMethod(Pcre_methods, (PyObject *)self, name);
-}
-
-
-static PyTypeObject Pcre_Type = {
-	PyObject_HEAD_INIT(NULL)
-	0,			/*ob_size*/
-	"pcre.Pcre",		/*tp_name*/
-	sizeof(PcreObject),	/*tp_basicsize*/
-	0,			/*tp_itemsize*/
-	/* methods */
-	(destructor)PyPcre_dealloc, /*tp_dealloc*/
-	0,			/*tp_print*/
-	(getattrfunc)PyPcre_getattr, /*tp_getattr*/
-	0,                      /*tp_setattr*/
-	0,			/*tp_compare*/
-	0,			/*tp_repr*/
-	0,			/*tp_as_number*/
-	0,			/*tp_as_sequence*/
-	0,			/*tp_as_mapping*/
-	0,			/*tp_hash*/
-};
-/* --------------------------------------------------------------------- */
-
-static PyObject *
-PyPcre_compile(PyObject *self, PyObject *args)
-{
-	PcreObject *rv;
-	PyObject *dictionary;
-	char *pattern;
-	const char *error;
-	
-	int options, erroroffset;
-	if (!PyArg_ParseTuple(args, "siO!:pcre_compile", &pattern, &options,
-			      &PyDict_Type, &dictionary))
-		return NULL;
-	rv = newPcreObject(args);
-	if ( rv == NULL )
-		return NULL;
-
-	rv->regex = pcre_compile((char*)pattern, options, 
-				 &error, &erroroffset, dictionary);
-	if (rv->regex==NULL) 
-	{
-		Py_DECREF(rv);
-		if (!PyErr_Occurred())
-		{
-			PyObject *errval = Py_BuildValue("si", error, erroroffset);
-			PyErr_SetObject(ErrorObject, errval);
-			Py_XDECREF(errval);
-		}
-		return NULL;
-	}
-	rv->regex_extra=pcre_study(rv->regex, 0, &error);
-	if (rv->regex_extra==NULL && error!=NULL) 
-	{
-		PyObject *errval = Py_BuildValue("si", error, 0);
-		Py_DECREF(rv);
-		PyErr_SetObject(ErrorObject, errval);
-		Py_XDECREF(errval);
-		return NULL;
-	}
-        rv->num_groups = pcre_info(rv->regex, NULL, NULL);
-	if (rv->num_groups<0) 
-	{
-		PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
-		PyErr_SetObject(ErrorObject, errval);
-		Py_XDECREF(errval);
-		Py_DECREF(rv);
-		return NULL;
-	}
-	return (PyObject *)rv;
-}
-
-static PyObject *
-PyPcre_expand_escape(unsigned char *pattern, int pattern_len,
-                     int *indexptr, int *typeptr)
-{
-	unsigned char c;
-	int index = *indexptr;
-  
-	if (pattern_len<=index)
-	{
-		PyErr_SetString(ErrorObject, "escape ends too soon");
-		return NULL;
-	}
-	c=pattern[index]; index++;
-	*typeptr=CHAR;
-
-	switch (c)
-	{
-	case('t'):
-		*indexptr=index;
-		return Py_BuildValue("c", (char)9);
-	case('n'):
-		*indexptr = index;
-		return Py_BuildValue("c", (char)10);
-	case('v'):
-		*indexptr = index;
-		return Py_BuildValue("c", (char)11);
-	case('r'):
-		*indexptr = index;
-		return Py_BuildValue("c", (char)13);
-	case('f'):
-		*indexptr = index;
-		return Py_BuildValue("c", (char)12);
-	case('a'):
-		*indexptr = index;
-		return Py_BuildValue("c", (char)7);
-	case('b'):
-		*indexptr=index;
-		return Py_BuildValue("c", (char)8);
-	case('\\'):
-		*indexptr=index;
-		return Py_BuildValue("c", '\\');
-
-	case('x'):
-	{
-		int x, ch, end;
-
-		x = 0; end = index;
-		while ( (end<pattern_len && pcre_ctypes[ pattern[end] ] & ctype_xdigit) != 0)
-		{
-			ch = pattern[end];
-			x = x * 16 + pcre_lcc[ch] -
-				(((pcre_ctypes[ch] & ctype_digit) != 0)? '0' : 'W');
-			x &= 255;
-			end++;
-		}
-		if (end==index)
-		{
-			PyErr_SetString(ErrorObject, "\\x must be followed by hex digits");
-			return NULL;
-		}
-		*indexptr = end;
-		return Py_BuildValue("c", (char)x);
-	}
-
-	case('E'):    case('G'):    case('L'):    case('Q'):
-	case('U'):    case('l'):    case('u'):
-	{
-		char message[50];
-		PyOS_snprintf(message, sizeof(message),
-			      "\\%c is not allowed", c);
-		PyErr_SetString(ErrorObject, message);
-		return NULL;
-	}
-
-	case('g'):
-	{
-		int end, i;
-		int group_num = 0, is_number=0;
-
-		if (pattern_len<=index)
-		{
-			PyErr_SetString(ErrorObject, "unfinished symbolic reference");
-			return NULL;
-		}
-		if (pattern[index]!='<')
-		{
-			PyErr_SetString(ErrorObject, "missing < in symbolic reference");
-			return NULL;
-		}
-		index++;
-		end=index;
-		while (end<pattern_len && pattern[end]!='>')
-			end++;
-		if (end==pattern_len)
-		{
-			PyErr_SetString(ErrorObject, "unfinished symbolic reference");
-			return NULL;
-		}
-
-		if (index==end)		/* Zero-length name */
-		{
-			/* XXX should include the text of the reference */
-			PyErr_SetString(ErrorObject, "zero-length symbolic reference");
-			return NULL;
-		}
-		if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
-		{
-		        is_number = 1;
-			group_num = pattern[index] - '0';
-		}
-
-		for(i=index+1; i<end; i++)
-		{
-		        if (is_number && 
-			    !(pcre_ctypes[pattern[i]] & ctype_digit) )
-			{
-				/* XXX should include the text of the reference */
-				PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
-				return NULL;			       
-			}
-			else {group_num = group_num * 10 + pattern[i] - '0';}
-			if (!(pcre_ctypes[pattern[i]] & ctype_word) )
-			{
-				/* XXX should include the text of the reference */
-				PyErr_SetString(ErrorObject, "illegal symbolic reference");
-				return NULL;
-			}
-		}	
-	    
-		*typeptr = MEMORY_REFERENCE;
-		*indexptr = end+1;
-		/* If it's a number, return the integer value of the group */
-		if (is_number) return Py_BuildValue("i", group_num);
-		/* Otherwise, return a string containing the group name */
-		return Py_BuildValue("s#", pattern+index, end-index);
-	}
-
-	case('0'):
-	{
-		/* \0 always indicates an octal escape, so we consume up to 3
-		   characters, as long as they're all octal digits */
-		int octval=0, i;
-		index--;
-		for(i=index;
-		    i<=index+2 && i<pattern_len 
-			    && (pcre_ctypes[ pattern[i] ] & ctype_odigit );
-		    i++)
-		{
-			octval = octval * 8 + pattern[i] - '0';
-		}
-		if (octval>255)
-		{
-			PyErr_SetString(ErrorObject, "octal value out of range");
-			return NULL;
-		}
-		*indexptr = i;
-		return Py_BuildValue("c", (unsigned char)octval);
-	}
-
-	case('1'):    case('2'):    case('3'):    case('4'):
-	case('5'):    case('6'):    case('7'):    case('8'):
-	case('9'):
-	{
-		/* Handle \?, where ? is from 1 through 9 */
-		int value=0;
-		index--;
-		/* If it's at least a two-digit reference, like \34, it might
-		   either be a 3-digit octal escape (\123) or a 2-digit
-		   decimal memory reference (\34) */
-
-		if ( (index+1) <pattern_len && 
-		     (pcre_ctypes[ pattern[index+1] ] & ctype_digit) )
-		{
-			if ( (index+2) <pattern_len && 
-			     (pcre_ctypes[ pattern[index+2] ] & ctype_odigit) &&
-			     (pcre_ctypes[ pattern[index+1] ] & ctype_odigit) &&
-			     (pcre_ctypes[ pattern[index  ] ] & ctype_odigit)
-				)
-			{
-				/* 3 octal digits */
-				value= 8*8*(pattern[index  ]-'0') +
-					8*(pattern[index+1]-'0') +
-					(pattern[index+2]-'0');
-				if (value>255)
-				{
-					PyErr_SetString(ErrorObject, "octal value out of range");
-					return NULL;
-				}
-				*indexptr = index+3;
-				return Py_BuildValue("c", (unsigned char)value);
-			}
-			else
-			{
-				/* 2-digit form, so it's a memory reference */
-				value= 10*(pattern[index  ]-'0') +
-					(pattern[index+1]-'0');
-				if (value<1 || EXTRACT_MAX<=value)
-				{
-					PyErr_SetString(ErrorObject, "memory reference out of range");
-					return NULL;
-				}
-				*typeptr = MEMORY_REFERENCE;
-				*indexptr = index+2;
-				return Py_BuildValue("i", value);
-			}
-		}
-		else 
-		{
-			/* Single-digit form, like \2, so it's a memory reference */
-			*typeptr = MEMORY_REFERENCE;
-			*indexptr = index+1;
-			return Py_BuildValue("i", pattern[index]-'0');
-		}
-	}
-
-	default:
-	  /* It's some unknown escape like \s, so return a string containing
-	     \s */
-		*typeptr = STRING;
-		*indexptr = index;
-		return Py_BuildValue("s#", pattern+index-2, 2);
-	}
-}
-
-static PyObject *
-PyPcre_expand(PyObject *self, PyObject *args)
-{
-	PyObject *results, *match_obj;
-	PyObject *repl_obj, *newstring;
-	unsigned char *repl;
-	int size, total_len, i, start, pos;
-
-	if (!PyArg_ParseTuple(args, "OS:pcre_expand", &match_obj, &repl_obj)) 
-		return NULL;
-
-	repl=(unsigned char *)PyString_AsString(repl_obj);
-	size=PyString_Size(repl_obj);
-	results=PyList_New(0);
-	if (results==NULL) return NULL;
-	for(start=total_len=i=0; i<size; i++)
-	{
-		if (repl[i]=='\\')
-		{
-			PyObject *value;
-			int escape_type;
-
-			if (start!=i)
-			{
-				int status;
-				PyObject *s = PyString_FromStringAndSize(
-					(char *)repl+start, i-start);
-				if (s == NULL) {
-					Py_DECREF(results);
-					return NULL;
-				}
-				status = PyList_Append(results, s);
-				Py_DECREF(s);
-				if (status < 0) {
-					Py_DECREF(results);
-					return NULL;
-				}
-				total_len += i-start;
-			}
-			i++;
-			value=PyPcre_expand_escape(repl, size, &i, &escape_type);
-			if (value==NULL)
-			{
-				/* PyPcre_expand_escape triggered an exception of some sort,
-				   so just return */
-				Py_DECREF(results);
-				return NULL;
-			}
-			switch (escape_type)
-			{
-			case (CHAR):
-				PyList_Append(results, value);
-				total_len += PyString_Size(value);
-				break;
-			case(MEMORY_REFERENCE):
-			{
-				PyObject *r, *tuple, *result;
-				r=PyObject_GetAttrString(match_obj, "group");
-				if (r == NULL) {
-					Py_DECREF(results);
-					return NULL;
-				}
-				tuple=PyTuple_New(1);
-				Py_INCREF(value);
-				PyTuple_SetItem(tuple, 0, value);
-				result=PyEval_CallObject(r, tuple);
-				Py_DECREF(r); Py_DECREF(tuple);
-				if (result==NULL)
-				{
-					/* The group() method triggered an exception of some sort */
-					Py_DECREF(results);
-					Py_DECREF(value);
-					return NULL;
-				}
-				if (result==Py_None)
-				{
-					char message[50];
-					PyOS_snprintf(message, sizeof(message),
-						"group did not contribute to the match");
-					PyErr_SetString(ErrorObject, 
-							message);
-					Py_DECREF(result);
-					Py_DECREF(value);
-					Py_DECREF(results);
-					return NULL;
-				}
-				/* typecheck that it's a string! */
-				if (!PyString_Check(result))
-				{
-					Py_DECREF(results);
-					Py_DECREF(result);
-					PyErr_SetString(ErrorObject, 
-							"group() must return a string value for replacement");
-					return NULL;
-				}
-				PyList_Append(results, result);
-				total_len += PyString_Size(result);
-				Py_DECREF(result);
-			}
-			break;
-			case(STRING):
-			  {
-			    PyList_Append(results, value);
-			    total_len += PyString_Size(value);
-			    break;
-			  }
-			default:
-				Py_DECREF(results);
-				PyErr_SetString(ErrorObject, 
-						"bad escape in replacement");
-				return NULL;
-			}
-			Py_DECREF(value);
-			start=i;
-			i--; /* Decrement now, because the 'for' loop will increment it */
-		}
-	} /* endif repl[i]!='\\' */
-
-	if (start!=i)
-	{
-		int status;
-		PyObject *s = PyString_FromStringAndSize((char *)repl+start, 
-							 i-start);
-		if (s == NULL) {
-			Py_DECREF(results);
-			return NULL;
-		}
-		status = PyList_Append(results, s);
-		Py_DECREF(s);
-		if (status < 0) {
-			Py_DECREF(results);
-			return NULL;
-		}
-		total_len += i-start;
-	}
-
-	/* Whew!  Now we've constructed a list containing various pieces of
-	   strings that will make up our final result.  So, iterate over 
-	   the list concatenating them.  A new string measuring total_len
-	   bytes is allocated and filled in. */
-     
-	newstring=PyString_FromStringAndSize(NULL, total_len);
-	if (newstring==NULL)
-	{
-		Py_DECREF(results);
-		return NULL;
-	}
-
-	repl=(unsigned char *)PyString_AsString(newstring);
-	for (pos=i=0; i<PyList_Size(results); i++)
-	{
-		PyObject *item=PyList_GetItem(results, i);
-		memcpy(repl+pos, PyString_AsString(item), PyString_Size(item) );
-		pos += PyString_Size(item);
-	}
-	Py_DECREF(results);
-	return newstring;
-}
-
-
-/* List of functions defined in the module */
-
-static PyMethodDef pcre_methods[] = {
-	{"pcre_compile",		PyPcre_compile,		METH_VARARGS},
-	{"pcre_expand",		PyPcre_expand,		METH_VARARGS},
-	{NULL,		NULL}		/* sentinel */
-};
-
-
-/*
- * Convenience routine to export an integer value.
- * For simplicity, errors (which are unlikely anyway) are ignored.
- */
-
-static void
-insint(PyObject *d, char *name, int value)
-{
-	PyObject *v = PyInt_FromLong((long) value);
-	if (v == NULL) {
-		/* Don't bother reporting this error */
-		PyErr_Clear();
-	}
-	else {
-		PyDict_SetItemString(d, name, v);
-		Py_DECREF(v);
-	}
-}
-
-
-/* Initialization function for the module (*must* be called initpcre) */
-
-PyMODINIT_FUNC
-initpcre(void)
-{
-	PyObject *m, *d;
-
-        Pcre_Type.ob_type = &PyType_Type;
-
-	/* Create the module and add the functions */
-	m = Py_InitModule("pcre", pcre_methods);
-
-	/* Add some symbolic constants to the module */
-	d = PyModule_GetDict(m);
-	ErrorObject = PyErr_NewException("pcre.error", NULL, NULL);
-	PyDict_SetItemString(d, "error", ErrorObject);
-
-	/* Insert the flags */
-	insint(d, "IGNORECASE", PCRE_CASELESS);
-	insint(d, "ANCHORED", PCRE_ANCHORED);
-	insint(d, "MULTILINE", PCRE_MULTILINE);
-	insint(d, "DOTALL", PCRE_DOTALL);
-	insint(d, "VERBOSE", PCRE_EXTENDED);
-	insint(d, "LOCALE", PCRE_LOCALE);
-}
-
--- a/Modules/pypcre.c
+++ b/Modules/pypcre.c