Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
db25f328
Commit
db25f328
authored
Jul 10, 1997
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
New versions straight from Jeffrey Ollie's web site
parent
db9e20f4
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
700 additions
and
169 deletions
+700
-169
Modules/regexpr.c
Modules/regexpr.c
+267
-117
Modules/regexpr.h
Modules/regexpr.h
+58
-52
Modules/reopmodule.c
Modules/reopmodule.c
+375
-0
No files found.
Modules/regexpr.c
View file @
db25f328
This diff is collapsed.
Click to expand it.
Modules/regexpr.h
View file @
db25f328
/*
* -*- mode: c-mode; c-file-style: python -*-
*/
#ifndef Py_REGEXPR_H
#ifndef Py_REGEXPR_H
#define Py_REGEXPR_H
#define Py_REGEXPR_H
#ifdef __cplusplus
#ifdef __cplusplus
...
@@ -5,22 +9,22 @@ extern "C" {
...
@@ -5,22 +9,22 @@ extern "C" {
#endif
#endif
/*
/*
* regexpr.h
regexpr.h
*
* Author: Tatu Ylonen <ylo@ngs.fi>
Author: Tatu Ylonen <ylo@ngs.fi>
*
* Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
*
* Permission to use, copy, modify, distribute, and sell this software
Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies. This
provided that the above copyright notice appear in all copies. This
* software is provided "as is" without express or implied warranty.
software is provided "as is" without express or implied warranty.
*
* Created: Thu Sep 26 17:15:36 1991 ylo
Created: Thu Sep 26 17:15:36 1991 ylo
* Last modified: Mon Nov 4 15:49:46 1991 ylo
Last modified: Mon Nov 4 15:49:46 1991 ylo
*/
*/
/* $Id$
*/
#ifndef REGEXPR_H
#ifndef REGEXPR_H
#define REGEXPR_H
#define REGEXPR_H
...
@@ -37,6 +41,7 @@ typedef struct re_pattern_buffer
...
@@ -37,6 +41,7 @@ typedef struct re_pattern_buffer
char
fastmap_accurate
;
/* true if fastmap is valid */
char
fastmap_accurate
;
/* true if fastmap is valid */
char
can_be_null
;
/* true if can match empty string */
char
can_be_null
;
/* true if can match empty string */
char
uses_registers
;
/* registers are used and need to be initialized */
char
uses_registers
;
/* registers are used and need to be initialized */
int
num_registers
;
/* number of registers used */
char
anchor
;
/* anchor: 0=none 1=begline 2=begbuf */
char
anchor
;
/* anchor: 0=none 1=begline 2=begbuf */
}
*
regexp_t
;
}
*
regexp_t
;
...
@@ -77,52 +82,53 @@ typedef struct re_registers
...
@@ -77,52 +82,53 @@ typedef struct re_registers
#ifdef HAVE_PROTOTYPES
#ifdef HAVE_PROTOTYPES
extern
int
re_syntax
;
extern
int
re_syntax
;
/* This is the actual syntax mask. It was added so that Python
/* This is the actual syntax mask. It was added so that Python
could do
could do
syntax-dependent munging of patterns before compilation. */
*
syntax-dependent munging of patterns before compilation. */
int
re_set_syntax
(
int
syntax
);
int
re_set_syntax
(
int
syntax
);
/* This sets the syntax to use and returns the previous syntax. The
/* This sets the syntax to use and returns the previous syntax. The
syntax is specified by a bit mask of the above defined bits. */
*
syntax is specified by a bit mask of the above defined bits. */
char
*
re_compile_pattern
(
char
*
regex
,
int
regex_size
,
regexp_t
compiled
);
char
*
re_compile_pattern
(
char
*
regex
,
int
regex_size
,
regexp_t
compiled
);
/* This compiles the regexp (given in regex and length in regex_size).
/* This compiles the regexp (given in regex and length in regex_size).
This returns NULL if the regexp compiled successfully, and an error
*
This returns NULL if the regexp compiled successfully, and an error
message if an error was encountered. The buffer field must be
*
message if an error was encountered. The buffer field must be
initialized to a memory area allocated by malloc (or to NULL) before
*
initialized to a memory area allocated by malloc (or to NULL) before
use, and the allocated field must be set to its length (or 0 if buffer is
* use, and the allocated field must be set to its length (or 0 if
NULL). Also, the translate field must be set to point to a valid
* buffer is NULL). Also, the translate field must be set to point to a
translation table, or NULL if it is not used. */
* valid
translation table, or NULL if it is not used. */
int
re_match
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
pos
,
int
re_match
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
pos
,
regexp_registers_t
old_regs
);
regexp_registers_t
old_regs
);
/* This tries to match the regexp against the string. This returns the
/* This tries to match the regexp against the string. This returns the
length of the matched portion, or -1 if the pattern could not be
*
length of the matched portion, or -1 if the pattern could not be
matched and -2 if an error (such as failure stack overflow) is
*
matched and -2 if an error (such as failure stack overflow) is
encountered. */
*
encountered. */
int
re_search
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
startpos
,
int
re_search
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
startpos
,
int
range
,
regexp_registers_t
regs
);
int
range
,
regexp_registers_t
regs
);
/* This searches for a substring matching the regexp. This returns the first
/* This searches for a substring matching the regexp. This returns the
index at which a match is found. range specifies at how many positions to
* first index at which a match is found. range specifies at how many
try matching; positive values indicate searching forwards, and negative
* positions to try matching; positive values indicate searching
values indicate searching backwards. mstop specifies the offset beyond
* forwards, and negative values indicate searching backwards. mstop
which a match must not go. This returns -1 if no match is found, and
* specifies the offset beyond which a match must not go. This returns
-2 if an error (such as failure stack overflow) is encountered. */
* -1 if no match is found, and -2 if an error (such as failure stack
* overflow) is encountered. */
void
re_compile_fastmap
(
regexp_t
compiled
);
void
re_compile_fastmap
(
regexp_t
compiled
);
/* This computes the fastmap for the regexp. For this to have any effect,
/* This computes the fastmap for the regexp. For this to have any effect,
the calling program must have initialized the fastmap field to point
*
the calling program must have initialized the fastmap field to point
to an array of 256 characters. */
*
to an array of 256 characters. */
char
*
re_comp
(
char
*
s
);
char
*
re_comp
(
char
*
s
);
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
an internal buffer. This returns NULL if the regexp was compiled
*
an internal buffer. This returns NULL if the regexp was compiled
successfully, and an error message if there was an error. */
*
successfully, and an error message if there was an error. */
int
re_exec
(
char
*
s
);
int
re_exec
(
char
*
s
);
/* BSD 4.2 regexp library routine re_exec. This returns true if the
string
/* BSD 4.2 regexp library routine re_exec. This returns true if the
matches the regular expression (that is, a matching part is found
* string matches the regular expression (that is, a matching part is
anywhere in the string). */
* found
anywhere in the string). */
#else
/* HAVE_PROTOTYPES */
#else
/* HAVE_PROTOTYPES */
...
...
Modules/reopmodule.c
0 → 100644
View file @
db25f328
/***********************************************************
Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
The Netherlands.
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI or Corporation for National Research Initiatives or
CNRI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
While CWI is the initial source for this software, a modified version
is made available by the Corporation for National Research Initiatives
(CNRI) at the Internet address ftp://ftp.python.org.
STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
******************************************************************/
/* $Id$ */
/* Regular expression objects */
/* This uses Tatu Ylonen's copyleft-free reimplementation of
GNU regular expressions */
#include "Python.h"
#include <ctype.h>
#include "regexpr.h"
/* Module-wide exception object.  initreop() creates it as the string
   "reop.error" and stores it in the module dictionary under "error";
   reop_match() and reop_search() raise it when the matcher reports a
   result below -1. */
static
PyObject
*
ReopError
;
/* Exception */
/* Convert the first num_regs register pairs of *regs into a Python tuple.
 *
 * Element i of the result is the tuple (regs->start[i], regs->end[i]).
 * Registers whose start and end are both -1 all share one cached
 * (-1, -1) tuple, which is created on the first call and kept for the
 * lifetime of the process.
 *
 * Returns a new tuple, or NULL if any allocation or tuple store fails.
 */
static PyObject *
makeresult(regs, num_regs)
	struct re_registers *regs;
	int num_regs;
{
	static PyObject *unset_pair = NULL;	/* cached (-1, -1) */
	PyObject *result;
	int idx;

	if (!unset_pair) {
		unset_pair = Py_BuildValue("(ii)", -1, -1);
		if (!unset_pair)
			return NULL;
	}

	result = PyTuple_New(num_regs);
	if (!result)
		return NULL;

	for (idx = 0; idx < num_regs; idx++) {
		int first = regs->start[idx];
		int last = regs->end[idx];
		PyObject *pair;

		if (first != -1 || last != -1)
			pair = Py_BuildValue("(ii)", first, last);
		else {
			/* Unset register: hand out another reference to
			   the shared placeholder. */
			Py_INCREF(unset_pair);
			pair = unset_pair;
		}

		if (pair != NULL && PyTuple_SetItem(result, idx, pair) >= 0)
			continue;

		/* Building or storing the pair failed. */
		Py_DECREF(result);
		return NULL;
	}
	return result;
}
/* reop.match(): try to match a compiled pattern at one fixed position.
 *
 * args is a single tuple unpacked with the format "(s#iiis#is#i)":
 *   compiled pattern bytes, num_regs, flags, can_be_null, fastmap bytes,
 *   anchor, subject string, pos.
 * flags and the fastmap length are parsed but not otherwise used here.
 *
 * Returns the tuple built by makeresult() from the first num_regs
 * registers after re_match() has run; every register is preset to -1
 * before the call.  Returns NULL with an exception set when
 *   - the arguments do not parse,
 *   - num_regs is negative or larger than the register arrays,
 *   - pos lies outside 0..len(string), or
 *   - re_match() returns a value below -1 (internal failure).
 */
static PyObject *
reop_match(self, args)
	PyObject *self;
	PyObject *args;
{
	char *string;
	int fastmaplen, stringlen;
	int can_be_null, anchor, i;
	int num_regs, flags, pos, result;
	int max_regs;
	struct re_pattern_buffer bufp;
	struct re_registers re_regs;

	if (!PyArg_Parse(args, "(s#iiis#is#i)",
			 &(bufp.buffer), &(bufp.allocated),
			 &num_regs, &flags, &can_be_null,
			 &(bufp.fastmap), &fastmaplen,
			 &anchor,
			 &string, &stringlen,
			 &pos))
		return NULL;

	/* num_regs comes straight from the caller and is used below to
	   index the start[]/end[] members of the local re_regs, so it
	   must not exceed their capacity.  (Relies on start being a
	   fixed-size array member, which the indexing of this
	   uninitialised local already requires.) */
	max_regs = (int)(sizeof(re_regs.start) / sizeof(re_regs.start[0]));
	if (num_regs < 0 || num_regs > max_regs) {
		PyErr_SetString(ReopError, "register count out of range");
		return NULL;
	}

	/* The matcher is handed string + pos; keep it inside the buffer. */
	if (pos < 0 || pos > stringlen) {
		PyErr_SetString(ReopError, "start position out of range");
		return NULL;
	}

	/* NOTE(review): the pattern and fastmap buffers themselves are
	   still trusted as-is -- confirm callers always pass the output
	   of the compiler. */
	bufp.used = bufp.allocated;
	bufp.translate = NULL;
	bufp.fastmap_accurate = 1;
	bufp.can_be_null = can_be_null;
	bufp.uses_registers = 1;
	bufp.num_registers = num_regs;
	bufp.anchor = anchor;

	/* Mark every register that will be reported as "unset". */
	for (i = 0; i < num_regs; i++) {
		re_regs.start[i] = -1;
		re_regs.end[i] = -1;
	}

	result = re_match(&bufp, string, stringlen, pos, &re_regs);
	if (result < -1) {
		/* Failure like stack overflow */
		PyErr_SetString(ReopError, "match failure");
		return NULL;
	}
	return makeresult(&re_regs, num_regs);
}
/* reop.search(): search forward for a match of a compiled pattern.
 *
 * args is a single tuple unpacked with the format "(s#iiis#is#i)":
 *   compiled pattern bytes, num_regs, flags, can_be_null, fastmap bytes,
 *   anchor, subject string, pos.
 * flags and the fastmap length are parsed but not otherwise used here.
 *
 * The search starts at pos and covers the stringlen - pos positions up
 * to the end of the string.
 *
 * Returns the tuple built by makeresult() from the first num_regs
 * registers after re_search() has run; every register is preset to -1
 * before the call.  Returns NULL with an exception set when
 *   - the arguments do not parse,
 *   - num_regs is negative or larger than the register arrays,
 *   - pos lies outside 0..len(string), or
 *   - re_search() returns a value below -1 (internal failure).
 */
static PyObject *
reop_search(self, args)
	PyObject *self;
	PyObject *args;
{
	char *string;
	int fastmaplen, stringlen;
	int can_be_null, anchor, i;
	int num_regs, flags, pos, result;
	int max_regs;
	struct re_pattern_buffer bufp;
	struct re_registers re_regs;

	if (!PyArg_Parse(args, "(s#iiis#is#i)",
			 &(bufp.buffer), &(bufp.allocated),
			 &num_regs, &flags, &can_be_null,
			 &(bufp.fastmap), &fastmaplen,
			 &anchor,
			 &string, &stringlen,
			 &pos))
		return NULL;

	/* num_regs comes straight from the caller and is used below to
	   index the start[]/end[] members of the local re_regs, so it
	   must not exceed their capacity.  (Relies on start being a
	   fixed-size array member, which the indexing of this
	   uninitialised local already requires.) */
	max_regs = (int)(sizeof(re_regs.start) / sizeof(re_regs.start[0]));
	if (num_regs < 0 || num_regs > max_regs) {
		PyErr_SetString(ReopError, "register count out of range");
		return NULL;
	}

	/* pos is both the start offset and part of the range computation
	   (stringlen - pos); keep it inside the buffer so the range is
	   never negative. */
	if (pos < 0 || pos > stringlen) {
		PyErr_SetString(ReopError, "start position out of range");
		return NULL;
	}

	/* NOTE(review): the pattern and fastmap buffers themselves are
	   still trusted as-is -- in particular nothing checks that the
	   fastmap holds 256 entries.  Confirm against callers. */
	bufp.used = bufp.allocated;
	bufp.translate = NULL;
	bufp.fastmap_accurate = 1;
	bufp.can_be_null = can_be_null;
	bufp.uses_registers = 1;
	/* Pass the real register count, as reop_match() does; this used
	   to be hard-coded to 1 although num_regs registers are reported
	   back to the caller. */
	bufp.num_registers = num_regs;
	bufp.anchor = anchor;

	/* Mark every register that will be reported as "unset". */
	for (i = 0; i < num_regs; i++) {
		re_regs.start[i] = -1;
		re_regs.end[i] = -1;
	}

	result = re_search(&bufp, string, stringlen, pos, stringlen - pos,
			   &re_regs);
	if (result < -1) {
		/* Failure like stack overflow */
		PyErr_SetString(ReopError, "match failure");
		return NULL;
	}
	return makeresult(&re_regs, num_regs);
}
#if 0
/* Functions originally in the regsub module.
Added June 1, 1997.
*/
/* A cache of previously used patterns is maintained. Notice that if
you change the reop syntax flag, entries in the cache are
invalidated.
XXX Solution: use (syntax flag, pattern) as keys? Clear the cache
every so often, or once it gets past a certain size?
*/
/* Dictionary mapping pattern strings to compiled reop objects; created
   lazily by cached_compile(). */
static PyObject *cache_dict = NULL;

/* Return a compiled reop object for pattern, as a new reference.
 *
 * If pattern is not a string it is assumed to already be a compiled
 * reop object and is returned with its reference count incremented.
 * If it is a string, the cache is consulted first; on a miss the
 * pattern is compiled with newreopobject() and stored in the cache.
 *
 * Returns NULL with an exception set if the cache cannot be created,
 * compilation fails, or the new object cannot be stored in the cache.
 */
static reopobject *
cached_compile(pattern)
	PyObject *pattern;
{
	reopobject *p2;

	if (!PyString_Check(pattern)) {
		/* It's not a string, so assume it's a compiled reop object */
		/* XXX check that! */
		Py_INCREF(pattern);
		return (reopobject *)pattern;
	}

	if (cache_dict == NULL) {
		cache_dict = PyDict_New();
		if (cache_dict == NULL)
			return (reopobject *)NULL;
	}

	/* See if the pattern has already been cached; if so, return that
	   reop object */
	p2 = (reopobject *)PyDict_GetItem(cache_dict, pattern);
	if (p2) {
		Py_INCREF(p2);
		return p2;
	}

	/* Compile the pattern and cache it */
	p2 = (reopobject *)newreopobject(pattern, NULL, pattern, NULL);
	if (!p2)
		return p2;
	if (PyDict_SetItem(cache_dict, pattern, (PyObject *)p2) < 0) {
		/* Do not hand back an object while an exception is
		   pending; drop it and report the failure instead. */
		Py_DECREF(p2);
		return (reopobject *)NULL;
	}
	return p2;
}
/* Append the size bytes starting at data to list as a new string object.
   Returns 0 on success, -1 with an exception set on failure. */
static int
split_append(list, data, size)
	PyObject *list;
	char *data;
	int size;
{
	PyObject *piece;
	int status;

	piece = PyString_FromStringAndSize(data, size);
	if (piece == NULL)
		return -1;
	status = PyList_Append(list, piece);
	Py_DECREF(piece);
	return status < 0 ? -1 : 0;
}

/* Shared implementation of split() and splitx().
 *
 * args is (string, pattern) or (string, pattern, maxsplit); pattern may
 * be a pattern string or a compiled reop object (see cached_compile()).
 * The string is cut at every non-empty match of the pattern.  If retain
 * is true the matched text is added to the result list as well.  A
 * non-zero maxsplit stops the scan after that many splits.
 *
 * Returns a new list of strings, or NULL with an exception set.  The
 * reference obtained from cached_compile() is released on every path,
 * and failures of PyList_Append() are reported instead of ignored.
 */
static PyObject *
internal_split(args, retain)
	PyObject *args;
	int retain;
{
	PyObject *newlist;
	reopobject *pattern;
	int maxsplit = 0, count = 0, length, next = 0, result;
	int match_end = 0;	/* match_start is defined below */
	char *start;

	if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
			      &maxsplit)) {
		/* Retry without the optional maxsplit argument. */
		PyErr_Clear();
		if (!PyArg_ParseTuple(args, "s#O", &start, &length, &pattern))
			return NULL;
	}

	/* From here on we own a reference to pattern. */
	pattern = cached_compile((PyObject *)pattern);
	if (!pattern)
		return NULL;

	newlist = PyList_New(0);
	if (!newlist)
		goto fail_pattern;

	do {
		result = re_search(&pattern->re_patbuf,
				   start, length, next, length - next,
				   &pattern->re_regs);
		if (result < -1) {
			/* Erk... an error happened during the reop search */
			PyErr_SetString(ReopError, "match failure");
			goto fail;
		}
		if (next <= result) {
			int match_start = pattern->re_regs.start[0];
			int oldmatch_end = match_end;

			match_end = pattern->re_regs.end[0];
			if (match_start == match_end) {
				/* A zero-length match; increment to the
				   next position */
				next = result + 1;
				match_end = oldmatch_end;
				continue;
			}

			/* Append the string up to the start of the match */
			if (split_append(newlist, start + oldmatch_end,
					 match_start - oldmatch_end) < 0)
				goto fail;

			/* Append a string containing whatever matched */
			if (retain &&
			    split_append(newlist, start + match_start,
					 match_end - match_start) < 0)
				goto fail;

			/* Update the pointer, and increment the count of
			   splits */
			next = match_end;
			count++;
		}
	} while (result != -1 && !(maxsplit && maxsplit == count) &&
		 next < length);

	/* Whatever follows the last match forms the final element. */
	if (split_append(newlist, start + match_end, length - match_end) < 0)
		goto fail;

	Py_DECREF(pattern);
	return newlist;

fail:
	Py_DECREF(newlist);
fail_pattern:
	Py_DECREF(pattern);
	return NULL;
}
/* split(): forwards args to internal_split() with retain == 0, so the
   text matched by the pattern is not added to the result list.
   self is unused. */
static PyObject *
reop_split(self, args)
PyObject *self;
PyObject *args;
{
return internal_split(args, 0);
}
/* splitx(): forwards args to internal_split() with retain == 1, so the
   text matched by the pattern is added to the result list as well.
   self is unused. */
static PyObject *
reop_splitx(self, args)
PyObject *self;
PyObject *args;
{
return internal_split(args, 1);
}
#endif
/* Function table passed to Py_InitModule() by initreop().  Each entry
   gives the Python-level name, the C implementation and a flags word
   (0 for every entry).  The split/splitx entries are compiled out
   together with their implementations. */
static struct PyMethodDef reop_global_methods[] = {
	{"match",	reop_match,	0},
	{"search",	reop_search,	0},
#if 0
	{"split",  reop_split, 0},
	{"splitx", reop_splitx, 0},
#endif
	{NULL,		NULL}		/* sentinel */
};
void
initreop
()
{
PyObject
*
m
,
*
d
,
*
v
;
int
i
;
char
*
s
;
m
=
Py_InitModule
(
"reop"
,
reop_global_methods
);
d
=
PyModule_GetDict
(
m
);
/* Initialize reop.error exception */
v
=
ReopError
=
PyString_FromString
(
"reop.error"
);
if
(
v
==
NULL
||
PyDict_SetItemString
(
d
,
"error"
,
v
)
!=
0
)
goto
finally
;
/* Initialize reop.casefold constant */
if
(
!
(
v
=
PyString_FromStringAndSize
((
char
*
)
NULL
,
256
)))
goto
finally
;
if
(
!
(
s
=
PyString_AsString
(
v
)))
goto
finally
;
for
(
i
=
0
;
i
<
256
;
i
++
)
{
if
(
isupper
(
i
))
s
[
i
]
=
tolower
(
i
);
else
s
[
i
]
=
i
;
}
if
(
PyDict_SetItemString
(
d
,
"casefold"
,
v
)
<
0
)
goto
finally
;
Py_DECREF
(
v
);
if
(
!
PyErr_Occurred
())
return
;
finally:
Py_FatalError
(
"can't initialize reop module"
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment