Commit 10be10cb authored by Neal Norwitz

Remove regsub, reconvert, regex, regex_syntax and everything under lib-old.

parent efbeaef1
......@@ -6,7 +6,7 @@
import sys
import string
import regex
import re
import getopt
import time
......@@ -35,9 +35,9 @@ def main():
for rev in allrevs:
formatrev(rev, prefix)
parsedateprog = regex.compile(
'^date: \([0-9]+\)/\([0-9]+\)/\([0-9]+\) ' +
'\([0-9]+\):\([0-9]+\):\([0-9]+\); author: \([^ ;]+\)')
parsedateprog = re.compile(
'^date: ([0-9]+)/([0-9]+)/([0-9]+) ' +
'([0-9]+):([0-9]+):([0-9]+); author: ([^ ;]+)')
authormap = {
'guido': 'Guido van Rossum <guido@cnri.reston.va.us>',
......@@ -70,7 +70,7 @@ def formatrev(rev, prefix):
print
print
startprog = regex.compile("^Working file: \(.*\)$")
startprog = re.compile("^Working file: (.*)$")
def getnextfile(f):
while 1:
......
......@@ -6,12 +6,12 @@
# Python script for bumping up an RCS major revision number.
import sys
import regex
import re
import rcslib
import string
WITHLOCK = 1
majorrev_re = regex.compile('^[0-9]+')
majorrev_re = re.compile('^[0-9]+')
dir = rcslib.RCS()
......
......@@ -8,7 +8,7 @@ files and (possibly) corresponding work files.
import fnmatch
import os
import regsub
import re
import string
import tempfile
......@@ -150,7 +150,7 @@ class RCS:
cmd = 'ci %s%s -t%s %s %s' % \
(lockflag, rev, f.name, otherflags, name)
else:
message = regsub.gsub('\([\\"$`]\)', '\\\\\\1', message)
message = re.sub(r'([\"$`])', r'\\\1', message)
cmd = 'ci %s%s -m"%s" %s %s' % \
(lockflag, rev, message, otherflags, name)
return self._system(cmd)
......
......@@ -29,7 +29,7 @@
# into a program for a different change to Python programs...
import sys
import regex
import re
import os
from stat import *
import string
......@@ -53,7 +53,7 @@ def main():
if fix(arg): bad = 1
sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
......@@ -104,7 +104,7 @@ def fix(filename):
if lineno == 1 and g is None and line[:2] == '#!':
# Check for non-Python scripts
words = string.split(line[2:])
if words and regex.search('[pP]ython', words[0]) < 0:
if words and re.search('[pP]ython', words[0]) < 0:
msg = filename + ': ' + words[0]
msg = msg + ' script; not fixed\n'
err(msg)
......
......@@ -13,12 +13,12 @@
import os
import sys
import regex
import re
import string
import getopt
pat = '^\([a-zA-Z0-9 :]*\)!\(.*\)!\(.*\)!\([<>].*\)!\([0-9]+\)!\([0-9]+\)$'
prog = regex.compile(pat)
pat = '^([a-zA-Z0-9 :]*)!(.*)!(.*)!([<>].*)!([0-9]+)!([0-9]+)$'
prog = re.compile(pat)
def main():
maxitems = 25
......
......@@ -10,7 +10,7 @@ import time
import os
import stat
import getopt
import regex
import re
def main():
dofile = mmdf
......@@ -45,7 +45,7 @@ def main():
if sts:
sys.exit(sts)
numeric = regex.compile('[1-9][0-9]*')
numeric = re.compile('[1-9][0-9]*')
def mh(dir):
sts = 0
......
......@@ -8,10 +8,10 @@
import os
import sys
import regex
import re
pat = '^\([^: \t\n]+\):\([1-9][0-9]*\):'
prog = regex.compile(pat)
pat = '^([^: \t\n]+):([1-9][0-9]*):'
prog = re.compile(pat)
class FileObj:
def __init__(self, filename):
......
......@@ -13,7 +13,6 @@ MYGROUP = '225.0.0.250'
import sys
import time
import struct
import regsub
from socket import *
......
# Widget to display a man page
import regex
import re
from Tkinter import *
from Tkinter import _tkinter
from ScrolledText import ScrolledText
......@@ -11,10 +11,10 @@ ITALICFONT = '*-Courier-Medium-O-Normal-*-120-*'
# XXX Recognizing footers is system dependent
# (This one works for IRIX 5.2 and Solaris 2.2)
footerprog = regex.compile(
footerprog = re.compile(
'^ Page [1-9][0-9]*[ \t]+\|^.*Last change:.*[1-9][0-9]*\n')
emptyprog = regex.compile('^[ \t]*\n')
ulprog = regex.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n')
emptyprog = re.compile('^[ \t]*\n')
ulprog = re.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n')
# Basic Man Page class -- does not disable editing
class EditableManPage(ScrolledText):
......
......@@ -4,7 +4,7 @@
import os
import sys
import regex
import re
import getopt
import string
import mhlib
......@@ -157,7 +157,7 @@ def scan_unpost(e):
scanmenu.unpost()
scanmenu.invoke('active')
scanparser = regex.compile('^ *\([0-9]+\)')
scanparser = re.compile('^ *([0-9]+)')
def open_folder(e=None):
global folder, mhf
......
......@@ -5,7 +5,7 @@
import sys
import os
import string
import regex
import re
from Tkinter import *
from ManPage import ManPage
......@@ -208,15 +208,15 @@ class SelectionBox:
print 'Empty search string'
return
if not self.casevar.get():
map = regex.casefold
map = re.IGNORECASE
else:
map = None
try:
if map:
prog = regex.compile(search, map)
prog = re.compile(search, map)
else:
prog = regex.compile(search)
except regex.error, msg:
prog = re.compile(search)
except re.error, msg:
self.frame.bell()
print 'Regex error:', msg
return
......
......@@ -33,11 +33,8 @@ This document is available from
The \module{re} module was added in Python 1.5, and provides
Perl-style regular expression patterns. Earlier versions of Python
came with the \module{regex} module, which provides Emacs-style
patterns. Emacs-style patterns are slightly less readable and
don't provide as many features, so there's not much reason to use
the \module{regex} module when writing new code, though you might
encounter old code that uses it.
came with the \module{regex} module, which provided Emacs-style
patterns. The \module{regex} module was removed in Python 2.5.
Regular expressions (or REs) are essentially a tiny, highly
specialized programming language embedded inside Python and made
......@@ -1458,7 +1455,7 @@ Jeffrey Friedl's \citetitle{Mastering Regular Expressions}, published
by O'Reilly. Unfortunately, it exclusively concentrates on Perl and
Java's flavours of regular expressions, and doesn't contain any Python
material at all, so it won't be useful as a reference for programming
in Python. (The first edition covered Python's now-obsolete
in Python. (The first edition covered Python's now-removed
\module{regex} module, which won't help you much.) Consider checking
it out from your library.
......
......@@ -87,7 +87,6 @@ and how to embed it in other applications.
\input{libstrings} % String Services
\input{libstring}
\input{libre}
\input{libreconvert}
\input{libstruct} % XXX also/better in File Formats?
\input{libdifflib}
\input{libstringio}
......@@ -454,8 +453,6 @@ and how to embed it in other applications.
%\input{libcmpcache}
%\input{libcmp}
%\input{libni}
%\input{libregex}
%\input{libregsub}
\chapter{Reporting Bugs}
\input{reportingbugs}
......
......@@ -566,9 +566,6 @@ ignored.
>>> re.split('\W+', 'Words, words, words.', 1)
['Words', 'words, words.']
\end{verbatim}
This function combines and extends the functionality of
the old \function{regsub.split()} and \function{regsub.splitx()}.
\end{funcdesc}
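If the pattern contains a capturing group, \function{split()} also
returns the text of the matched delimiters, which roughly covers the
use case the old \function{regsub.splitx()} served:

\begin{verbatim}
>>> re.split('(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
\end{verbatim}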
\begin{funcdesc}{findall}{pattern, string\optional{, flags}}
......@@ -943,7 +940,7 @@ the message \code{maximum recursion limit} exceeded. For example,
>>> re.match('Begin (\w| )*? end', s).end()
Traceback (most recent call last):
File "<stdin>", line 1, in ?
File "/usr/local/lib/python2.3/sre.py", line 132, in match
File "/usr/local/lib/python2.5/re.py", line 132, in match
return _compile(pattern, flags).match(string)
RuntimeError: maximum recursion limit exceeded
\end{verbatim}
......
\section{\module{reconvert} ---
Convert regular expressions from regex to re form}
\declaremodule{standard}{reconvert}
\moduleauthor{Andrew M. Kuchling}{amk@amk.ca}
\sectionauthor{Skip Montanaro}{skip@pobox.com}
\modulesynopsis{Convert regex-, emacs- or sed-style regular expressions
to re-style syntax.}
This module provides a facility to convert regular expressions from the
syntax used by the deprecated \module{regex} module to those used by the
newer \module{re} module. Because of the similarity between the regular
expression syntax of \code{sed(1)} and \code{emacs(1)} and the
\module{regex} module, it is also helpful to convert patterns written for
those tools to \module{re} patterns.
When used as a script, a Python string literal (or any other expression
evaluating to a string) is read from stdin, and the translated expression is
written to stdout as a string literal. Unless stdout is a tty, no trailing
newline is written to stdout. This is done so that it can be used with
Emacs \code{C-U M-|} (shell-command-on-region) which filters the region
through the shell command.
\begin{seealso}
\seetitle{Mastering Regular Expressions}{Book on regular expressions
by Jeffrey Friedl, published by O'Reilly. The second
edition of the book no longer covers Python at all,
but the first edition covered writing good regular expression
patterns in great detail.}
\end{seealso}
\subsection{Module Contents}
\nodename{Contents of Module reconvert}
The module defines two functions and a handful of constants.
\begin{funcdesc}{convert}{pattern\optional{, syntax=None}}
Convert a \var{pattern} representing a \module{regex}-style regular
expression into a \module{re}-style regular expression. The optional
\var{syntax} parameter is a bitwise-or'd set of flags that control what
constructs are converted. See below for a description of the various
constants.
\end{funcdesc}
\begin{funcdesc}{quote}{s\optional{, quote=None}}
Convert a string object to a quoted string literal.
This is similar to \function{repr} but will return a "raw" string (r'...'
or r"...") when the string contains backslashes, instead of doubling all
backslashes. The resulting string does not always evaluate to the same
string as the original; however it will do just the right thing when passed
into re.compile().
The optional second argument forces the string quote; it must be a single
character which is a valid Python string quote. Note that prior to Python
2.5 this would not accept triple-quoted string delimiters.
\end{funcdesc}
\begin{datadesc}{RE_NO_BK_PARENS}
Suppress paren conversion. This should be omitted when converting
\code{sed}-style or \code{emacs}-style regular expressions.
\end{datadesc}
\begin{datadesc}{RE_NO_BK_VBAR}
Suppress vertical bar conversion. This should be omitted when converting
\code{sed}-style or \code{emacs}-style regular expressions.
\end{datadesc}
\begin{datadesc}{RE_BK_PLUS_QM}
Enable conversion of \code{+} and \code{?} characters. This should be
added to the \var{syntax} arg of \function{convert} when converting
\code{sed}-style regular expressions and omitted when converting
\code{emacs}-style regular expressions.
\end{datadesc}
\begin{datadesc}{RE_NEWLINE_OR}
When set, newline characters are replaced by \code{|}.
\end{datadesc}
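A short illustrative session, sketched from the translation rules
described above (the module is removed by this commit, so this is for
reference only):

\begin{verbatim}
>>> import reconvert
>>> reconvert.convert(r'\(foo\|bar\)')
'(foo|bar)'
>>> reconvert.convert(r'a\+', reconvert.RE_BK_PLUS_QM)  # sed-style pattern
'a+'
>>> reconvert.quote(reconvert.convert(r'\(foo\|bar\)'))
"'(foo|bar)'"
\end{verbatim}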
\section{\module{regsub} ---
String operations using regular expressions}
\declaremodule{standard}{regsub}
\modulesynopsis{Substitution and splitting operations that use
regular expressions. \strong{Obsolete!}}
This module defines a number of functions useful for working with
regular expressions (see built-in module \refmodule{regex}).
Warning: these functions are not thread-safe.
\strong{Obsolescence note:}
This module is obsolete as of Python version 1.5; it is still being
maintained because much existing code still uses it. All new code in
need of regular expressions should use the new \refmodule{re} module, which
supports the more powerful and regular Perl-style regular expressions.
Existing code should be converted. The standard library module
\module{reconvert} helps in converting \refmodule{regex} style regular
expressions to \refmodule{re} style regular expressions. (For more
conversion help, see Andrew Kuchling's\index{Kuchling, Andrew}
``regex-to-re HOWTO'' at
\url{http://www.python.org/doc/howto/regex-to-re/}.)
\begin{funcdesc}{sub}{pat, repl, str}
Replace the first occurrence of pattern \var{pat} in string
\var{str} by replacement \var{repl}. If the pattern isn't found,
the string is returned unchanged. The pattern may be a string or an
already compiled pattern. The replacement may contain references
\samp{\e \var{digit}} to subpatterns and escaped backslashes.
\end{funcdesc}
\begin{funcdesc}{gsub}{pat, repl, str}
Replace all (non-overlapping) occurrences of pattern \var{pat} in
string \var{str} by replacement \var{repl}. The same rules as for
\code{sub()} apply. Empty matches for the pattern are replaced only
when not adjacent to a previous match, so e.g.
\code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}.
\end{funcdesc}
\begin{funcdesc}{split}{str, pat\optional{, maxsplit}}
Split the string \var{str} in fields separated by delimiters matching
the pattern \var{pat}, and return a list containing the fields. Only
non-empty matches for the pattern are considered, so e.g.
\code{split('a:b', ':*')} returns \code{['a', 'b']} and
\code{split('abc', '')} returns \code{['abc']}. The \var{maxsplit}
defaults to 0. If it is nonzero, only \var{maxsplit} number of splits
occur, and the remainder of the string is returned as the final
element of the list.
\end{funcdesc}
\begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}}
Split the string \var{str} in fields separated by delimiters matching
the pattern \var{pat}, and return a list containing the fields as well
as the separators. For example, \code{splitx('a:::b', ':*')} returns
\code{['a', ':::', 'b']}. Otherwise, this function behaves the same
as \code{split}.
\end{funcdesc}
\begin{funcdesc}{capwords}{s\optional{, pat}}
Capitalize words separated by optional pattern \var{pat}. The default
pattern uses any characters except letters, digits and underscores as
word delimiters. Capitalization is done by changing the first
character of each word to upper case.
\end{funcdesc}
\begin{funcdesc}{clear_cache}{}
The regsub module maintains a cache of compiled regular expressions,
keyed on the regular expression string and the syntax of the regex
module at the time the expression was compiled. This function clears
that cache.
\end{funcdesc}
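A brief usage sketch (the patterns here are plain literals, so the
\module{regex} and \module{re} spellings coincide); note that
\function{split()} takes the string first, whereas \function{re.split()}
takes the pattern first:

\begin{verbatim}
>>> import regsub
>>> regsub.gsub('-', '+', 'a-b-c')   # roughly re.sub('-', '+', 'a-b-c')
'a+b+c'
>>> regsub.split('a:b:c', ':')       # roughly re.split(':', 'a:b:c')
['a', 'b', 'c']
\end{verbatim}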
......@@ -137,18 +137,6 @@ now just as good).
\item[\module{rand}]
--- Old interface to the random number generator.
\item[\module{regex}]
--- Emacs-style regular expression support; may still be used in some
old code (extension module). Refer to the
\citetitle[http://www.python.org/doc/1.6/lib/module-regex.html]{Python
1.6 Documentation} for documentation.
\item[\module{regsub}]
--- Regular expression based string replacement utilities, for use
with \module{regex} (extension module). Refer to the
\citetitle[http://www.python.org/doc/1.6/lib/module-regsub.html]{Python
1.6 Documentation} for documentation.
\item[\module{statcache}]
--- Caches the results of os.stat(). Using the cache can be fragile
and error-prone, just use \code{os.stat()} directly.
......
# This module provides standard support for "packages".
#
# The idea is that large groups of related modules can be placed in
# their own subdirectory, which can be added to the Python search path
# in a relatively easy way.
#
# The current version takes a package name and searches the Python
# search path for a directory by that name, and if found adds it to
# the module search path (sys.path). It maintains a list of packages
# that have already been added so adding the same package many times
# is OK.
#
# It is intended to be used in a fairly stylized manner: each module
# that wants to use a particular package, say 'Foo', is supposed to
# contain the following code:
#
# from addpack import addpack
# addpack('Foo')
# <import modules from package Foo>
#
# Additional arguments, when present, provide additional places where
# to look for the package before trying sys.path (these may be either
# strings or lists/tuples of strings). Also, if the package name is a
# full pathname, first the last component is tried in the usual way,
# then the full pathname is tried last. If the package name is a
# *relative* pathname (UNIX: contains a slash but doesn't start with
# one), then nothing special is done. The packages "/foo/bar/bletch"
# and "bletch" are considered the same, but unrelated to "bar/bletch".
#
# If the algorithm finds more than one suitable subdirectory, all are
# added to the search path -- this makes it possible to override part
# of a package. The same path will not be added more than once.
#
# If no directory is found, ImportError is raised.
_packs = {} # {pack: [pathname, ...], ...}
def addpack(pack, *locations):
import os
if os.path.isabs(pack):
base = os.path.basename(pack)
else:
base = pack
if _packs.has_key(base):
return
import sys
path = []
for loc in _flatten(locations) + sys.path:
fn = os.path.join(loc, base)
if fn not in path and os.path.isdir(fn):
path.append(fn)
if pack != base and pack not in path and os.path.isdir(pack):
path.append(pack)
if not path: raise ImportError, 'package ' + pack + ' not found'
_packs[base] = path
for fn in path:
if fn not in sys.path:
sys.path.append(fn)
def _flatten(locations):
locs = []
for loc in locations:
if type(loc) == type(''):
locs.append(loc)
else:
locs = locs + _flatten(loc)
return locs
"""Efficiently compare files, boolean outcome only (equal / not equal).
Tricks (used in this order):
- Files with identical type, size & mtime are assumed to be clones
- Files with different type or size cannot be identical
- We keep a cache of outcomes of earlier comparisons
- We don't fork a process to run 'cmp' but read the files ourselves
"""
import os
cache = {}
def cmp(f1, f2, shallow=1):
"""Compare two files, use the cache if possible.
Return 1 for identical files, 0 for different.
Raise exceptions if either file could not be statted, read, etc."""
s1, s2 = sig(os.stat(f1)), sig(os.stat(f2))
if s1[0] != 8 or s2[0] != 8:
# Either is not a plain file -- always report as different
return 0
if shallow and s1 == s2:
# type, size & mtime match -- report same
return 1
if s1[:2] != s2[:2]: # Types or sizes differ, don't bother
# types or sizes differ -- report different
return 0
# same type and size -- look in the cache
key = (f1, f2)
try:
cs1, cs2, outcome = cache[key]
# cache hit
if s1 == cs1 and s2 == cs2:
# cached signatures match
return outcome
# stale cached signature(s)
except KeyError:
# cache miss
pass
# really compare
outcome = do_cmp(f1, f2)
cache[key] = s1, s2, outcome
return outcome
def sig(st):
"""Return signature (i.e., type, size, mtime) from raw stat data
0-5: st_mode, st_ino, st_dev, st_nlink, st_uid, st_gid
6-9: st_size, st_atime, st_mtime, st_ctime"""
type = st[0] / 4096
size = st[6]
mtime = st[8]
return type, size, mtime
def do_cmp(f1, f2):
"""Compare two files, really."""
bufsize = 8*1024 # Could be tuned
fp1 = open(f1, 'rb')
fp2 = open(f2, 'rb')
while 1:
b1 = fp1.read(bufsize)
b2 = fp2.read(bufsize)
if b1 != b2: return 0
if not b1: return 1
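# A minimal usage sketch (hypothetical paths): identical contents yield 1,
# differing contents yield 0; when a real comparison is needed the outcome
# is cached under the (f1, f2) key so unchanged files are not re-read.
if __name__ == '__main__':
    print cmp('/etc/hosts', '/etc/hosts')   # a file equals itself -> 1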
"""Efficiently compare files, boolean outcome only (equal / not equal).
Tricks (used in this order):
- Use the statcache module to avoid statting files more than once
- Files with identical type, size & mtime are assumed to be clones
- Files with different type or size cannot be identical
- We keep a cache of outcomes of earlier comparisons
- We don't fork a process to run 'cmp' but read the files ourselves
"""
import os
from stat import *
import statcache
# The cache.
#
cache = {}
def cmp(f1, f2, shallow=1):
"""Compare two files, use the cache if possible.
May raise os.error if a stat or open of either fails.
Return 1 for identical files, 0 for different.
Raise exceptions if either file could not be statted, read, etc."""
s1, s2 = sig(statcache.stat(f1)), sig(statcache.stat(f2))
if not S_ISREG(s1[0]) or not S_ISREG(s2[0]):
# Either is not a plain file -- always report as different
return 0
if shallow and s1 == s2:
# type, size & mtime match -- report same
return 1
if s1[:2] != s2[:2]: # Types or sizes differ, don't bother
# types or sizes differ -- report different
return 0
# same type and size -- look in the cache
key = f1 + ' ' + f2
if cache.has_key(key):
cs1, cs2, outcome = cache[key]
# cache hit
if s1 == cs1 and s2 == cs2:
# cached signatures match
return outcome
# stale cached signature(s)
# really compare
outcome = do_cmp(f1, f2)
cache[key] = s1, s2, outcome
return outcome
def sig(st):
"""Return signature (i.e., type, size, mtime) from raw stat data."""
return S_IFMT(st[ST_MODE]), st[ST_SIZE], st[ST_MTIME]
def do_cmp(f1, f2):
"""Compare two files, really."""
#print ' cmp', f1, f2 # XXX remove when debugged
bufsize = 8*1024 # Could be tuned
fp1 = open(f1, 'rb')
fp2 = open(f2, 'rb')
while 1:
b1 = fp1.read(bufsize)
b2 = fp2.read(bufsize)
if b1 != b2: return 0
if not b1: return 1
# A subroutine for extracting a function name from a code object
# (with cache)
import sys
from stat import *
import string
import os
import linecache
# XXX The functions getcodename() and getfuncname() are now obsolete
# XXX as code and function objects now have a name attribute --
# XXX co.co_name and f.func_name.
# XXX getlineno() is now also obsolete because of the new attribute
# XXX of code objects, co.co_firstlineno.
# Extract the function or class name from a code object.
# This is a bit of a hack, since a code object doesn't contain
# the name directly. So what do we do:
# - get the filename (which *is* in the code object)
# - look in the code string to find the first SET_LINENO instruction
# (this must be the first instruction)
# - get the line from the file
# - if the line starts with 'class' or 'def' (after possible whitespace),
# extract the following identifier
#
# This breaks apart when the function was read from <stdin>
# or constructed by exec(), when the file is not accessible,
# and also when the file has been modified or when a line is
# continued with a backslash before the function or class name.
#
# Because this is a pretty expensive hack, a cache is kept.
SET_LINENO = 127 # The opcode (see "opcode.h" in the Python source)
identchars = string.ascii_letters + string.digits + '_' # Identifier characters
_namecache = {} # The cache
def getcodename(co):
try:
return co.co_name
except AttributeError:
pass
key = `co` # arbitrary but uniquely identifying string
if _namecache.has_key(key): return _namecache[key]
filename = co.co_filename
code = co.co_code
name = ''
if ord(code[0]) == SET_LINENO:
lineno = ord(code[1]) | ord(code[2]) << 8
line = linecache.getline(filename, lineno)
words = line.split()
if len(words) >= 2 and words[0] in ('def', 'class'):
name = words[1]
for i in range(len(name)):
if name[i] not in identchars:
name = name[:i]
break
_namecache[key] = name
return name
# Use the above routine to find a function's name.
def getfuncname(func):
try:
return func.func_name
except AttributeError:
pass
return getcodename(func.func_code)
# A part of the above code to extract just the line number from a code object.
def getlineno(co):
try:
return co.co_firstlineno
except AttributeError:
pass
code = co.co_code
if ord(code[0]) == SET_LINENO:
return ord(code[1]) | ord(code[2]) << 8
else:
return -1
"""A class to build directory diff tools on."""
import os
import dircache
import cmpcache
import statcache
from stat import *
class dircmp:
"""Directory comparison class."""
def new(self, a, b):
"""Initialize."""
self.a = a
self.b = b
# Properties that caller may change before calling self.run():
self.hide = [os.curdir, os.pardir] # Names never to be shown
self.ignore = ['RCS', 'tags'] # Names ignored in comparison
return self
def run(self):
"""Compare everything except common subdirectories."""
self.a_list = filter(dircache.listdir(self.a), self.hide)
self.b_list = filter(dircache.listdir(self.b), self.hide)
self.a_list.sort()
self.b_list.sort()
self.phase1()
self.phase2()
self.phase3()
def phase1(self):
"""Compute common names."""
self.a_only = []
self.common = []
for x in self.a_list:
if x in self.b_list:
self.common.append(x)
else:
self.a_only.append(x)
self.b_only = []
for x in self.b_list:
if x not in self.common:
self.b_only.append(x)
def phase2(self):
"""Distinguish files, directories, funnies."""
self.common_dirs = []
self.common_files = []
self.common_funny = []
for x in self.common:
a_path = os.path.join(self.a, x)
b_path = os.path.join(self.b, x)
ok = 1
try:
a_stat = statcache.stat(a_path)
except os.error, why:
# print 'Can\'t stat', a_path, ':', why[1]
ok = 0
try:
b_stat = statcache.stat(b_path)
except os.error, why:
# print 'Can\'t stat', b_path, ':', why[1]
ok = 0
if ok:
a_type = S_IFMT(a_stat[ST_MODE])
b_type = S_IFMT(b_stat[ST_MODE])
if a_type != b_type:
self.common_funny.append(x)
elif S_ISDIR(a_type):
self.common_dirs.append(x)
elif S_ISREG(a_type):
self.common_files.append(x)
else:
self.common_funny.append(x)
else:
self.common_funny.append(x)
def phase3(self):
"""Find out differences between common files."""
xx = cmpfiles(self.a, self.b, self.common_files)
self.same_files, self.diff_files, self.funny_files = xx
def phase4(self):
"""Find out differences between common subdirectories.
A new dircmp object is created for each common subdirectory,
these are stored in a dictionary indexed by filename.
The hide and ignore properties are inherited from the parent."""
self.subdirs = {}
for x in self.common_dirs:
a_x = os.path.join(self.a, x)
b_x = os.path.join(self.b, x)
self.subdirs[x] = newdd = dircmp().new(a_x, b_x)
newdd.hide = self.hide
newdd.ignore = self.ignore
newdd.run()
def phase4_closure(self):
"""Recursively call phase4() on subdirectories."""
self.phase4()
for x in self.subdirs.keys():
self.subdirs[x].phase4_closure()
def report(self):
"""Print a report on the differences between a and b."""
# Assume that phases 1 to 3 have been executed
# Output format is purposely lousy
print 'diff', self.a, self.b
if self.a_only:
print 'Only in', self.a, ':', self.a_only
if self.b_only:
print 'Only in', self.b, ':', self.b_only
if self.same_files:
print 'Identical files :', self.same_files
if self.diff_files:
print 'Differing files :', self.diff_files
if self.funny_files:
print 'Trouble with common files :', self.funny_files
if self.common_dirs:
print 'Common subdirectories :', self.common_dirs
if self.common_funny:
print 'Common funny cases :', self.common_funny
def report_closure(self):
"""Print reports on self and on subdirs.
If phase 4 hasn't been done, no subdir reports are printed."""
self.report()
try:
x = self.subdirs
except AttributeError:
return # No subdirectories computed
for x in self.subdirs.keys():
print
self.subdirs[x].report_closure()
def report_phase4_closure(self):
"""Report and do phase 4 recursively."""
self.report()
self.phase4()
for x in self.subdirs.keys():
print
self.subdirs[x].report_phase4_closure()
def cmpfiles(a, b, common):
"""Compare common files in two directories.
Return:
- files that compare equal
- files that compare different
- funny cases (can't stat etc.)"""
res = ([], [], [])
for x in common:
res[cmp(os.path.join(a, x), os.path.join(b, x))].append(x)
return res
def cmp(a, b):
"""Compare two files.
Return:
0 for equal
1 for different
2 for funny cases (can't stat, etc.)"""
try:
if cmpcache.cmp(a, b): return 0
return 1
except os.error:
return 2
def filter(list, skip):
"""Return a copy with items that occur in skip removed."""
result = []
for item in list:
if item not in skip: result.append(item)
return result
def demo():
"""Demonstration and testing."""
import sys
import getopt
options, args = getopt.getopt(sys.argv[1:], 'r')
if len(args) != 2:
raise getopt.error, 'need exactly two args'
dd = dircmp().new(args[0], args[1])
dd.run()
if ('-r', '') in options:
dd.report_phase4_closure()
else:
dd.report()
if __name__ == "__main__":
demo()
# Module 'dump'
#
# Print python code that reconstructs a variable.
# This only works in certain cases.
#
# It works fine for:
# - ints and floats (except NaNs and other weird things)
# - strings
# - compounds and lists, provided it works for all their elements
# - imported modules, provided their name is the module name
#
# It works for top-level dictionaries but not for dictionaries
# contained in other objects (could be made to work with some hassle
# though).
#
# It does not work for functions (all sorts), classes, class objects,
# windows, files etc.
#
# Finally, objects referenced by more than one name or contained in more
# than one other object lose their sharing property (this is bad for
# strings used as exception identifiers, for instance).
# Dump a whole symbol table
#
def dumpsymtab(dict):
for key in dict.keys():
dumpvar(key, dict[key])
# Dump a single variable
#
def dumpvar(name, x):
import sys
t = type(x)
if t == type({}):
print name, '= {}'
for key in x.keys():
item = x[key]
if not printable(item):
print '#',
print name, '[', `key`, '] =', `item`
elif t in (type(''), type(0), type(0.0), type([]), type(())):
if not printable(x):
print '#',
print name, '=', `x`
elif t == type(sys):
print 'import', name, '#', x
else:
print '#', name, '=', x
# check if a value is printable in a way that can be read back with input()
#
def printable(x):
t = type(x)
if t in (type(''), type(0), type(0.0)):
return 1
if t in (type([]), type(())):
for item in x:
if not printable(item):
return 0
return 1
if x == {}:
return 1
return 0
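# A minimal usage sketch: each call prints Python source that rebuilds
# the value when executed (module values become an import statement).
if __name__ == '__main__':
    dumpvar('x', [1, 2.5, 'hi'])          # prints: x = [1, 2.5, 'hi']
    dumpsymtab({'n': 3, 'name': 'spam'})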
import fnmatch
import os
_debug = 0
_prune = ['(*)']
def find(pattern, dir = os.curdir):
list = []
names = os.listdir(dir)
names.sort()
for name in names:
if name in (os.curdir, os.pardir):
continue
fullname = os.path.join(dir, name)
if fnmatch.fnmatch(name, pattern):
list.append(fullname)
if os.path.isdir(fullname) and not os.path.islink(fullname):
for p in _prune:
if fnmatch.fnmatch(name, p):
if _debug: print "skip", `fullname`
break
else:
if _debug: print "descend into", `fullname`
list = list + find(pattern, fullname)
return list
# 'grep'
import regex
from regex_syntax import *
opt_show_where = 0
opt_show_filename = 0
opt_show_lineno = 1
def grep(pat, *files):
return ggrep(RE_SYNTAX_GREP, pat, files)
def egrep(pat, *files):
return ggrep(RE_SYNTAX_EGREP, pat, files)
def emgrep(pat, *files):
return ggrep(RE_SYNTAX_EMACS, pat, files)
def ggrep(syntax, pat, files):
if len(files) == 1 and type(files[0]) == type([]):
files = files[0]
global opt_show_filename
opt_show_filename = (len(files) != 1)
syntax = regex.set_syntax(syntax)
try:
prog = regex.compile(pat)
finally:
syntax = regex.set_syntax(syntax)
for filename in files:
fp = open(filename, 'r')
lineno = 0
while 1:
line = fp.readline()
if not line: break
lineno = lineno + 1
if prog.search(line) >= 0:
showline(filename, lineno, line, prog)
fp.close()
def pgrep(pat, *files):
if len(files) == 1 and type(files[0]) == type([]):
files = files[0]
global opt_show_filename
opt_show_filename = (len(files) != 1)
import re
prog = re.compile(pat)
for filename in files:
fp = open(filename, 'r')
lineno = 0
while 1:
line = fp.readline()
if not line: break
lineno = lineno + 1
if prog.search(line):
showline(filename, lineno, line, prog)
fp.close()
def showline(filename, lineno, line, prog):
if line[-1:] == '\n': line = line[:-1]
if opt_show_lineno:
prefix = `lineno`.rjust(3) + ': '
else:
prefix = ''
if opt_show_filename:
prefix = filename + ': ' + prefix
print prefix + line
if opt_show_where:
start, end = prog.regs()[0]
line = line[:start]
if '\t' not in line:
prefix = ' ' * (len(prefix) + start)
else:
prefix = ' ' * len(prefix)
for c in line:
if c != '\t': c = ' '
prefix = prefix + c
if start == end: prefix = prefix + '\\'
else: prefix = prefix + '^'*(end-start)
print prefix
import struct, fcntl
def writelock(f):
_lock(f, fcntl.F_WRLCK)
def readlock(f):
_lock(f, fcntl.F_RDLCK)
def unlock(f):
_lock(f, fcntl.F_UNLCK)
def _lock(f, op):
dummy = fcntl.fcntl(f.fileno(), fcntl.F_SETLKW,
struct.pack('2h8l', op,
0, 0, 0, 0, 0, 0, 0, 0, 0))
# New dir() function
# This should be the new dir(), except that it should still list
# the current local name space by default
def listattrs(x):
try:
dictkeys = x.__dict__.keys()
except (AttributeError, TypeError):
dictkeys = []
#
try:
methods = x.__methods__
except (AttributeError, TypeError):
methods = []
#
try:
members = x.__members__
except (AttributeError, TypeError):
members = []
#
try:
the_class = x.__class__
except (AttributeError, TypeError):
the_class = None
#
try:
bases = x.__bases__
except (AttributeError, TypeError):
bases = ()
#
total = dictkeys + methods + members
if the_class:
# It's a class instance; add the class's attributes
# that are functions (methods)...
class_attrs = listattrs(the_class)
class_methods = []
for name in class_attrs:
if is_function(getattr(the_class, name)):
class_methods.append(name)
total = total + class_methods
elif bases:
# It's a derived class; add the base class attributes
for base in bases:
base_attrs = listattrs(base)
total = total + base_attrs
total.sort()
return total
i = 0
while i+1 < len(total):
if total[i] == total[i+1]:
del total[i+1]
else:
i = i+1
return total
# Helper to recognize functions
def is_function(x):
return type(x) == type(is_function)
# Approximation of builtin dir(); but note that this lists the user's
# variables by default, not the current local name space.
def dir(x = None):
if x is not None:
return listattrs(x)
else:
import __main__
return listattrs(__main__)
# Module 'packmail' -- create a self-unpacking shell archive.
# This module works on UNIX and on the Mac; the archives can unpack
# themselves only on UNIX.
import os
from stat import ST_MTIME
# Print help
def help():
print 'All fns have a file open for writing as first parameter'
print 'pack(f, fullname, name): pack fullname as name'
print 'packsome(f, directory, namelist): selected files from directory'
print 'packall(f, directory): pack all files from directory'
print 'packnotolder(f, directory, name): pack all files from directory'
print ' that are not older than a file there'
print 'packtree(f, directory): pack entire directory tree'
# Pack one file
def pack(outfp, file, name):
fp = open(file, 'r')
outfp.write('echo ' + name + '\n')
outfp.write('sed "s/^X//" >"' + name + '" <<"!"\n')
while 1:
line = fp.readline()
if not line: break
if line[-1:] != '\n':
line = line + '\n'
outfp.write('X' + line)
outfp.write('!\n')
fp.close()
# Pack some files from a directory
def packsome(outfp, dirname, names):
for name in names:
print name
file = os.path.join(dirname, name)
pack(outfp, file, name)
# Pack all files from a directory
def packall(outfp, dirname):
names = os.listdir(dirname)
try:
names.remove('.')
except:
pass
try:
names.remove('..')
except:
pass
names.sort()
packsome(outfp, dirname, names)
# Pack all files from a directory that are not older than a given one
def packnotolder(outfp, dirname, oldest):
names = os.listdir(dirname)
try:
names.remove('.')
except:
pass
try:
names.remove('..')
except:
pass
oldest = os.path.join(dirname, oldest)
st = os.stat(oldest)
mtime = st[ST_MTIME]
todo = []
for name in names:
print name, '...',
st = os.stat(os.path.join(dirname, name))
if st[ST_MTIME] >= mtime:
print 'Yes.'
todo.append(name)
else:
print 'No.'
todo.sort()
packsome(outfp, dirname, todo)
# Pack a whole tree (no exceptions)
def packtree(outfp, dirname):
print 'packtree', dirname
outfp.write('mkdir ' + unixfix(dirname) + '\n')
names = os.listdir(dirname)
try:
names.remove('.')
except:
pass
try:
names.remove('..')
except:
pass
subdirs = []
for name in names:
fullname = os.path.join(dirname, name)
if os.path.isdir(fullname):
subdirs.append(fullname)
else:
print 'pack', fullname
pack(outfp, fullname, unixfix(fullname))
for subdirname in subdirs:
packtree(outfp, subdirname)
def unixfix(name):
comps = name.split(os.sep)
res = ''
for comp in comps:
if comp:
if res: res = res + '/'
res = res + comp
return res
# module 'poly' -- Polynomials
# A polynomial is represented by a list of coefficients, e.g.,
# [1, 10, 5] represents 1*x**0 + 10*x**1 + 5*x**2 (or 1 + 10x + 5x**2).
# There is no way to suppress internal zeros; trailing zeros are
# taken out by normalize().
def normalize(p): # Strip unnecessary zero coefficients
n = len(p)
while n:
if p[n-1]: return p[:n]
n = n-1
return []
def plus(a, b):
if len(a) < len(b): a, b = b, a # make sure a is the longest
res = a[:] # make a copy
for i in range(len(b)):
res[i] = res[i] + b[i]
return normalize(res)
def minus(a, b):
neg_b = map(lambda x: -x, b[:])
return plus(a, neg_b)
def one(power, coeff): # Representation of coeff * x**power
res = []
for i in range(power): res.append(0)
return res + [coeff]
def times(a, b):
res = []
for i in range(len(a)):
for j in range(len(b)):
res = plus(res, one(i+j, a[i]*b[j]))
return res
def power(a, n): # Raise polynomial a to the positive integral power n
if n == 0: return [1]
if n == 1: return a
if n/2*2 == n:
b = power(a, n/2)
return times(b, b)
return times(power(a, n-1), a)
def der(a): # First derivative
res = a[1:]
for i in range(len(res)):
res[i] = res[i] * (i+1)
return res
# Computing a primitive function would require rational arithmetic...
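# A small worked example of the coefficient-list representation, using
# the functions defined above:
if __name__ == '__main__':
    print plus([1, 2], [3])       # (1 + 2x) + 3           -> [4, 2]
    print times([1, 1], [1, 1])   # (1 + x) * (1 + x)      -> [1, 2, 1]
    print der([1, 10, 5])         # d/dx (1 + 10x + 5x**2) -> [10, 10]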
# Module 'rand'
# Don't use unless you want compatibility with C's rand()!
import whrandom
def srand(seed):
whrandom.seed(seed%256, seed/256%256, seed/65536%256)
def rand():
return int(whrandom.random() * 32768.0) % 32768
def choice(seq):
return seq[rand() % len(seq)]
"""Maintain a cache of stat() information on files.
There are functions to reset the cache or to selectively remove items.
"""
import warnings
warnings.warn("The statcache module is obsolete. Use os.stat() instead.",
DeprecationWarning)
del warnings
import os as _os
from stat import *
__all__ = ["stat","reset","forget","forget_prefix","forget_dir",
"forget_except_prefix","isdir"]
# The cache. Keys are pathnames, values are os.stat outcomes.
# Remember that multiple threads may be calling this! So, e.g., that
# path in cache returns 1 doesn't mean the cache will still contain
# path on the next line. Code defensively.
cache = {}
def stat(path):
"""Stat a file, possibly out of the cache."""
ret = cache.get(path, None)
if ret is None:
cache[path] = ret = _os.stat(path)
return ret
def reset():
"""Clear the cache."""
cache.clear()
# For thread safety, always use forget() internally too.
def forget(path):
"""Remove a given item from the cache, if it exists."""
try:
del cache[path]
except KeyError:
pass
def forget_prefix(prefix):
"""Remove all pathnames with a given prefix."""
for path in cache.keys():
if path.startswith(prefix):
forget(path)
def forget_dir(prefix):
"""Forget a directory and all entries except for entries in subdirs."""
# Remove trailing separator, if any. This is tricky to do in a
# x-platform way. For example, Windows accepts both / and \ as
# separators, and if there's nothing *but* a separator we want to
# preserve that this is the root. Only os.path has the platform
# knowledge we need.
from os.path import split, join
prefix = split(join(prefix, "xxx"))[0]
forget(prefix)
for path in cache.keys():
# First check that the path at least starts with the prefix, so
# that when it doesn't we can avoid paying for split().
if path.startswith(prefix) and split(path)[0] == prefix:
forget(path)
def forget_except_prefix(prefix):
"""Remove all pathnames except with a given prefix.
Normally used with prefix = '/' after a chdir().
"""
for path in cache.keys():
if not path.startswith(prefix):
forget(path)
def isdir(path):
"""Return True if directory, else False."""
try:
st = stat(path)
except _os.error:
return False
return S_ISDIR(st.st_mode)
# Print tracebacks, with a dump of local variables.
# Also an interactive stack trace browser.
# Note -- this module is obsolete -- use pdb.pm() instead.
import sys
import os
from stat import *
import linecache
def br(): browser(sys.last_traceback)
def tb(): printtb(sys.last_traceback)
def browser(tb):
if not tb:
print 'No traceback.'
return
tblist = []
while tb:
tblist.append(tb)
tb = tb.tb_next
ptr = len(tblist)-1
tb = tblist[ptr]
while 1:
if tb != tblist[ptr]:
tb = tblist[ptr]
print `ptr` + ':',
printtbheader(tb)
try:
line = raw_input('TB: ')
except KeyboardInterrupt:
print '\n[Interrupted]'
break
except EOFError:
print '\n[EOF]'
break
cmd = line.strip()
if cmd:
if cmd == 'quit':
break
elif cmd == 'list':
browserlist(tb)
elif cmd == 'up':
if ptr-1 >= 0: ptr = ptr-1
else: print 'Bottom of stack.'
elif cmd == 'down':
if ptr+1 < len(tblist): ptr = ptr+1
else: print 'Top of stack.'
elif cmd == 'locals':
printsymbols(tb.tb_frame.f_locals)
elif cmd == 'globals':
printsymbols(tb.tb_frame.f_globals)
elif cmd in ('?', 'help'):
browserhelp()
else:
browserexec(tb, cmd)
def browserlist(tb):
filename = tb.tb_frame.f_code.co_filename
lineno = tb.tb_lineno
last = lineno
first = max(1, last-10)
for i in range(first, last+1):
if i == lineno: prefix = '***' + `i`.rjust(4) + ':'
else: prefix = `i`.rjust(7) + ':'
line = linecache.getline(filename, i)
if line[-1:] == '\n': line = line[:-1]
print prefix + line
def browserexec(tb, cmd):
locals = tb.tb_frame.f_locals
globals = tb.tb_frame.f_globals
try:
exec cmd+'\n' in globals, locals
except:
t, v = sys.exc_info()[:2]
print '*** Exception:',
if type(t) is type(''):
print t,
else:
print t.__name__,
if v is not None:
print ':', v,
print
print 'Type help to get help.'
def browserhelp():
print
print ' This is the traceback browser. Commands are:'
print ' up : move one level up in the call stack'
print ' down : move one level down in the call stack'
print ' locals : print all local variables at this level'
print ' globals : print all global variables at this level'
print ' list : list source code around the failure'
print ' help : print help (what you are reading now)'
print ' quit : back to command interpreter'
print ' Typing any other 1-line statement will execute it'
print ' using the current level\'s symbol tables'
print
def printtb(tb):
while tb:
print1tb(tb)
tb = tb.tb_next
def print1tb(tb):
printtbheader(tb)
if tb.tb_frame.f_locals is not tb.tb_frame.f_globals:
printsymbols(tb.tb_frame.f_locals)
def printtbheader(tb):
filename = tb.tb_frame.f_code.co_filename
lineno = tb.tb_lineno
info = '"' + filename + '"(' + `lineno` + ')'
line = linecache.getline(filename, lineno)
if line:
info = info + ': ' + line.strip()
print info
def printsymbols(d):
keys = d.keys()
keys.sort()
for name in keys:
print ' ' + name.ljust(12) + ':',
printobject(d[name], 4)
print
def printobject(v, maxlevel):
if v is None:
print 'None',
elif type(v) in (type(0), type(0.0)):
print v,
elif type(v) is type(''):
if len(v) > 20:
print `v[:17] + '...'`,
else:
print `v`,
elif type(v) is type(()):
print '(',
printlist(v, maxlevel)
print ')',
elif type(v) is type([]):
print '[',
printlist(v, maxlevel)
print ']',
elif type(v) is type({}):
print '{',
printdict(v, maxlevel)
print '}',
else:
print v,
def printlist(v, maxlevel):
n = len(v)
if n == 0: return
if maxlevel <= 0:
print '...',
return
for i in range(min(6, n)):
printobject(v[i], maxlevel-1)
if i+1 < n: print ',',
if n > 6: print '...',
def printdict(v, maxlevel):
keys = v.keys()
n = len(keys)
if n == 0: return
if maxlevel <= 0:
print '...',
return
keys.sort()
for i in range(min(6, n)):
key = keys[i]
print `key` + ':',
printobject(v[key], maxlevel-1)
if i+1 < n: print ',',
if n > 6: print '...',
"""Parse a timezone specification."""
# XXX Unfinished.
# XXX Only the typical form "XXXhhYYY;ddd/hh,ddd/hh" is currently supported.
import warnings
warnings.warn(
"The tzparse module is obsolete and will disappear in the future",
DeprecationWarning)
tzpat = ('^([A-Z][A-Z][A-Z])([-+]?[0-9]+)([A-Z][A-Z][A-Z]);'
'([0-9]+)/([0-9]+),([0-9]+)/([0-9]+)$')
tzprog = None
def tzparse(tzstr):
"""Given a timezone spec, return a tuple of information
(tzname, delta, dstname, daystart, hourstart, dayend, hourend),
where 'tzname' is the name of the timezone, 'delta' is the offset
in hours from GMT, 'dstname' is the name of the daylight-saving
timezone, and 'daystart'/'hourstart' and 'dayend'/'hourend'
specify the starting and ending points for daylight saving time."""
global tzprog
if tzprog is None:
import re
tzprog = re.compile(tzpat)
match = tzprog.match(tzstr)
if not match:
raise ValueError, 'not the TZ syntax I understand'
subs = []
for i in range(1, 8):
subs.append(match.group(i))
for i in (1, 3, 4, 5, 6):
subs[i] = eval(subs[i])
[tzname, delta, dstname, daystart, hourstart, dayend, hourend] = subs
return (tzname, delta, dstname, daystart, hourstart, dayend, hourend)
def tzlocaltime(secs, params):
"""Given a Unix time in seconds and a tuple of information about
a timezone as returned by tzparse(), return the local time in the
form (year, month, day, hour, min, sec, yday, wday, tzname)."""
import time
(tzname, delta, dstname, daystart, hourstart, dayend, hourend) = params
year, month, days, hours, mins, secs, yday, wday, isdst = \
time.gmtime(secs - delta*3600)
if (daystart, hourstart) <= (yday+1, hours) < (dayend, hourend):
tzname = dstname
hours = hours + 1
return year, month, days, hours, mins, secs, yday, wday, tzname
def tzset():
"""Determine the current timezone from the "TZ" environment variable."""
global tzparams, timezone, altzone, daylight, tzname
import os
tzstr = os.environ['TZ']
tzparams = tzparse(tzstr)
timezone = tzparams[1] * 3600
altzone = timezone - 3600
daylight = 1
tzname = tzparams[0], tzparams[2]
def isdst(secs):
"""Return true if daylight-saving time is in effect for the given
Unix time in the current timezone."""
import time
(tzname, delta, dstname, daystart, hourstart, dayend, hourend) = \
tzparams
year, month, days, hours, mins, secs, yday, wday, isdst = \
time.gmtime(secs - delta*3600)
return (daystart, hourstart) <= (yday+1, hours) < (dayend, hourend)
tzset()
def localtime(secs):
"""Get the local time in the current timezone."""
return tzlocaltime(secs, tzparams)
def test():
from time import asctime, gmtime
import time, sys
now = time.time()
x = localtime(now)
tm = x[:-1] + (0,)
print 'now =', now, '=', asctime(tm), x[-1]
now = now - now % (24*3600)
if sys.argv[1:]: now = now + eval(sys.argv[1])
x = gmtime(now)
tm = x[:-1] + (0,)
print 'gmtime =', now, '=', asctime(tm), 'yday =', x[-2]
jan1 = now - x[-2]*24*3600
x = localtime(jan1)
tm = x[:-1] + (0,)
print 'jan1 =', jan1, '=', asctime(tm), x[-1]
for d in range(85, 95) + range(265, 275):
t = jan1 + d*24*3600
x = localtime(t)
tm = x[:-1] + (0,)
print 'd =', d, 't =', t, '=', asctime(tm), x[-1]
# Module 'util' -- some useful functions that don't fit elsewhere
# NB: These are now built-in functions, but this module is provided
# for compatibility. Don't use in new programs unless you need backward
# compatibility (i.e. need to run with old interpreters).
# Remove an item from a list.
# No complaints if it isn't in the list at all.
# If it occurs more than once, remove the first occurrence.
#
def remove(item, list):
if item in list: list.remove(item)
# Return a string containing a file's contents.
#
def readfile(fn):
return readopenfile(open(fn, 'r'))
# Read an open file until EOF.
#
def readopenfile(fp):
return fp.read()
"""Wichman-Hill random number generator.
Wichmann, B. A. & Hill, I. D. (1982)
Algorithm AS 183:
An efficient and portable pseudo-random number generator
Applied Statistics 31 (1982) 188-190
see also:
Correction to Algorithm AS 183
Applied Statistics 33 (1984) 123
McLeod, A. I. (1985)
A remark on Algorithm AS 183
Applied Statistics 34 (1985),198-200
USE:
whrandom.random() yields double precision random numbers
uniformly distributed between 0 and 1.
whrandom.seed(x, y, z) must be called before whrandom.random()
to seed the generator
There is also an interface to create multiple independent
random generators, and to choose from other ranges.
Multi-threading note: the random number generator used here is not
thread-safe; it is possible that nearly simultaneous calls in
different threads return the same random value. To avoid this, you
have to use a lock around all calls. (I didn't want to slow this
down in the serial case by using a lock here.)
"""
import warnings
warnings.warn("the whrandom module is deprecated; please use the random module",
DeprecationWarning)
# Translated by Guido van Rossum from C source provided by
# Adrian Baddeley.
class whrandom:
def __init__(self, x = 0, y = 0, z = 0):
"""Initialize an instance.
Without arguments, initialize from current time.
With arguments (x, y, z), initialize from them."""
self.seed(x, y, z)
def seed(self, x = 0, y = 0, z = 0):
"""Set the seed from (x, y, z).
These must be integers in the range [0, 256)."""
if not type(x) == type(y) == type(z) == type(0):
raise TypeError, 'seeds must be integers'
if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
raise ValueError, 'seeds must be in range(0, 256)'
if 0 == x == y == z:
# Initialize from current time
import time
t = long(time.time() * 256)
t = int((t&0xffffff) ^ (t>>24))
t, x = divmod(t, 256)
t, y = divmod(t, 256)
t, z = divmod(t, 256)
# Zero is a poor seed, so substitute 1
self._seed = (x or 1, y or 1, z or 1)
def random(self):
"""Get the next random number in the range [0.0, 1.0)."""
# This part is thread-unsafe:
# BEGIN CRITICAL SECTION
x, y, z = self._seed
#
x = (171 * x) % 30269
y = (172 * y) % 30307
z = (170 * z) % 30323
#
self._seed = x, y, z
# END CRITICAL SECTION
#
return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
def uniform(self, a, b):
"""Get a random number in the range [a, b)."""
return a + (b-a) * self.random()
def randint(self, a, b):
"""Get a random integer in the range [a, b] including
both end points.
(Deprecated; use randrange below.)"""
return self.randrange(a, b+1)
def choice(self, seq):
"""Choose a random element from a non-empty sequence."""
return seq[int(self.random() * len(seq))]
def randrange(self, start, stop=None, step=1, int=int, default=None):
"""Choose a random item from range(start, stop[, step]).
This fixes the problem with randint() which includes the
endpoint; in Python this is usually not what you want.
Do not supply the 'int' and 'default' arguments."""
# This code is a bit messy to make it fast for the
# common case while still doing adequate error checking
istart = int(start)
if istart != start:
raise ValueError, "non-integer arg 1 for randrange()"
if stop is default:
if istart > 0:
return int(self.random() * istart)
raise ValueError, "empty range for randrange()"
istop = int(stop)
if istop != stop:
raise ValueError, "non-integer stop for randrange()"
if step == 1:
if istart < istop:
return istart + int(self.random() *
(istop - istart))
raise ValueError, "empty range for randrange()"
istep = int(step)
if istep != step:
raise ValueError, "non-integer step for randrange()"
if istep > 0:
n = (istop - istart + istep - 1) / istep
elif istep < 0:
n = (istop - istart + istep + 1) / istep
else:
raise ValueError, "zero step for randrange()"
if n <= 0:
raise ValueError, "empty range for randrange()"
return istart + istep*int(self.random() * n)
# Initialize from the current time
_inst = whrandom()
seed = _inst.seed
random = _inst.random
uniform = _inst.uniform
randint = _inst.randint
choice = _inst.choice
randrange = _inst.randrange
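# A short usage sketch of the interfaces above: the module-level functions
# share one hidden generator, while separate whrandom() instances give
# independent streams.
if __name__ == '__main__':
    seed(1, 2, 3)                 # fixed seed -> reproducible sequence
    print random()                # a float in [0.0, 1.0)
    print randrange(10)           # an int in range(10)
    g = whrandom(4, 5, 6)         # an independent generator
    print g.uniform(1.0, 2.0)     # a float in [1.0, 2.0)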
# module 'zmod'
# Compute properties of mathematical "fields" formed by taking
# Z/n (the whole numbers modulo some whole number n) and an
# irreducible polynomial (i.e., a polynomial with only complex zeros),
# e.g., Z/5 and X**2 + 2.
#
# The field is formed by taking all possible linear combinations of
# a set of d base vectors (where d is the degree of the polynomial).
#
# Note that this procedure doesn't yield a field for all combinations
# of n and p: it may well be that some numbers have more than one
# inverse and others have none. This is what we check.
#
# Remember that a field is a ring where each element has an inverse.
# A ring has commutative addition and multiplication, a zero and a one:
# 0*x = x*0 = 0, 0+x = x+0 = x, 1*x = x*1 = x. Also, the distributive
# property holds: a*(b+c) = a*b + a*c.
# (XXX I forget if this is an axiom or follows from the rules.)
import poly
# Example N and polynomial
N = 5
P = poly.plus(poly.one(0, 2), poly.one(2, 1)) # 2 + x**2
# Return x modulo y. Returns >= 0 even if x < 0.
def mod(x, y):
return divmod(x, y)[1]
# Normalize a polynomial modulo n and modulo p.
def norm(a, n, p):
a = poly.modulo(a, p)
a = a[:]
for i in range(len(a)): a[i] = mod(a[i], n)
a = poly.normalize(a)
return a
# Make a list of all n^d elements of the proposed field.
def make_all(mat):
all = []
for row in mat:
for a in row:
all.append(a)
return all
def make_elements(n, d):
if d == 0: return [poly.one(0, 0)]
sub = make_elements(n, d-1)
all = []
for a in sub:
for i in range(n):
all.append(poly.plus(a, poly.one(d-1, i)))
return all
def make_inv(all, n, p):
x = poly.one(1, 1)
inv = []
for a in all:
inv.append(norm(poly.times(a, x), n, p))
return inv
def checkfield(n, p):
all = make_elements(n, len(p)-1)
inv = make_inv(all, n, p)
all1 = all[:]
inv1 = inv[:]
all1.sort()
inv1.sort()
if all1 == inv1: print 'BINGO!'
else:
print 'Sorry:', n, p
print all
print inv
def rj(s, width):
if type(s) is not type(''): s = `s`
n = len(s)
if n >= width: return s
return ' '*(width - n) + s
def lj(s, width):
if type(s) is not type(''): s = `s`
n = len(s)
if n >= width: return s
return s + ' '*(width - n)
#! /usr/bin/env python
r"""Convert old ("regex") regular expressions to new syntax ("re").
When imported as a module, there are two functions, with their own
strings:
convert(s, syntax=None) -- convert a regex regular expression to re syntax
quote(s) -- return a quoted string literal
When used as a script, read a Python string literal (or any other
expression evaluating to a string) from stdin, and write the
translated expression to stdout as a string literal. Unless stdout is
a tty, no trailing \n is written to stdout. This is done so that it
can be used with Emacs C-U M-| (shell-command-on-region with argument
which filters the region through the shell command).
No attempt has been made at coding for performance.
Translation table...
\( ( (unless RE_NO_BK_PARENS set)
\) ) (unless RE_NO_BK_PARENS set)
\| | (unless RE_NO_BK_VBAR set)
\< \b (not quite the same, but alla...)
\> \b (not quite the same, but alla...)
\` \A
\' \Z
Not translated...
.
^
$
*
+ (unless RE_BK_PLUS_QM set, then to \+)
? (unless RE_BK_PLUS_QM set, then to \?)
\
\b
\B
\w
\W
\1 ... \9
Special cases...
Non-printable characters are always replaced by their 3-digit
escape code (except \t, \n, \r, which use mnemonic escapes)
Newline is turned into | when RE_NEWLINE_OR is set
XXX To be done...
[...] (different treatment of backslashed items?)
[^...] (different treatment of backslashed items?)
^ $ * + ? (in some error contexts these are probably treated differently)
\vDD \DD (in the regex docs but only works when RE_ANSI_HEX set)
"""
import warnings
warnings.filterwarnings("ignore", ".* regex .*", DeprecationWarning, __name__,
append=1)
import regex
from regex_syntax import * # RE_*
__all__ = ["convert","quote"]
# Default translation table
mastertable = {
r'\<': r'\b',
r'\>': r'\b',
r'\`': r'\A',
r'\'': r'\Z',
r'\(': '(',
r'\)': ')',
r'\|': '|',
'(': r'\(',
')': r'\)',
'|': r'\|',
'\t': r'\t',
'\n': r'\n',
'\r': r'\r',
}
def convert(s, syntax=None):
"""Convert a regex regular expression to re syntax.
The first argument is the regular expression, as a string object,
just like it would be passed to regex.compile(). (I.e., pass the
actual string object -- string quotes must already have been
removed and the standard escape processing has already been done,
e.g. by eval().)
The optional second argument is the regex syntax variant to be
used. This is an integer mask as passed to regex.set_syntax();
the flag bits are defined in regex_syntax. When not specified, or
when None is given, the current regex syntax mask (as retrieved by
regex.get_syntax()) is used -- which is 0 by default.
The return value is a regular expression, as a string object that
could be passed to re.compile(). (I.e., no string quotes have
been added -- use quote() below, or repr().)
The conversion is not always guaranteed to be correct. More
syntactical analysis should be performed to detect borderline
cases and decide what to do with them. For example, 'x*?' is not
translated correctly.
"""
table = mastertable.copy()
if syntax is None:
syntax = regex.get_syntax()
if syntax & RE_NO_BK_PARENS:
del table[r'\('], table[r'\)']
del table['('], table[')']
if syntax & RE_NO_BK_VBAR:
del table[r'\|']
del table['|']
if syntax & RE_BK_PLUS_QM:
table['+'] = r'\+'
table['?'] = r'\?'
table[r'\+'] = '+'
table[r'\?'] = '?'
if syntax & RE_NEWLINE_OR:
table['\n'] = '|'
res = ""
i = 0
end = len(s)
while i < end:
c = s[i]
i = i+1
if c == '\\':
c = s[i]
i = i+1
key = '\\' + c
key = table.get(key, key)
res = res + key
else:
c = table.get(c, c)
res = res + c
return res
def quote(s, quote=None):
"""Convert a string object to a quoted string literal.
This is similar to repr() but will return a "raw" string (r'...'
or r"...") when the string contains backslashes, instead of
doubling all backslashes. The resulting string does *not* always
evaluate to the same string as the original; however it will do
just the right thing when passed into re.compile().
The optional second argument forces the string quote; it must be
a single character which is a valid Python string quote.
"""
if quote is None:
q = "'"
altq = "'"
if q in s and altq not in s:
q = altq
else:
assert quote in ('"', "'", '"""', "'''")
q = quote
res = q
for c in s:
if c == q: c = '\\' + c
elif c < ' ' or c > '~': c = "\\%03o" % ord(c)
res = res + c
res = res + q
if '\\' in res:
res = 'r' + res
return res
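# Added illustration: quote() falls back to a raw string literal when the
# pattern contains backslashes, and the result evaluates back to the original.
_demo = quote('\\bspam\\b')
assert _demo == "r'\\bspam\\b'" and eval(_demo) == '\\bspam\\b'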
def main():
"""Main program -- called when run as a script."""
import sys
s = eval(sys.stdin.read())
sys.stdout.write(quote(convert(s)))
if sys.stdout.isatty():
sys.stdout.write("\n")
if __name__ == '__main__':
main()
"""Constants for selecting regexp syntaxes for the obsolete regex module.
This module is only for backward compatibility. "regex" has now
been replaced by the new regular expression module, "re".
These bits are passed to regex.set_syntax() to choose among
alternative regexp syntaxes.
"""
# 1 means plain parentheses serve as grouping, and backslash
# parentheses are needed for literal searching.
# 0 means backslash-parentheses are grouping, and plain parentheses
# are for literal searching.
RE_NO_BK_PARENS = 1
# 1 means plain | serves as the "or"-operator, and \| is a literal.
# 0 means \| serves as the "or"-operator, and | is a literal.
RE_NO_BK_VBAR = 2
# 0 means plain + or ? serves as an operator, and \+, \? are literals.
# 1 means \+, \? are operators and plain +, ? are literals.
RE_BK_PLUS_QM = 4
# 1 means | binds tighter than ^ or $.
# 0 means the contrary.
RE_TIGHT_VBAR = 8
# 1 means treat \n as an _OR operator
# 0 means treat it as a normal character
RE_NEWLINE_OR = 16
# 0 means that special characters (such as *, ^, and $) always have
# their special meaning regardless of the surrounding context.
# 1 means that special characters may act as normal characters in some
# contexts. Specifically, this applies to:
# ^ - only special at the beginning, or after ( or |
# $ - only special at the end, or before ) or |
# *, +, ? - only special when not after the beginning, (, or |
RE_CONTEXT_INDEP_OPS = 32
# ANSI sequences (\n etc) and \xhh
RE_ANSI_HEX = 64
# No GNU extensions
RE_NO_GNU_EXTENSIONS = 128
# Now define combinations of bits for the standard possibilities.
RE_SYNTAX_AWK = (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
RE_SYNTAX_EGREP = (RE_SYNTAX_AWK | RE_NEWLINE_OR)
RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_NEWLINE_OR)
RE_SYNTAX_EMACS = 0
# (Python's obsolete "regexp" module used a syntax similar to awk.)
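# Added illustration: the named syntaxes above are just ORed flag bits
# (awk = 1|2|32 = 35, egrep = awk|16 = 51, grep = 4|16 = 20).
assert RE_SYNTAX_AWK == 35 and RE_SYNTAX_EGREP == 51 and RE_SYNTAX_GREP == 20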
"""Regexp-based split and replace using the obsolete regex module.
This module is only for backward compatibility. These operations
are now provided by the new regular expression module, "re".
sub(pat, repl, str): replace first occurrence of pattern in string
gsub(pat, repl, str): replace all occurrences of pattern in string
split(str, pat, maxsplit): split string using pattern as delimiter
splitx(str, pat, maxsplit): split string using pattern as delimiter plus
return delimiters
"""
import warnings
warnings.warn("the regsub module is deprecated; please use re.sub()",
DeprecationWarning)
# Ignore further deprecation warnings about this module
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)
import regex
__all__ = ["sub","gsub","split","splitx","capwords"]
# Replace first occurrence of pattern pat in string str by replacement
# repl. If the pattern isn't found, the string is returned unchanged.
# The replacement may contain references \digit to subpatterns and
# escaped backslashes. The pattern may be a string or an already
# compiled pattern.
def sub(pat, repl, str):
prog = compile(pat)
if prog.search(str) >= 0:
regs = prog.regs
a, b = regs[0]
str = str[:a] + expand(repl, regs, str) + str[b:]
return str
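# Added sketch of the modern equivalent: re.sub() with count=1 replaces
# only the first occurrence, mirroring sub() above.
import re  # illustration only; the historical code above uses regex
assert re.sub('([a-z]+)', r'<\1>', 'spam eggs', count=1) == '<spam> eggs'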
# Replace all (non-overlapping) occurrences of pattern pat in string
# str by replacement repl. The same rules as for sub() apply.
# Empty matches for the pattern are replaced only when not adjacent to
# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
def gsub(pat, repl, str):
prog = compile(pat)
new = ''
start = 0
first = 1
while prog.search(str, start) >= 0:
regs = prog.regs
a, b = regs[0]
if a == b == start and not first:
if start >= len(str) or prog.search(str, start+1) < 0:
break
regs = prog.regs
a, b = regs[0]
new = new + str[start:a] + expand(repl, regs, str)
start = b
first = 0
new = new + str[start:]
return new
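# Added sketch: re.sub() without a count replaces every non-overlapping
# occurrence, mirroring gsub() above (empty-match handling differs slightly).
import re  # illustration only
assert re.sub('o', '0', 'foo bok') == 'f00 b0k'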
# Split string str in fields separated by delimiters matching pattern
# pat. Only non-empty matches for the pattern are considered, so e.g.
# split('abc', '') returns ['abc'].
# The optional 3rd argument sets the number of splits that are performed.
def split(str, pat, maxsplit = 0):
return intsplit(str, pat, maxsplit, 0)
# Split string str in fields separated by delimiters matching pattern
# pat. Only non-empty matches for the pattern are considered, so e.g.
# split('abc', '') returns ['abc']. The delimiters are also included
# in the list.
# The optional 3rd argument sets the number of splits that are performed.
def splitx(str, pat, maxsplit = 0):
return intsplit(str, pat, maxsplit, 1)
# Internal function used to implement split() and splitx().
def intsplit(str, pat, maxsplit, retain):
prog = compile(pat)
res = []
start = next = 0
splitcount = 0
while prog.search(str, next) >= 0:
regs = prog.regs
a, b = regs[0]
if a == b:
next = next + 1
if next >= len(str):
break
else:
res.append(str[start:a])
if retain:
res.append(str[a:b])
start = next = b
splitcount = splitcount + 1
if (maxsplit and (splitcount >= maxsplit)):
break
res.append(str[start:])
return res
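# Added sketch of the modern equivalents: re.split() covers both entry
# points -- a plain pattern behaves like split(), a capturing group keeps
# the delimiters in the result, like splitx().
import re  # illustration only
assert re.split('[ ,]+', 'a, b c') == ['a', 'b', 'c']
assert re.split('([ ,]+)', 'a, b c') == ['a', ', ', 'b', ' ', 'c']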
# Capitalize words split using a pattern
def capwords(str, pat='[^a-zA-Z0-9_]+'):
words = splitx(str, pat)
for i in range(0, len(words), 2):
words[i] = words[i].capitalize()
return "".join(words)
# Internal subroutines:
# compile(pat): compile a pattern, caching already compiled patterns
# expand(repl, regs, str): expand \digit escapes in replacement string
# Manage a cache of compiled regular expressions.
#
# If the pattern is a string a compiled version of it is returned. If
# the pattern has been used before we return an already compiled
# version from the cache; otherwise we compile it now and save the
# compiled version in the cache, along with the syntax it was compiled
# with. Instead of a string, a compiled regular expression can also
# be passed.
cache = {}
def compile(pat):
if type(pat) != type(''):
return pat # Assume it is a compiled regex
key = (pat, regex.get_syntax())
if key in cache:
prog = cache[key] # Get it from the cache
else:
prog = cache[key] = regex.compile(pat)
return prog
def clear_cache():
global cache
cache = {}
# Expand \digit in the replacement.
# Each occurrence of \digit is replaced by the substring of str
# indicated by regs[digit]. To include a literal \ in the
# replacement, double it; other \ escapes are left unchanged (i.e.
# the \ and the following character are both copied).
def expand(repl, regs, str):
if '\\' not in repl:
return repl
new = ''
i = 0
ord0 = ord('0')
while i < len(repl):
c = repl[i]; i = i+1
if c != '\\' or i >= len(repl):
new = new + c
else:
c = repl[i]; i = i+1
if '0' <= c <= '9':
a, b = regs[ord(c)-ord0]
new = new + str[a:b]
elif c == '\\':
new = new + c
else:
new = new + '\\' + c
return new
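# Added note: the re engine performs the same \digit expansion itself, so
# no explicit expand() step is needed there.
import re  # illustration only
assert re.sub(r'(\w+) (\w+)', r'\2 \1', 'spam eggs') == 'eggs spam'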
# Test program, reads sequences "pat repl str" from stdin.
# Optional argument specifies pattern used to split lines.
def test():
import sys
if sys.argv[1:]:
delpat = sys.argv[1]
else:
delpat = '[ \t\n]+'
while 1:
if sys.stdin.isatty(): sys.stderr.write('--> ')
line = sys.stdin.readline()
if not line: break
if line[-1] == '\n': line = line[:-1]
fields = split(line, delpat)
if len(fields) != 3:
print 'Sorry, not three fields'
print 'split:', repr(fields)
continue
[pat, repl, str] = split(line, delpat)
print 'sub :', repr(sub(pat, repl, str))
print 'gsub:', repr(gsub(pat, repl, str))
......@@ -136,7 +136,7 @@ class RExec(ihooks._Verbose):
ok_builtin_modules = ('audioop', 'array', 'binascii',
'cmath', 'errno', 'imageop',
'marshal', 'math', 'md5', 'operator',
'parser', 'regex', 'select',
'parser', 'select',
'sha', '_sre', 'strop', 'struct', 'time',
'_weakref')
......
......@@ -128,8 +128,6 @@ class AllTest(unittest.TestCase):
self.check_all("quopri")
self.check_all("random")
self.check_all("re")
self.check_all("reconvert")
self.check_all("regsub")
self.check_all("repr")
self.check_all("rexec")
self.check_all("rfc822")
......
from test.test_support import verbose, sortdict
import warnings
warnings.filterwarnings("ignore", "the regex module is deprecated",
DeprecationWarning, __name__)
import regex
from regex_syntax import *
re = 'a+b+c+'
print 'no match:', regex.match(re, 'hello aaaabcccc world')
print 'successful search:', regex.search(re, 'hello aaaabcccc world')
try:
cre = regex.compile('\(' + re)
except regex.error:
print 'caught expected exception'
else:
print 'expected regex.error not raised'
print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')
prev = regex.set_syntax(RE_SYNTAX_AWK)
print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb')
regex.set_syntax(prev)
print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')
re = '\(<one>[0-9]+\) *\(<two>[0-9]+\)'
print 'matching with group names and compile()'
cre = regex.compile(re)
print cre.match('801 999')
try:
print cre.group('one')
except regex.error:
print 'caught expected exception'
else:
print 'expected regex.error not raised'
print 'matching with group names and symcomp()'
cre = regex.symcomp(re)
print cre.match('801 999')
print cre.group(0)
print cre.group('one')
print cre.group(1, 2)
print cre.group('one', 'two')
print 'realpat:', cre.realpat
print 'groupindex:', sortdict(cre.groupindex)
re = 'world'
cre = regex.compile(re)
print 'not case folded search:', cre.search('HELLO WORLD')
cre = regex.compile(re, regex.casefold)
print 'case folded search:', cre.search('HELLO WORLD')
print '__members__:', cre.__members__
print 'regs:', cre.regs
print 'last:', cre.last
print 'translate:', len(cre.translate)
print 'givenpat:', cre.givenpat
print 'match with pos:', cre.match('hello world', 7)
print 'search with pos:', cre.search('hello world there world', 7)
print 'bogus group:', cre.group(0, 1, 3)
try:
print 'no name:', cre.group('one')
except regex.error:
print 'caught expected exception'
else:
print 'expected regex.error not raised'
from regex_tests import *
if verbose: print 'Running regex_tests test suite'
for t in tests:
pattern=s=outcome=repl=expected=None
if len(t)==5:
pattern, s, outcome, repl, expected = t
elif len(t)==3:
pattern, s, outcome = t
else:
raise ValueError, ('Test tuples should have 3 or 5 fields',t)
try:
obj=regex.compile(pattern)
except regex.error:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else:
# Regex syntax errors aren't yet reported, so for
# the official test suite they'll be quietly ignored.
pass
#print '=== Syntax error:', t
else:
try:
result=obj.search(s)
except regex.error, msg:
print '=== Unexpected exception', t, repr(msg)
if outcome==SYNTAX_ERROR:
# This should have been a syntax error; forget it.
pass
elif outcome==FAIL:
if result==-1: pass # No match, as expected
else: print '=== Succeeded incorrectly', t
elif outcome==SUCCEED:
if result!=-1:
# Matched, as expected, so now we compute the
# result string and compare it to our expected result.
start, end = obj.regs[0]
found=s[start:end]
groups=obj.group(1,2,3,4,5,6,7,8,9,10)
vardict=vars()
for i in range(len(groups)):
vardict['g'+str(i+1)]=str(groups[i])
repl=eval(repl)
if repl!=expected:
print '=== grouping error', t, repr(repl)+' should be '+repr(expected)
else:
print '=== Failed incorrectly', t
......@@ -68,7 +68,6 @@ import posixfile
import profile
import pstats
import py_compile
#import reconvert
import repr
try:
import rlcompleter # not available on Windows
......
......@@ -176,8 +176,6 @@ class PyBuildExt(build_ext):
#
# Some modules that are normally always on:
exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) )
exts.append( Extension('_weakref', ['_weakref.c']) )
exts.append( Extension('_symtable', ['symtablemodule.c']) )
......
......@@ -291,7 +291,14 @@ Core and builtins
Extension Modules
-----------------
- Swapped re and sre, so help(re) provides full help. importing sre
- Everything under lib-old was removed. This includes the following modules:
Para, addpack, cmp, cmpcache, codehack, dircmp, dump, find, fmt, grep,
lockfile, newdir, ni, packmail, poly, rand, statcache, tb, tzparse,
util, whatsound, whrandom, zmod
- The following modules were removed: regsub, reconvert, regex, regex_syntax.
- re and sre were swapped, so help(re) provides full help. Importing sre
is deprecated. The undocumented re.engine variable no longer exists.
- Bug #1448490: Fixed a bug that ISO-2022 codecs could not handle
......
......@@ -1956,8 +1956,6 @@ quopri Conversions to/from quoted-printable transport encoding.
rand Don't use unless you want compatibility with C's rand().
random Random variable generators
re Regular Expressions.
reconvert Convert old ("regex") regular expressions to new syntax
("re").
repr Redo repr() but with limits on most sizes.
rexec Restricted execution facilities ("safe" exec, eval, etc).
rfc822 RFC-822 message manipulation class.
......@@ -2035,7 +2033,6 @@ zipfile Read & write PK zipped files.
array Obj efficiently representing arrays of basic values
math Math functions of C standard
time Time-related functions (also the newer datetime module)
regex Regular expression matching operations
marshal Read and write some python values in binary format
struct Convert between python values and C structs
......
/*
* -*- mode: c-mode; c-file-style: python -*-
*/
#ifndef Py_REGEXPR_H
#define Py_REGEXPR_H
#ifdef __cplusplus
extern "C" {
#endif
/*
* regexpr.h
*
* Author: Tatu Ylonen <ylo@ngs.fi>
*
* Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies. This
* software is provided "as is" without express or implied warranty.
*
* Created: Thu Sep 26 17:15:36 1991 ylo
* Last modified: Mon Nov 4 15:49:46 1991 ylo
*/
/* $Id$ */
#ifndef REGEXPR_H
#define REGEXPR_H
#define RE_NREGS 100 /* number of registers available */
typedef struct re_pattern_buffer
{
unsigned char *buffer; /* compiled pattern */
int allocated; /* allocated size of compiled pattern */
int used; /* actual length of compiled pattern */
unsigned char *fastmap; /* fastmap[ch] is true if ch can start pattern */
unsigned char *translate; /* translation to apply during compilation/matching */
unsigned char fastmap_accurate; /* true if fastmap is valid */
unsigned char can_be_null; /* true if can match empty string */
unsigned char uses_registers; /* registers are used and need to be initialized */
int num_registers; /* number of registers used */
unsigned char anchor; /* anchor: 0=none 1=begline 2=begbuf */
} *regexp_t;
typedef struct re_registers
{
int start[RE_NREGS]; /* start offset of region */
int end[RE_NREGS]; /* end offset of region */
} *regexp_registers_t;
/* bit definitions for syntax */
#define RE_NO_BK_PARENS 1 /* no quoting for parentheses */
#define RE_NO_BK_VBAR 2 /* no quoting for vertical bar */
#define RE_BK_PLUS_QM 4 /* quoting needed for + and ? */
#define RE_TIGHT_VBAR 8 /* | binds tighter than ^ and $ */
#define RE_NEWLINE_OR 16 /* treat newline as or */
#define RE_CONTEXT_INDEP_OPS 32 /* ^$?*+ are special in all contexts */
#define RE_ANSI_HEX 64 /* ansi sequences (\n etc) and \xhh */
#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */
/* definitions for some common regexp styles */
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
#define Sword 1
#define Swhitespace 2
#define Sdigit 4
#define Soctaldigit 8
#define Shexdigit 16
/* Rename all exported symbols to avoid conflicts with similarly named
symbols in some systems' standard C libraries... */
#define re_syntax _Py_re_syntax
#define re_syntax_table _Py_re_syntax_table
#define re_compile_initialize _Py_re_compile_initialize
#define re_set_syntax _Py_re_set_syntax
#define re_compile_pattern _Py_re_compile_pattern
#define re_match _Py_re_match
#define re_search _Py_re_search
#define re_compile_fastmap _Py_re_compile_fastmap
#define re_comp _Py_re_comp
#define re_exec _Py_re_exec
#ifdef HAVE_PROTOTYPES
extern int re_syntax;
/* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */
extern unsigned char re_syntax_table[256];
void re_compile_initialize(void);
int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */
char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);
/* This compiles the regexp (given in regex and length in regex_size).
* This returns NULL if the regexp compiled successfully, and an error
* message if an error was encountered. The buffer field must be
* initialized to a memory area allocated by malloc (or to NULL) before
* use, and the allocated field must be set to its length (or 0 if
* buffer is NULL). Also, the translate field must be set to point to a
* valid translation table, or NULL if it is not used. */
int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
regexp_registers_t old_regs);
/* This tries to match the regexp against the string. This returns the
* length of the matched portion, or -1 if the pattern could not be
* matched and -2 if an error (such as failure stack overflow) is
* encountered. */
int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
int range, regexp_registers_t regs);
/* This searches for a substring matching the regexp. This returns the
* first index at which a match is found. range specifies at how many
* positions to try matching; positive values indicate searching
* forwards, and negative values indicate searching backwards. mstop
* specifies the offset beyond which a match must not go. This returns
* -1 if no match is found, and -2 if an error (such as failure stack
* overflow) is encountered. */
void re_compile_fastmap(regexp_t compiled);
/* This computes the fastmap for the regexp. For this to have any effect,
* the calling program must have initialized the fastmap field to point
* to an array of 256 characters. */
#else /* HAVE_PROTOTYPES */
extern int re_syntax;
extern unsigned char re_syntax_table[256];
void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
int re_search();
void re_compile_fastmap();
#endif /* HAVE_PROTOTYPES */
#endif /* REGEXPR_H */
#ifdef __cplusplus
}
#endif
#endif /* !Py_REGEXPR_H */
......@@ -535,14 +535,6 @@ SOURCE=..\..\Objects\rangeobject.c
# End Source File
# Begin Source File
SOURCE=..\..\Modules\regexmodule.c
# End Source File
# Begin Source File
SOURCE=..\..\Modules\regexpr.c
# End Source File
# Begin Source File
SOURCE=..\..\Modules\rgbimgmodule.c
# End Source File
# Begin Source File
......
......@@ -304,8 +304,6 @@ SRC.MODULES= $(addprefix $(TOP), \
Modules/md5module.c \
Modules/operator.c \
Modules/_randommodule.c \
Modules/regexmodule.c \
Modules/regexpr.c \
Modules/rgbimgmodule.c \
Modules/shamodule.c \
Modules/_sre.c \
......
......@@ -948,34 +948,6 @@ readline.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h $(PY_INCLUDE)\class
$(PY_INCLUDE)\sliceobject.h $(PY_INCLUDE)\stringobject.h \
$(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h $(PY_INCLUDE)\tupleobject.h
regexmodule.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h \
$(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \
pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \
$(PY_INCLUDE)\floatobject.h $(PY_INCLUDE)\funcobject.h $(PY_INCLUDE)\import.h \
$(PY_INCLUDE)\intobject.h $(PY_INCLUDE)\intrcheck.h $(PY_INCLUDE)\listobject.h \
$(PY_INCLUDE)\longobject.h $(PY_INCLUDE)\methodobject.h \
$(PY_INCLUDE)\modsupport.h $(PY_INCLUDE)\moduleobject.h $(PY_INCLUDE)\mymalloc.h \
$(PY_INCLUDE)\myproto.h $(PY_INCLUDE)\object.h $(PY_INCLUDE)\objimpl.h \
$(PY_INCLUDE)\pydebug.h $(PY_INCLUDE)\pyerrors.h $(PY_INCLUDE)\pyfpe.h \
$(PY_INCLUDE)\pystate.h $(PY_INCLUDE)\python.h $(PY_INCLUDE)\pythonrun.h \
$(PY_INCLUDE)\rangeobject.h $(PY_MODULES)\regexpr.h $(PY_INCLUDE)\sliceobject.h \
$(PY_INCLUDE)\stringobject.h $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h \
$(PY_INCLUDE)\tupleobject.h
regexpr.obj: $(PY_INCLUDE)\abstract.h $(PY_INCLUDE)\ceval.h \
$(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \
pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \
$(PY_INCLUDE)\floatobject.h $(PY_INCLUDE)\funcobject.h $(PY_INCLUDE)\import.h \
$(PY_INCLUDE)\intobject.h $(PY_INCLUDE)\intrcheck.h $(PY_INCLUDE)\listobject.h \
$(PY_INCLUDE)\longobject.h $(PY_INCLUDE)\methodobject.h \
$(PY_INCLUDE)\modsupport.h $(PY_INCLUDE)\moduleobject.h $(PY_INCLUDE)\mymalloc.h \
$(PY_INCLUDE)\myproto.h $(PY_INCLUDE)\object.h $(PY_INCLUDE)\objimpl.h \
$(PY_INCLUDE)\pydebug.h $(PY_INCLUDE)\pyerrors.h $(PY_INCLUDE)\pyfpe.h \
$(PY_INCLUDE)\pystate.h $(PY_INCLUDE)\python.h $(PY_INCLUDE)\pythonrun.h \
$(PY_INCLUDE)\rangeobject.h $(PY_MODULES)\regexpr.h $(PY_INCLUDE)\sliceobject.h \
$(PY_INCLUDE)\stringobject.h $(PY_INCLUDE)\sysmodule.h $(PY_INCLUDE)\traceback.h \
$(PY_INCLUDE)\tupleobject.h
resource.obj: $(PY_INCLUDE)\abstract.h $(OS2TCPIP)\Include\sys\time.h $(PY_INCLUDE)\ceval.h \
$(PY_INCLUDE)\classobject.h $(PY_INCLUDE)\cobject.h $(PY_INCLUDE)\complexobject.h \
pyconfig.h $(PY_INCLUDE)\dictobject.h $(PY_INCLUDE)\fileobject.h \
......
......@@ -699,30 +699,6 @@ readline.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \
pythonrun.h rangeobject.h sliceobject.h stringobject.h sysmodule.h \
traceback.h tupleobject.h
regexmodule.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \
pyconfig.h dictobject.h fileobject.h floatobject.h funcobject.h \
import.h intobject.h intrcheck.h listobject.h longobject.h \
methodobject.h modsupport.h moduleobject.h mymalloc.h myproto.h \
object.h objimpl.h pydebug.h pyerrors.h pyfpe.h pystate.h python.h \
pythonrun.h rangeobject.h regexpr.h sliceobject.h stringobject.h \
sysmodule.h traceback.h tupleobject.h
regexpr.obj: abstract.h ceval.h classobject.h cobject.h \
complexobject.h pyconfig.h dictobject.h fileobject.h floatobject.h \
funcobject.h import.h intobject.h intrcheck.h listobject.h \
longobject.h methodobject.h modsupport.h moduleobject.h mymalloc.h \
myproto.h object.h objimpl.h pydebug.h pyerrors.h pyfpe.h \
pystate.h python.h pythonrun.h rangeobject.h regexpr.h \
sliceobject.h stringobject.h sysmodule.h traceback.h tupleobject.h
reopmodule.obj: abstract.h ceval.h classobject.h cobject.h complexobject.h \
pyconfig.h dictobject.h fileobject.h floatobject.h funcobject.h \
import.h intobject.h intrcheck.h listobject.h longobject.h \
methodobject.h modsupport.h moduleobject.h mymalloc.h myproto.h \
object.h objimpl.h pydebug.h pyerrors.h pyfpe.h pystate.h python.h \
pythonrun.h rangeobject.h regexpr.h sliceobject.h stringobject.h \
sysmodule.h traceback.h tupleobject.h
resource.obj: abstract.h c:\mptn\include\sys\time.h ceval.h classobject.h \
cobject.h complexobject.h pyconfig.h dictobject.h fileobject.h \
floatobject.h funcobject.h import.h intobject.h intrcheck.h \
......
......@@ -5,23 +5,23 @@ import sys
# change this module too.
try:
import string
import os
except:
print """Could not import the standard "string" module.
print """Could not import the standard "os" module.
Please check your PYTHONPATH environment variable."""
sys.exit(1)
try:
import regex_syntax
import symbol
except:
print """Could not import the standard "regex_syntax" module. If this is
print """Could not import the standard "symbol" module. If this is
a PC, you should add the dos_8x3 directory to your PYTHONPATH."""
sys.exit(1)
import os
for dir in sys.path:
file = os.path.join(dir, "string.py")
file = os.path.join(dir, "os.py")
if os.path.isfile(file):
test = os.path.join(dir, "test")
if os.path.isdir(test):
......
......@@ -706,12 +706,6 @@
<File
RelativePath="..\Objects\rangeobject.c">
</File>
<File
RelativePath="..\Modules\regexmodule.c">
</File>
<File
RelativePath="..\Modules\regexpr.c">
</File>
<File
RelativePath="..\Modules\rgbimgmodule.c">
</File>
......
......@@ -74,7 +74,6 @@ MODULES_DYNAMIC =\
@.^.Lib.md5/pyd\
@.^.Lib.operator/pyd\
@.^.Lib.parser/pyd\
@.^.Lib.regex/pyd\
@.^.Lib.rgbimg/pyd\
@.^.Lib.sha/pyd\
@.^.Lib.signal/pyd\
......@@ -284,10 +283,6 @@ $(LIB_PYTHON): $(OBJECTS)
@.^.Lib.parser/pyd: @.^.Modules.o.parsermodule s.linktab
$(MAKEDLK) -d @.^.Lib.parser/pyd -s s.linktab -o @.^.Modules.o.parsermodule -e initparser
@.^.Lib.regex/pyd: @.^.Modules.o.regexmodule @.^.Modules.o.regexpr s.linktab
$(LINK) -aof -o @.^.Modules.o.regexlink @.^.Modules.o.regexmodule @.^.Modules.o.regexpr
$(MAKEDLK) -d @.^.Lib.regex/pyd -s s.linktab -o @.^.Modules.o.regexlink -e initregex
@.^.Lib.rgbimg/pyd: @.^.Modules.o.rgbimgmodule s.linktab
$(MAKEDLK) -d @.^.Lib.rgbimg/pyd -s s.linktab -o @.^.Modules.o.rgbimgmodule -e initrgbimg
......
......@@ -30,7 +30,7 @@
# into a program for a different change to Python programs...
import sys
import regex
import re
import os
from stat import *
......@@ -53,7 +53,7 @@ def main():
if fix(arg): bad = 1
sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
......@@ -148,12 +148,12 @@ def fix(filename):
# This expression doesn't catch *all* class definition headers,
# but it's pretty darn close.
classexpr = '^\([ \t]*class +[a-zA-Z0-9_]+\) *( *) *\(\(=.*\)?\):'
classprog = regex.compile(classexpr)
classexpr = '^([ \t]*class +[a-zA-Z0-9_]+) *( *) *((=.*)?):'
classprog = re.compile(classexpr)
# Expressions for finding base class expressions.
baseexpr = '^ *\(.*\) *( *) *$'
baseprog = regex.compile(baseexpr)
baseexpr = '^ *(.*) *( *) *$'
baseprog = re.compile(baseexpr)
def fixline(line):
if classprog.match(line) < 0: # No 'class' keyword -- no change
......
......@@ -35,7 +35,7 @@
# files.
import sys
import regex
import re
import os
from stat import *
import getopt
......@@ -90,7 +90,7 @@ def main():
# Change this regular expression to select a different set of files
Wanted = '^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
return regex.match(Wanted, name) >= 0
return re.match(Wanted, name) >= 0
def recursedown(dirname):
dbg('recursedown(%r)\n' % (dirname,))
......@@ -212,12 +212,12 @@ Number = Floatnumber + '\|' + Intnumber
# Anything else is an operator -- don't list this explicitly because of '/*'
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '\(' + '\|'.join(OutsideComment) + '\)'
OutsideCommentProgram = regex.compile(OutsideCommentPattern)
OutsideCommentPattern = '(' + '|'.join(OutsideComment) + ')'
OutsideCommentProgram = re.compile(OutsideCommentPattern)
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '\(' + '\|'.join(InsideComment) + '\)'
InsideCommentProgram = regex.compile(InsideCommentPattern)
InsideCommentPattern = '(' + '|'.join(InsideComment) + ')'
InsideCommentProgram = re.compile(InsideCommentPattern)
def initfixline():
global Program
......
......@@ -27,7 +27,6 @@
# preprocessor commands.
import sys
import regex
import getopt
defs = []
......
......@@ -27,7 +27,7 @@
# into a program for a different change to Python programs...
import sys
import regex
import re
import os
from stat import *
......@@ -50,7 +50,7 @@ def main():
if fix(arg): bad = 1
sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
......@@ -101,7 +101,7 @@ def fix(filename):
if lineno == 1 and g is None and line[:2] == '#!':
# Check for non-Python scripts
words = line[2:].split()
if words and regex.search('[pP]ython', words[0]) < 0:
if words and re.search('[pP]ython', words[0]) < 0:
msg = filename + ': ' + words[0]
msg = msg + ' script; not fixed\n'
err(msg)
......@@ -158,8 +158,8 @@ def fix(filename):
return 0
fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *\(( *\(.*\) *)\) *) *:'
fixprog = regex.compile(fixpat)
fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *(( *(.*) *)) *) *:'
fixprog = re.compile(fixpat)
def fixline(line):
if fixprog.match(line) >= 0:
......
......@@ -22,7 +22,7 @@
import sys
import os
import getopt
import regex
import re
# Types of symbols.
#
......@@ -32,7 +32,7 @@ ignore = 'Nntrgdsbavuc'
# Regular expression to parse "nm -o" output.
#
matcher = regex.compile('\(.*\):\t?........ \(.\) \(.*\)$')
matcher = re.compile('(.*):\t?........ (.) (.*)$')
# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
......
......@@ -20,7 +20,7 @@
# into a program for a different change to Python programs...
import sys
import regex
import re
import os
from stat import *
import getopt
......@@ -59,7 +59,7 @@ def main():
if fix(arg): bad = 1
sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
return ispythonprog.match(name) >= 0
......
......@@ -21,7 +21,7 @@
import sys
import regex
import re
import os
......@@ -57,8 +57,8 @@ def main():
# Compiled regular expressions to search for import statements
#
m_import = regex.compile('^[ \t]*from[ \t]+\([^ \t]+\)[ \t]+')
m_from = regex.compile('^[ \t]*import[ \t]+\([^#]+\)')
m_import = re.compile('^[ \t]*from[ \t]+([^ \t]+)[ \t]+')
m_from = re.compile('^[ \t]*import[ \t]+([^#]+)')
# Collect data from one file
......
......@@ -326,8 +326,6 @@ class PyBuildExt(build_ext):
#
# Some modules that are normally always on:
exts.append( Extension('regex', ['regexmodule.c', 'regexpr.c']) )
exts.append( Extension('_weakref', ['_weakref.c']) )
# array objects
......