Commit 3e06ab1d authored by Guido van Rossum's avatar Guido van Rossum

The usual :)

parent 45cd9de2
...@@ -87,6 +87,8 @@ DEFAULT_ERROR_MESSAGE = """\ ...@@ -87,6 +87,8 @@ DEFAULT_ERROR_MESSAGE = """\
class HTTPServer(SocketServer.TCPServer): class HTTPServer(SocketServer.TCPServer):
allow_reuse_address = 1 # Seems to make sense in testing environment
def server_bind(self): def server_bind(self):
"""Override server_bind to store the server name.""" """Override server_bind to store the server name."""
SocketServer.TCPServer.server_bind(self) SocketServer.TCPServer.server_bind(self)
......
...@@ -197,7 +197,7 @@ class ConfigParser: ...@@ -197,7 +197,7 @@ class ConfigParser:
configuration files in the list will be read. A single configuration files in the list will be read. A single
filename may also be given. filename may also be given.
""" """
if type(filenames) is type(''): if type(filenames) in [type(''), type(u'')]:
filenames = [filenames] filenames = [filenames]
for filename in filenames: for filename in filenames:
try: try:
......
"""Convert a NT pathname to a file URL and vice versa.""" """Convert a NT pathname to a file URL and vice versa."""
def url2pathname(url): def url2pathname(url):
""" Convert a URL to a DOS path... r"""Convert a URL to a DOS path.
///C|/foo/bar/spam.foo ///C|/foo/bar/spam.foo
becomes becomes
...@@ -32,7 +33,8 @@ def url2pathname(url): ...@@ -32,7 +33,8 @@ def url2pathname(url):
return path return path
def pathname2url(p): def pathname2url(p):
""" Convert a DOS path name to a file url... r"""Convert a DOS path name to a file url.
C:\foo\bar\spam.foo C:\foo\bar\spam.foo
becomes becomes
......
#! /usr/bin/env python1.5 #! /usr/bin/env python1.5
"""Convert old ("regex") regular expressions to new syntax ("re"). r"""Convert old ("regex") regular expressions to new syntax ("re").
When imported as a module, there are two functions, with their own When imported as a module, there are two functions, with their own
strings: strings:
......
...@@ -76,7 +76,7 @@ class Completer: ...@@ -76,7 +76,7 @@ class Completer:
__builtin__.__dict__.keys(), __builtin__.__dict__.keys(),
__main__.__dict__.keys()]: __main__.__dict__.keys()]:
for word in list: for word in list:
if word[:n] == text: if word[:n] == text and word != "__builtins__":
matches.append(word) matches.append(word)
return matches return matches
...@@ -106,7 +106,7 @@ class Completer: ...@@ -106,7 +106,7 @@ class Completer:
matches = [] matches = []
n = len(attr) n = len(attr)
for word in words: for word in words:
if word[:n] == attr: if word[:n] == attr and word != "__builtins__":
matches.append("%s.%s" % (expr, word)) matches.append("%s.%s" % (expr, word))
return matches return matches
......
...@@ -6,7 +6,7 @@ and HEAD requests in a fairly straightforward manner. ...@@ -6,7 +6,7 @@ and HEAD requests in a fairly straightforward manner.
""" """
__version__ = "0.3" __version__ = "0.4"
import os import os
...@@ -14,6 +14,8 @@ import string ...@@ -14,6 +14,8 @@ import string
import posixpath import posixpath
import BaseHTTPServer import BaseHTTPServer
import urllib import urllib
import cgi
from StringIO import StringIO
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
...@@ -57,16 +59,62 @@ class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): ...@@ -57,16 +59,62 @@ class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
""" """
path = self.translate_path(self.path) path = self.translate_path(self.path)
f = None
if os.path.isdir(path): if os.path.isdir(path):
self.send_error(403, "Directory listing not supported") for index in "index.html", "index.htm":
return None index = os.path.join(path, index)
if os.path.exists(index):
path = index
break
else:
return self.list_directory(path)
ctype = self.guess_type(path)
if ctype.startswith('text/'):
mode = 'r'
else:
mode = 'rb'
try: try:
f = open(path, 'rb') f = open(path, mode)
except IOError: except IOError:
self.send_error(404, "File not found") self.send_error(404, "File not found")
return None return None
self.send_response(200) self.send_response(200)
self.send_header("Content-type", self.guess_type(path)) self.send_header("Content-type", ctype)
self.end_headers()
return f
def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().

    The listing is sorted case-insensitively.  Link targets are
    URL-quoted and always use "/" as the separator; displayed text is
    HTML-escaped.

    """
    try:
        names = os.listdir(path)
    except os.error:
        self.send_error(404, "No permission to list directory")
        return None
    names.sort(lambda a, b: cmp(a.lower(), b.lower()))
    f = StringIO()
    # self.path is presumably the raw path from the request; escape it
    # before embedding it in the generated HTML.
    f.write("<h2>Directory listing for %s</h2>\n" % cgi.escape(self.path))
    f.write("<hr>\n<ul>\n")
    for name in names:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or @ for symbolic links
        if os.path.isdir(fullname):
            displayname = name + "/"
            # URLs use "/" regardless of the platform's os.sep
            linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "@"
            # Note: a link to a directory displays with @ and links with /
        # The href needs URL quoting (spaces, "%", "#", quotes, ...);
        # the visible text needs HTML escaping.
        f.write('<li><a href="%s">%s</a>\n'
                % (urllib.quote(linkname), cgi.escape(displayname)))
    f.write("</ul>\n<hr>\n")
    f.seek(0)
    self.send_response(200)
    self.send_header("Content-type", "text/html")
    self.end_headers()
    return f
......
...@@ -141,6 +141,7 @@ class TCPServer: ...@@ -141,6 +141,7 @@ class TCPServer:
- address_family - address_family
- socket_type - socket_type
- request_queue_size (only for stream sockets) - request_queue_size (only for stream sockets)
- reuse_address
Instance variables: Instance variables:
...@@ -156,6 +157,8 @@ class TCPServer: ...@@ -156,6 +157,8 @@ class TCPServer:
request_queue_size = 5 request_queue_size = 5
allow_reuse_address = 0
def __init__(self, server_address, RequestHandlerClass): def __init__(self, server_address, RequestHandlerClass):
"""Constructor. May be extended, do not override.""" """Constructor. May be extended, do not override."""
self.server_address = server_address self.server_address = server_address
...@@ -171,6 +174,8 @@ class TCPServer: ...@@ -171,6 +174,8 @@ class TCPServer:
May be overridden. May be overridden.
""" """
if self.allow_reuse_address:
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self.socket.bind(self.server_address) self.socket.bind(self.server_address)
def server_activate(self): def server_activate(self):
......
# #
# Secret Labs' Regular Expression Engine # Secret Labs' Regular Expression Engine
# $Id$
# #
# convert template to internal format # convert template to internal format
# #
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. # Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
# #
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with # Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and # CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work. # other compatibility work.
# #
# FIXME: <fl> formalize (objectify?) and document the compiler code import array
# format, so that other frontends can use this compiler
import array, string, sys
import _sre import _sre
from sre_constants import * from sre_constants import *
...@@ -30,158 +22,153 @@ for WORDSIZE in "BHil": ...@@ -30,158 +22,153 @@ for WORDSIZE in "BHil":
else: else:
raise RuntimeError, "cannot find a useable array type" raise RuntimeError, "cannot find a useable array type"
# FIXME: <fl> should move some optimizations from the parser to here!
class Code:
    """List-like accumulator for the code words emitted by the compiler."""

    def __init__(self):
        # Words are kept in a plain list until todata() packs them.
        self.data = []

    def __len__(self):
        """Return the number of words stored so far."""
        words = self.data
        return len(words)

    def __getitem__(self, index):
        """Return the word stored at `index`."""
        words = self.data
        return words[index]

    def __setitem__(self, index, code):
        """Overwrite the word at `index` (used to patch skip offsets)."""
        words = self.data
        words[index] = code

    def append(self, code):
        """Add one word to the end of the buffer."""
        words = self.data
        words.append(code)

    def todata(self):
        """Pack the words into an array of type WORDSIZE; return its string form."""
        packed = array.array(WORDSIZE, self.data)
        return packed.tostring()
def _lower(literal):
# return _sre._lower(literal) # FIXME
return string.lower(literal)
def _compile(code, pattern, flags): def _compile(code, pattern, flags):
append = code.append emit = code.append
for op, av in pattern: for op, av in pattern:
if op is ANY: if op is ANY:
if "s" in flags: if flags & SRE_FLAG_DOTALL:
append(CODES[op]) # any character at all! emit(OPCODES[op])
else: else:
append(CODES[NOT_LITERAL]) emit(OPCODES[CATEGORY])
append(10) emit(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (SUCCESS, FAILURE): elif op in (SUCCESS, FAILURE):
append(CODES[op]) emit(OPCODES[op])
elif op is AT: elif op is AT:
append(CODES[op]) emit(OPCODES[op])
append(POSITIONS[av]) if flags & SRE_FLAG_MULTILINE:
emit(ATCODES[AT_MULTILINE[av]])
else:
emit(ATCODES[av])
elif op is BRANCH: elif op is BRANCH:
append(CODES[op]) emit(OPCODES[op])
tail = [] tail = []
for av in av[1]: for av in av[1]:
skip = len(code); append(0) skip = len(code); emit(0)
_compile(code, av, flags) _compile(code, av, flags)
append(CODES[JUMP]) emit(OPCODES[JUMP])
tail.append(len(code)); append(0) tail.append(len(code)); emit(0)
code[skip] = len(code) - skip code[skip] = len(code) - skip
append(0) # end of branch emit(0) # end of branch
for tail in tail: for tail in tail:
code[tail] = len(code) - tail code[tail] = len(code) - tail
elif op is CALL: elif op is CALL:
append(CODES[op]) emit(OPCODES[op])
skip = len(code); append(0) skip = len(code); emit(0)
_compile(code, av, flags) _compile(code, av, flags)
append(CODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = len(code) - skip
elif op is CATEGORY: # not used by current parser elif op is CATEGORY:
append(CODES[op]) emit(OPCODES[op])
append(CATEGORIES[av]) if flags & SRE_FLAG_LOCALE:
emit(CH_LOCALE[CHCODES[av]])
elif flags & SRE_FLAG_UNICODE:
emit(CH_UNICODE[CHCODES[av]])
else:
emit(CHCODES[av])
elif op is GROUP: elif op is GROUP:
if "i" in flags: if flags & SRE_FLAG_IGNORECASE:
append(CODES[MAP_IGNORE[op]]) emit(OPCODES[OP_IGNORE[op]])
else: else:
append(CODES[op]) emit(OPCODES[op])
append(av) emit(av-1)
elif op is IN: elif op is IN:
if "i" in flags: if flags & SRE_FLAG_IGNORECASE:
append(CODES[MAP_IGNORE[op]]) emit(OPCODES[OP_IGNORE[op]])
def fixup(literal): def fixup(literal, flags=flags):
return ord(_lower(literal)) return _sre.getlower(ord(literal), flags)
else: else:
append(CODES[op]) emit(OPCODES[op])
fixup = ord fixup = ord
skip = len(code); append(0) skip = len(code); emit(0)
for op, av in av: for op, av in av:
append(CODES[op]) emit(OPCODES[op])
if op is NEGATE: if op is NEGATE:
pass pass
elif op is LITERAL: elif op is LITERAL:
append(fixup(av)) emit(fixup(av))
elif op is RANGE: elif op is RANGE:
append(fixup(av[0])) emit(fixup(av[0]))
append(fixup(av[1])) emit(fixup(av[1]))
elif op is CATEGORY: elif op is CATEGORY:
append(CATEGORIES[av]) if flags & SRE_FLAG_LOCALE:
emit(CH_LOCALE[CHCODES[av]])
elif flags & SRE_FLAG_UNICODE:
emit(CH_UNICODE[CHCODES[av]])
else:
emit(CHCODES[av])
else: else:
raise ValueError, "unsupported set operator" raise error, "internal: unsupported set operator"
append(CODES[FAILURE]) emit(OPCODES[FAILURE])
code[skip] = len(code) - skip code[skip] = len(code) - skip
elif op in (LITERAL, NOT_LITERAL): elif op in (LITERAL, NOT_LITERAL):
if "i" in flags: if flags & SRE_FLAG_IGNORECASE:
append(CODES[MAP_IGNORE[op]]) emit(OPCODES[OP_IGNORE[op]])
append(ord(_lower(av)))
else: else:
append(CODES[op]) emit(OPCODES[op])
append(ord(av)) emit(ord(av))
elif op is MARK: elif op is MARK:
append(CODES[op]) emit(OPCODES[op])
append(av) emit(av)
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
lo, hi = av[2].getwidth() if flags & SRE_FLAG_TEMPLATE:
if lo == 0: emit(OPCODES[REPEAT])
raise SyntaxError, "cannot repeat zero-width items" skip = len(code); emit(0)
if lo == hi == 1 and op is MAX_REPEAT: emit(av[0])
append(CODES[MAX_REPEAT_ONE]) emit(av[1])
skip = len(code); append(0)
append(av[0])
append(av[1])
_compile(code, av[2], flags) _compile(code, av[2], flags)
append(CODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = len(code) - skip
else: else:
append(CODES[op]) lo, hi = av[2].getwidth()
skip = len(code); append(0) if lo == 0:
append(av[0]) raise error, "nothing to repeat"
append(av[1]) if 0 and lo == hi == 1 and op is MAX_REPEAT:
_compile(code, av[2], flags) # FIXME: <fl> need a better way to figure out when
if op is MIN_REPEAT: # it's safe to use this one (in the parser, probably)
append(CODES[MIN_UNTIL]) emit(OPCODES[MAX_REPEAT_ONE])
skip = len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
else: else:
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?) emit(OPCODES[op])
append(CODES[MAX_UNTIL]) skip = len(code); emit(0)
code[skip] = len(code) - skip emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
elif op is SUBPATTERN: elif op is SUBPATTERN:
## group = av[0] group = av[0]
## if group: if group:
## append(CODES[MARK]) emit(OPCODES[MARK])
## append((group-1)*2) emit((group-1)*2)
_compile(code, av[1], flags) _compile(code, av[1], flags)
## if group: if group:
## append(CODES[MARK]) emit(OPCODES[MARK])
## append((group-1)*2+1) emit((group-1)*2+1)
else: else:
raise ValueError, ("unsupported operand type", op) raise ValueError, ("unsupported operand type", op)
def compile(p, flags=()): def compile(p, flags=0):
# convert pattern list to internal format # internal: convert pattern list to internal format
if type(p) in (type(""), type(u"")): if type(p) in (type(""), type(u"")):
import sre_parse import sre_parse
pattern = p pattern = p
p = sre_parse.parse(p) p = sre_parse.parse(p)
else: else:
pattern = None pattern = None
# print p.getwidth() flags = p.pattern.flags | flags
# print p code = []
code = Code() _compile(code, p.data, flags)
_compile(code, p.data, p.pattern.flags) code.append(OPCODES[SUCCESS])
code.append(CODES[SUCCESS]) # FIXME: <fl> get rid of this limitation
# print list(code.data) assert p.pattern.groups <= 100,\
data = code.todata() "sorry, but this version only supports 100 named groups"
if 0: # debugging return _sre.compile(
print pattern, flags,
print "-" * 68 array.array(WORDSIZE, code).tostring(),
import sre_disasm p.pattern.groups-1, p.pattern.groupdict
sre_disasm.disasm(data) )
print "-" * 68
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
# #
# Secret Labs' Regular Expression Engine # Secret Labs' Regular Expression Engine
# $Id$
# #
# various symbols used by the regular expression engine. # various symbols used by the regular expression engine.
# run this script to update the _sre include files! # run this script to update the _sre include files!
# #
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
# #
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with # Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and # CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work. # other compatibility work.
# #
# should this really be here?
class error(Exception):
    """Exception raised by the regular expression parser and compiler."""
# operators # operators
FAILURE = "failure" FAILURE = "failure"
...@@ -30,38 +31,51 @@ GROUP = "group" ...@@ -30,38 +31,51 @@ GROUP = "group"
GROUP_IGNORE = "group_ignore" GROUP_IGNORE = "group_ignore"
IN = "in" IN = "in"
IN_IGNORE = "in_ignore" IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump" JUMP = "jump"
LITERAL = "literal" LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore" LITERAL_IGNORE = "literal_ignore"
MARK = "mark" MARK = "mark"
MAX_REPEAT = "max_repeat" MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one" MAX_REPEAT_ONE = "max_repeat_one"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat" MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate" NEGATE = "negate"
NOT_LITERAL = "not_literal" NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore" NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range" RANGE = "range"
REPEAT = "repeat" REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern" SUBPATTERN = "subpattern"
# positions # positions
AT_BEGINNING = "at_beginning" AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BOUNDARY = "at_boundary" AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary" AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end" AT_END = "at_end"
AT_END_LINE = "at_end_line"
# categories # categories
CATEGORY_DIGIT = "category_digit" CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit" CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space" CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space" CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word" CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word" CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
CODES = [ OPCODES = [
# failure=0 success=1 (just because it looks better that way :-) # failure=0 success=1 (just because it looks better that way :-)
FAILURE, SUCCESS, FAILURE, SUCCESS,
...@@ -74,12 +88,13 @@ CODES = [ ...@@ -74,12 +88,13 @@ CODES = [
CATEGORY, CATEGORY,
GROUP, GROUP_IGNORE, GROUP, GROUP_IGNORE,
IN, IN_IGNORE, IN, IN_IGNORE,
INFO,
JUMP, JUMP,
LITERAL, LITERAL_IGNORE, LITERAL, LITERAL_IGNORE,
MARK, MARK,
MAX_REPEAT, MAX_UNTIL, MAX_REPEAT,
MAX_REPEAT_ONE, MAX_REPEAT_ONE,
MIN_REPEAT, MIN_UNTIL, MIN_REPEAT,
NOT_LITERAL, NOT_LITERAL_IGNORE, NOT_LITERAL, NOT_LITERAL_IGNORE,
NEGATE, NEGATE,
RANGE, RANGE,
...@@ -87,45 +102,95 @@ CODES = [ ...@@ -87,45 +102,95 @@ CODES = [
] ]
# convert to dictionary ATCODES = [
c = {} AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
i = 0 AT_NON_BOUNDARY, AT_END, AT_END_LINE
for code in CODES: ]
c[code] = i
i = i + 1 CHCODES = [
CODES = c CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
CATEGORY_UNI_NOT_LINEBREAK
]
def makedict(list):
    """Return a dictionary mapping each item of `list` to its index.

    If an item occurs more than once, its last index wins.
    """
    mapping = {}
    for position, entry in enumerate(list):
        mapping[entry] = position
    return mapping
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
# replacement operations for "ignore case" mode # replacement operations for "ignore case" mode
MAP_IGNORE = { OP_IGNORE = {
GROUP: GROUP_IGNORE, GROUP: GROUP_IGNORE,
IN: IN_IGNORE, IN: IN_IGNORE,
LITERAL: LITERAL_IGNORE, LITERAL: LITERAL_IGNORE,
NOT_LITERAL: NOT_LITERAL_IGNORE NOT_LITERAL: NOT_LITERAL_IGNORE
} }
POSITIONS = { AT_MULTILINE = {
AT_BEGINNING: ord("a"), AT_BEGINNING: AT_BEGINNING_LINE,
AT_BOUNDARY: ord("b"), AT_END: AT_END_LINE
AT_NON_BOUNDARY: ord("B"),
AT_END: ord("z"),
} }
CATEGORIES = { CH_LOCALE = {
CATEGORY_DIGIT: ord("d"), CATEGORY_DIGIT: CATEGORY_DIGIT,
CATEGORY_NOT_DIGIT: ord("D"), CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
CATEGORY_SPACE: ord("s"), CATEGORY_SPACE: CATEGORY_SPACE,
CATEGORY_NOT_SPACE: ord("S"), CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
CATEGORY_WORD: ord("w"), CATEGORY_WORD: CATEGORY_LOC_WORD,
CATEGORY_NOT_WORD: ord("W"), CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
} }
CH_UNICODE = {
CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
CATEGORY_SPACE: CATEGORY_UNI_SPACE,
CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
CATEGORY_WORD: CATEGORY_UNI_WORD,
CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}
# flags
SRE_FLAG_TEMPLATE = 1
SRE_FLAG_IGNORECASE = 2
SRE_FLAG_LOCALE = 4
SRE_FLAG_MULTILINE = 8
SRE_FLAG_DOTALL = 16
SRE_FLAG_UNICODE = 32
SRE_FLAG_VERBOSE = 64
if __name__ == "__main__": if __name__ == "__main__":
import string import string
items = CODES.items() def dump(f, d, prefix):
items.sort(lambda a, b: cmp(a[1], b[1])) items = d.items()
items.sort(lambda a, b: cmp(a[1], b[1]))
for k, v in items:
f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w") f = open("sre_constants.h", "w")
f.write("/* generated by sre_constants.py */\n") f.write("/* generated from sre_constants.py */\n")
for k, v in items: dump(f, OPCODES, "SRE_OP")
f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n") dump(f, ATCODES, "SRE")
dump(f, CHCODES, "SRE")
f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
f.close() f.close()
print "done" print "done"
# #
# Secret Labs' Regular Expression Engine # Secret Labs' Regular Expression Engine
# $Id$
# #
# convert re-style regular expression to SRE template. the current # convert re-style regular expression to sre pattern
# implementation is somewhat incomplete, and not very fast. should
# definitely be rewritten before Python 1.6 goes beta.
# #
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. # Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
# #
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with # Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and # CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work. # other compatibility work.
# #
# FIXME: comments marked with the FIXME tag are open issues. all such
# issues should be closed before the final beta.
import string, sys import string, sys
import _sre
from sre_constants import * from sre_constants import *
# FIXME: should be 65535, but the arraymodule is still broken
MAXREPEAT = 32767
SPECIAL_CHARS = ".\\[{()*+?^$|" SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{" REPEAT_CHARS = "*+?{"
# FIXME: string in tuple tests may explode with if char is unicode :-(
DIGITS = tuple(string.digits) DIGITS = tuple(string.digits)
OCTDIGITS = tuple("01234567") OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF") HEXDIGITS = tuple("0123456789abcdefABCDEF")
WHITESPACE = string.whitespace
ESCAPES = { ESCAPES = {
"\\a": (LITERAL, chr(7)), "\\a": (LITERAL, chr(7)),
"\\b": (LITERAL, chr(8)), "\\b": (LITERAL, chr(8)),
...@@ -55,10 +52,21 @@ CATEGORIES = { ...@@ -55,10 +52,21 @@ CATEGORIES = {
"\\Z": (AT, AT_END), # end of string "\\Z": (AT, AT_END), # end of string
} }
class Pattern: FLAGS = {
# FIXME: <fl> rename class, and store flags in here too! # standard flags
"i": SRE_FLAG_IGNORECASE,
"L": SRE_FLAG_LOCALE,
"m": SRE_FLAG_MULTILINE,
"s": SRE_FLAG_DOTALL,
"x": SRE_FLAG_VERBOSE,
# extensions
"t": SRE_FLAG_TEMPLATE,
"u": SRE_FLAG_UNICODE,
}
class State:
def __init__(self): def __init__(self):
self.flags = [] self.flags = 0
self.groups = 1 self.groups = 1
self.groupdict = {} self.groupdict = {}
def getgroup(self, name=None): def getgroup(self, name=None):
...@@ -67,9 +75,6 @@ class Pattern: ...@@ -67,9 +75,6 @@ class Pattern:
if name: if name:
self.groupdict[name] = gid self.groupdict[name] = gid
return gid return gid
def setflag(self, flag):
if flag in self.flags:
self.flags.append(flag)
class SubPattern: class SubPattern:
# a subpattern, in intermediate form # a subpattern, in intermediate form
...@@ -78,7 +83,6 @@ class SubPattern: ...@@ -78,7 +83,6 @@ class SubPattern:
if not data: if not data:
data = [] data = []
self.data = data self.data = data
self.flags = []
self.width = None self.width = None
def __repr__(self): def __repr__(self):
return repr(self.data) return repr(self.data)
...@@ -121,8 +125,8 @@ class SubPattern: ...@@ -121,8 +125,8 @@ class SubPattern:
hi = hi + j hi = hi + j
elif op in (MIN_REPEAT, MAX_REPEAT): elif op in (MIN_REPEAT, MAX_REPEAT):
i, j = av[2].getwidth() i, j = av[2].getwidth()
lo = lo + i * av[0] lo = lo + long(i) * av[0]
hi = hi + j * av[1] hi = hi + long(j) * av[1]
elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY): elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
lo = lo + 1 lo = lo + 1
hi = hi + 1 hi = hi + 1
...@@ -130,47 +134,23 @@ class SubPattern: ...@@ -130,47 +134,23 @@ class SubPattern:
break break
self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint)) self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
return self.width return self.width
def set(self, flag):
if not flag in self.flags:
self.flags.append(flag)
def reset(self, flag):
if flag in self.flags:
self.flags.remove(flag)
class Tokenizer: class Tokenizer:
def __init__(self, string): def __init__(self, string):
self.string = list(string) self.index = 0
self.string = string
self.next = self.__next() self.next = self.__next()
def __next(self): def __next(self):
if not self.string: if self.index >= len(self.string):
return None return None
char = self.string[0] char = self.string[self.index]
if char[0] == "\\": if char[0] == "\\":
try: try:
c = self.string[1] c = self.string[self.index + 1]
except IndexError: except IndexError:
raise SyntaxError, "bogus escape" raise error, "bogus escape"
char = char + c char = char + c
try: self.index = self.index + len(char)
if c == "x":
# hexadecimal constant
for i in xrange(2, sys.maxint):
c = self.string[i]
if str(c) not in HEXDIGITS:
break
char = char + c
elif str(c) in DIGITS:
# decimal (or octal) number
for i in xrange(2, sys.maxint):
c = self.string[i]
# FIXME: if larger than current number of
# groups, interpret as an octal number
if str(c) not in DIGITS:
break
char = char + c
except IndexError:
pass # use what we've got this far
del self.string[0:len(char)]
return char return char
def match(self, char): def match(self, char):
if char == self.next: if char == self.next:
...@@ -187,45 +167,103 @@ class Tokenizer: ...@@ -187,45 +167,103 @@ class Tokenizer:
self.next = self.__next() self.next = self.__next()
return this return this
def _fixescape(escape, character_class=0): def isident(char):
# convert escape to (type, value) return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
if character_class:
# inside a character class, we'll look in the character def isdigit(char):
# escapes dictionary first return "0" <= char <= "9"
code = ESCAPES.get(escape)
if code: def isname(name):
return code # check that group name is a valid string
code = CATEGORIES.get(escape) # FIXME: <fl> this code is really lame. should use a regular
else: # expression instead, but I seem to have certain bootstrapping
code = CATEGORIES.get(escape) # problems here ;-)
if code: if not isident(name[0]):
return code return 0
code = ESCAPES.get(escape) for char in name:
if not isident(char) and not isdigit(char):
return 0
return 1
def _group(escape, state):
# check if the escape string represents a valid group
try:
group = int(escape[1:])
if group and group < state.groups:
return group
except ValueError:
pass
return None # not a valid group
def _class_escape(source, escape):
# handle escape code inside character class
code = ESCAPES.get(escape)
if code:
return code
code = CATEGORIES.get(escape)
if code: if code:
return code return code
if not character_class:
try:
group = int(escape[1:])
# FIXME: only valid if group <= current number of groups
return GROUP, group
except ValueError:
pass
try: try:
if escape[1:2] == "x": if escape[1:2] == "x":
while source.next in HEXDIGITS:
escape = escape + source.get()
escape = escape[2:] escape = escape[2:]
return LITERAL, chr(int(escape[-2:], 16) & 0xff) # FIXME: support unicode characters!
elif str(escape[1:2]) in DIGITS: return LITERAL, chr(int(escape[-4:], 16) & 0xff)
return LITERAL, chr(int(escape[1:], 8) & 0xff) elif str(escape[1:2]) in OCTDIGITS:
elif len(escape) == 2: while source.next in OCTDIGITS:
escape = escape + source.get()
escape = escape[1:]
# FIXME: support unicode characters!
return LITERAL, chr(int(escape[-6:], 8) & 0xff)
if len(escape) == 2:
return LITERAL, escape[1] return LITERAL, escape[1]
except ValueError: except ValueError:
pass pass
raise SyntaxError, "bogus escape: %s" % repr(escape) raise error, "bogus escape: %s" % repr(escape)
def _branch(subpattern, items): def _escape(source, escape, state):
# handle escape code in expression
code = CATEGORIES.get(escape)
if code:
return code
code = ESCAPES.get(escape)
if code:
return code
try:
if escape[1:2] == "x":
while source.next in HEXDIGITS:
escape = escape + source.get()
escape = escape[2:]
# FIXME: support unicode characters!
return LITERAL, chr(int(escape[-4:], 16) & 0xff)
elif escape[1:2] in DIGITS:
while 1:
group = _group(escape, state)
if group:
if (not source.next or
not _group(escape + source.next, state)):
return GROUP, group
escape = escape + source.get()
elif source.next in OCTDIGITS:
escape = escape + source.get()
else:
break
escape = escape[1:]
# FIXME: support unicode characters!
return LITERAL, chr(int(escape[-6:], 8) & 0xff)
if len(escape) == 2:
return LITERAL, escape[1]
except ValueError:
pass
raise error, "bogus escape: %s" % repr(escape)
# form a branch operator from a set of items (FIXME: move this
# optimization to the compiler module!) def _branch(pattern, items):
# form a branch operator from a set of items
subpattern = SubPattern(pattern)
# check if all items share a common prefix # check if all items share a common prefix
while 1: while 1:
...@@ -257,26 +295,36 @@ def _branch(subpattern, items): ...@@ -257,26 +295,36 @@ def _branch(subpattern, items):
for item in items: for item in items:
set.append(item[0]) set.append(item[0])
subpattern.append((IN, set)) subpattern.append((IN, set))
return return subpattern
subpattern.append((BRANCH, (None, items))) subpattern.append((BRANCH, (None, items)))
return subpattern
def _parse(source, pattern, flags=()): def _parse(source, state, flags=0):
# parse regular expression pattern into an operator list. # parse regular expression pattern into an operator list.
subpattern = SubPattern(pattern) subpattern = SubPattern(state)
this = None
while 1: while 1:
if str(source.next) in ("|", ")"): if source.next in ("|", ")"):
break # end of subpattern break # end of subpattern
this = source.get() this = source.get()
if this is None: if this is None:
break # end of pattern break # end of pattern
if state.flags & SRE_FLAG_VERBOSE:
# skip whitespace and comments
if this in WHITESPACE:
continue
if this == "#":
while 1:
this = source.get()
if this in (None, "\n"):
break
continue
if this and this[0] not in SPECIAL_CHARS: if this and this[0] not in SPECIAL_CHARS:
subpattern.append((LITERAL, this)) subpattern.append((LITERAL, this))
...@@ -294,11 +342,11 @@ def _parse(source, pattern, flags=()): ...@@ -294,11 +342,11 @@ def _parse(source, pattern, flags=()):
if this == "]" and set != start: if this == "]" and set != start:
break break
elif this and this[0] == "\\": elif this and this[0] == "\\":
code1 = _fixescape(this, 1) code1 = _class_escape(source, this)
elif this: elif this:
code1 = LITERAL, this code1 = LITERAL, this
else: else:
raise SyntaxError, "unexpected end of regular expression" raise error, "unexpected end of regular expression"
if source.match("-"): if source.match("-"):
# potential range # potential range
this = source.get() this = source.get()
...@@ -308,20 +356,20 @@ def _parse(source, pattern, flags=()): ...@@ -308,20 +356,20 @@ def _parse(source, pattern, flags=()):
break break
else: else:
if this[0] == "\\": if this[0] == "\\":
code2 = _fixescape(this, 1) code2 = _class_escape(source, this)
else: else:
code2 = LITERAL, this code2 = LITERAL, this
if code1[0] != LITERAL or code2[0] != LITERAL: if code1[0] != LITERAL or code2[0] != LITERAL:
raise SyntaxError, "illegal range" raise error, "illegal range"
if len(code1[1]) != 1 or len(code2[1]) != 1: if len(code1[1]) != 1 or len(code2[1]) != 1:
raise SyntaxError, "illegal range" raise error, "illegal range"
set.append((RANGE, (code1[1], code2[1]))) set.append((RANGE, (code1[1], code2[1])))
else: else:
if code1[0] is IN: if code1[0] is IN:
code1 = code1[1][0] code1 = code1[1][0]
set.append(code1) set.append(code1)
# FIXME: <fl> move set optimization to support function # FIXME: <fl> move set optimization to compiler!
if len(set)==1 and set[0][0] is LITERAL: if len(set)==1 and set[0][0] is LITERAL:
subpattern.append(set[0]) # optimization subpattern.append(set[0]) # optimization
elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
...@@ -335,43 +383,41 @@ def _parse(source, pattern, flags=()): ...@@ -335,43 +383,41 @@ def _parse(source, pattern, flags=()):
if this == "?": if this == "?":
min, max = 0, 1 min, max = 0, 1
elif this == "*": elif this == "*":
min, max = 0, sys.maxint min, max = 0, MAXREPEAT
elif this == "+": elif this == "+":
min, max = 1, sys.maxint min, max = 1, MAXREPEAT
elif this == "{": elif this == "{":
min, max = 0, sys.maxint min, max = 0, MAXREPEAT
lo = hi = "" lo = hi = ""
while str(source.next) in DIGITS: while source.next in DIGITS:
lo = lo + source.get() lo = lo + source.get()
if source.match(","): if source.match(","):
while str(source.next) in DIGITS: while source.next in DIGITS:
hi = hi + source.get() hi = hi + source.get()
else: else:
hi = lo hi = lo
if not source.match("}"): if not source.match("}"):
raise SyntaxError, "bogus range" raise error, "bogus range"
if lo: if lo:
min = int(lo) min = int(lo)
if hi: if hi:
max = int(hi) max = int(hi)
# FIXME: <fl> check that hi >= lo! # FIXME: <fl> check that hi >= lo!
else: else:
raise SyntaxError, "not supported" raise error, "not supported"
# figure out which item to repeat # figure out which item to repeat
# FIXME: should back up to the right mark, right?
if subpattern: if subpattern:
index = len(subpattern)-1 item = subpattern[-1:]
while subpattern[index][0] is MARK:
index = index - 1
item = subpattern[index:index+1]
else: else:
raise SyntaxError, "nothing to repeat" raise error, "nothing to repeat"
if source.match("?"): if source.match("?"):
subpattern[index] = (MIN_REPEAT, (min, max, item)) subpattern[-1] = (MIN_REPEAT, (min, max, item))
else: else:
subpattern[index] = (MAX_REPEAT, (min, max, item)) subpattern[-1] = (MAX_REPEAT, (min, max, item))
elif this == ".": elif this == ".":
subpattern.append((ANY, None)) subpattern.append((ANY, None))
elif this == "(": elif this == "(":
group = 1 group = 1
name = None name = None
...@@ -379,28 +425,41 @@ def _parse(source, pattern, flags=()): ...@@ -379,28 +425,41 @@ def _parse(source, pattern, flags=()):
group = 0 group = 0
# options # options
if source.match("P"): if source.match("P"):
# named group: skip forward to end of name # python extensions
if source.match("<"): if source.match("<"):
# named group: skip forward to end of name
name = "" name = ""
while 1: while 1:
char = source.get() char = source.get()
if char is None or char == ">": if char is None:
raise error, "unterminated name"
if char == ">":
break break
name = name + char name = name + char
group = 1 group = 1
if not isname(name):
raise error, "illegal character in group name"
elif source.match("="):
# named backreference
raise error, "not yet implemented"
else:
char = source.get()
if char is None:
raise error, "unexpected end of pattern"
raise error, "unknown specifier: ?P%s" % char
elif source.match(":"): elif source.match(":"):
# non-capturing group # non-capturing group
group = 2 group = 2
elif source.match_set("iI"): elif source.match("#"):
pattern.setflag("i") # comment
elif source.match_set("lL"): while 1:
pattern.setflag("l") if source.next is None or source.next == ")":
elif source.match_set("mM"): break
pattern.setflag("m") source.get()
elif source.match_set("sS"): else:
pattern.setflag("s") # flags
elif source.match_set("xX"): while FLAGS.has_key(source.next):
pattern.setflag("x") state.flags = state.flags | FLAGS[source.get()]
if group: if group:
# parse group contents # parse group contents
b = [] b = []
...@@ -408,30 +467,25 @@ def _parse(source, pattern, flags=()): ...@@ -408,30 +467,25 @@ def _parse(source, pattern, flags=()):
# anonymous group # anonymous group
group = None group = None
else: else:
group = pattern.getgroup(name) group = state.getgroup(name)
if group:
subpattern.append((MARK, (group-1)*2))
while 1: while 1:
p = _parse(source, pattern, flags) p = _parse(source, state, flags)
if source.match(")"): if source.match(")"):
if b: if b:
b.append(p) b.append(p)
_branch(subpattern, b) p = _branch(state, b)
else: subpattern.append((SUBPATTERN, (group, p)))
subpattern.append((SUBPATTERN, (group, p)))
break break
elif source.match("|"): elif source.match("|"):
b.append(p) b.append(p)
else: else:
raise SyntaxError, "group not properly closed" raise error, "group not properly closed"
if group:
subpattern.append((MARK, (group-1)*2+1))
else: else:
# FIXME: should this really be a while loop?
while 1: while 1:
char = source.get() char = source.get()
if char is None or char == ")": if char is None or char == ")":
break break
raise error, "unknown extension"
elif this == "^": elif this == "^":
subpattern.append((AT, AT_BEGINNING)) subpattern.append((AT, AT_BEGINNING))
...@@ -440,58 +494,93 @@ def _parse(source, pattern, flags=()): ...@@ -440,58 +494,93 @@ def _parse(source, pattern, flags=()):
subpattern.append((AT, AT_END)) subpattern.append((AT, AT_END))
elif this and this[0] == "\\": elif this and this[0] == "\\":
code =_fixescape(this) code = _escape(source, this, state)
subpattern.append(code) subpattern.append(code)
else: else:
raise SyntaxError, "parser error" raise error, "parser error"
return subpattern return subpattern
def parse(source, flags=()): def parse(pattern, flags=0):
s = Tokenizer(source) # parse 're' pattern into list of (opcode, argument) tuples
g = Pattern() source = Tokenizer(pattern)
state = State()
b = [] b = []
while 1: while 1:
p = _parse(s, g, flags) p = _parse(source, state, flags)
tail = s.get() tail = source.get()
if tail == "|": if tail == "|":
b.append(p) b.append(p)
elif tail == ")": elif tail == ")":
raise SyntaxError, "unbalanced parenthesis" raise error, "unbalanced parenthesis"
elif tail is None: elif tail is None:
if b: if b:
b.append(p) b.append(p)
p = SubPattern(g) p = _branch(state, b)
_branch(p, b)
break break
else: else:
raise SyntaxError, "bogus characters at end of regular expression" raise error, "bogus characters at end of regular expression"
return p return p
if __name__ == "__main__": def parse_template(source, pattern):
from pprint import pprint # parse 're' replacement string into list of literals and
from testpatterns import PATTERNS # group references
a = b = c = 0 s = Tokenizer(source)
for pattern, flags in PATTERNS: p = []
if flags: a = p.append
continue while 1:
print "-"*68 this = s.get()
try: if this is None:
p = parse(pattern) break # end of replacement string
print repr(pattern), "->" if this and this[0] == "\\":
pprint(p.data) if this == "\\g":
import sre_compile name = ""
try: if s.match("<"):
code = sre_compile.compile(p) while 1:
c = c + 1 char = s.get()
except: if char is None:
pass raise error, "unterminated group name"
a = a + 1 if char == ">":
except SyntaxError, v: break
print "**", repr(pattern), v name = name + char
b = b + 1 if not name:
print "-"*68 raise error, "bad group name"
print a, "of", b, "patterns successfully parsed" try:
print c, "of", b, "patterns successfully compiled" index = int(name)
except ValueError:
if not isname(name):
raise error, "illegal character in group name"
try:
index = pattern.groupindex[name]
except KeyError:
raise IndexError, "unknown group name"
a((MARK, index))
elif len(this) > 1 and this[1] in DIGITS:
while s.next in DIGITS:
this = this + s.get()
a((MARK, int(this[1:])))
else:
try:
a(ESCAPES[this])
except KeyError:
for char in this:
a((LITERAL, char))
else:
a((LITERAL, this))
return p
def expand_template(template, match):
# FIXME: <fl> this is sooooo slow. drop in the slicelist
# code instead
p = []
a = p.append
for c, s in template:
if c is LITERAL:
a(s)
elif c is MARK:
s = match.group(s)
if s is None:
raise error, "empty group"
a(s)
return match.string[:0].join(p)
...@@ -15,6 +15,44 @@ def main(): ...@@ -15,6 +15,44 @@ def main():
unlink(TESTFN) unlink(TESTFN)
def testoverflow(type, lowerLimit, upperLimit):
# should not overflow assigning lower limit
if verbose:
print "overflow test: array(%s, [%s])" % (`type`, `lowerLimit`)
try:
a = array.array(type, [lowerLimit])
except:
raise TestFailed, "array(%s) overflowed assigning %s" %\
(`type`, `lowerLimit`)
# should overflow assigning less than lower limit
if verbose:
print "overflow test: array(%s, [%s])" % (`type`, `lowerLimit-1`)
try:
a = array.array(type, [lowerLimit-1])
raise TestFailed, "array(%s) did not overflow assigning %s" %\
(`type`, `lowerLimit-1`)
except OverflowError:
pass
# should not overflow assigning upper limit
if verbose:
print "overflow test: array(%s, [%s])" % (`type`, `upperLimit`)
try:
a = array.array(type, [upperLimit])
except:
raise TestFailed, "array(%s) overflowed assigning %s" %\
(`type`, `upperLimit`)
# should overflow assigning more than upper limit
if verbose:
print "overflow test: array(%s, [%s])" % (`type`, `upperLimit+1`)
try:
a = array.array(type, [upperLimit+1])
raise TestFailed, "array(%s) did not overflow assigning %s" %\
(`type`, `upperLimit+1`)
except OverflowError:
pass
def testtype(type, example): def testtype(type, example):
a = array.array(type) a = array.array(type)
...@@ -81,6 +119,20 @@ def testtype(type, example): ...@@ -81,6 +119,20 @@ def testtype(type, example):
if a != array.array(type, [1, 1, 2, 3, 4, 5, 5]): if a != array.array(type, [1, 1, 2, 3, 4, 5, 5]):
raise TestFailed, "array(%s) self-slice-assign (cntr)" % `type` raise TestFailed, "array(%s) self-slice-assign (cntr)" % `type`
# test that overflow exceptions are raised as expected for assignment
# to array of specific integral types
from math import pow
if type in ('b', 'h', 'i', 'l'):
# check signed and unsigned versions
a = array.array(type)
signedLowerLimit = -1 * long(pow(2, a.itemsize * 8 - 1))
signedUpperLimit = long(pow(2, a.itemsize * 8 - 1)) - 1L
unsignedLowerLimit = 0
unsignedUpperLimit = long(pow(2, a.itemsize * 8)) - 1L
testoverflow(type, signedLowerLimit, signedUpperLimit)
testoverflow(type.upper(), unsignedLowerLimit, unsignedUpperLimit)
main() main()
# Test the exit module
from test_support import verbose
import atexit
def handler1():
print "handler1"
def handler2(*args, **kargs):
print "handler2", args, kargs
# save any exit functions that may have been registered as part of the
# test framework
_exithandlers = atexit._exithandlers
atexit._exithandlers = []
atexit.register(handler1)
atexit.register(handler2)
atexit.register(handler2, 7, kw="abc")
# simulate exit behavior by calling atexit._run_exitfuncs directly...
atexit._run_exitfuncs()
# restore exit handlers
atexit._exithandlers = _exithandlers
"""Test the binascii C module.""" #! /usr/bin/env python
"""Test script for the binhex C module
Uses the mechanism of the python binhex module
Roger E. Masse
"""
import binhex
import tempfile
from test_support import verbose from test_support import verbose
import binascii
# Show module doc string
print binascii.__doc__
# Show module exceptions
print binascii.Error
print binascii.Incomplete
# Check presence and display doc strings of all functions
funcs = []
for suffix in "base64", "hqx", "uu":
prefixes = ["a2b_", "b2a_"]
if suffix == "hqx":
prefixes.extend(["crc_", "rlecode_", "rledecode_"])
for prefix in prefixes:
name = prefix + suffix
funcs.append(getattr(binascii, name))
for func in funcs:
print "%-15s: %s" % (func.__name__, func.__doc__)
# Create binary test data
testdata = "The quick brown fox jumps over the lazy dog.\r\n"
for i in range(256):
# Be slow so we don't depend on other modules
testdata = testdata + chr(i)
testdata = testdata + "\r\nHello world.\n"
# Test base64 with valid data
print "base64 test"
MAX_BASE64 = 57
lines = []
for i in range(0, len(testdata), MAX_BASE64):
b = testdata[i:i+MAX_BASE64]
a = binascii.b2a_base64(b)
lines.append(a)
print a,
res = ""
for line in lines:
b = binascii.a2b_base64(line)
res = res + b
assert res == testdata
# Test base64 with random invalid characters sprinkled throughout
# (This requires a new version of binascii.)
fillers = ""
valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/"
for i in range(256):
c = chr(i)
if c not in valid:
fillers = fillers + c
def addnoise(line):
    # Interleave the characters of line with the characters of the
    # module-level fillers string, trying to keep the initial
    # len(line)/len(noise) ratio, and append whatever is left of either
    # string once the other runs out.
    noise = fillers
    ratio = len(line) / len(noise)
    mixed = []
    while line and noise:
        if len(line) / len(noise) > ratio:
            # line is ahead of the target ratio: consume from line
            mixed.append(line[0])
            line = line[1:]
        else:
            # otherwise consume one filler character
            mixed.append(noise[0])
            noise = noise[1:]
    return "".join(mixed) + noise + line
res = ""
for line in map(addnoise, lines):
b = binascii.a2b_base64(line)
res = res + b
assert res == testdata
# Test uu
print "uu test"
MAX_UU = 45
lines = []
for i in range(0, len(testdata), MAX_UU):
b = testdata[i:i+MAX_UU]
a = binascii.b2a_uu(b)
lines.append(a)
print a,
res = ""
for line in lines:
b = binascii.a2b_uu(line)
res = res + b
assert res == testdata
# Test crc32()
crc = binascii.crc32("Test the CRC-32 of")
crc = binascii.crc32(" this string.", crc)
if crc != 1571220330:
print "binascii.crc32() failed."
# The hqx test is in test_binhex.py def test():
try:
fname1 = tempfile.mktemp()
fname2 = tempfile.mktemp()
f = open(fname1, 'w')
except:
raise ImportError, "Cannot test binhex without a temp file"
start = 'Jack is my hero'
f.write(start)
f.close()
binhex.binhex(fname1, fname2)
if verbose:
print 'binhex'
binhex.hexbin(fname2, fname1)
if verbose:
print 'hexbin'
f = open(fname1, 'r')
finish = f.readline()
if start <> finish:
print 'Error: binhex <> hexbin'
elif verbose:
print 'binhex == hexbin'
try:
import os
os.unlink(fname1)
os.unlink(fname2)
except:
pass
test()
...@@ -6,7 +6,19 @@ from types import ClassType ...@@ -6,7 +6,19 @@ from types import ClassType
print '5. Built-in exceptions' print '5. Built-in exceptions'
# XXX This is not really enough, each *operation* should be tested! # XXX This is not really enough, each *operation* should be tested!
def test_raise_catch(exc):
try:
raise exc, "spam"
except exc, err:
buf = str(err)
try:
raise exc("spam")
except exc, err:
buf = str(err)
print buf
def r(thing): def r(thing):
test_raise_catch(thing)
if type(thing) == ClassType: if type(thing) == ClassType:
print thing.__name__ print thing.__name__
else: else:
...@@ -94,4 +106,8 @@ r(ZeroDivisionError) ...@@ -94,4 +106,8 @@ r(ZeroDivisionError)
try: x = 1/0 try: x = 1/0
except ZeroDivisionError: pass except ZeroDivisionError: pass
r(Exception)
try: x = 1/0
except Exception, e: pass
unlink(TESTFN) unlink(TESTFN)
from test_support import verbose, findfile, TestFailed
import linuxaudiodev
import os
def play_sound_file(path):
fp = open(path, 'r')
data = fp.read()
fp.close()
try:
a = linuxaudiodev.open('w')
except linuxaudiodev.error, msg:
raise TestFailed, msg
else:
a.write(data)
a.close()
def test():
    # locate the sample sound file and play it through the audio device
    sound_path = findfile('audiotest.au')
    play_sound_file(sound_path)
test()
...@@ -129,6 +129,18 @@ testit('pow(1,0)', math.pow(1,0), 1) ...@@ -129,6 +129,18 @@ testit('pow(1,0)', math.pow(1,0), 1)
testit('pow(2,1)', math.pow(2,1), 2) testit('pow(2,1)', math.pow(2,1), 2)
testit('pow(2,-1)', math.pow(2,-1), 0.5) testit('pow(2,-1)', math.pow(2,-1), 0.5)
print 'rint'
try:
math.rint
except AttributeError:
# this platform does not have rint, that is fine, skip the test
pass
else:
testit('rint(0.7)', math.rint(0.7), 1)
testit('rint(-0.3)', math.rint(-0.3), 0)
testit('rint(2.5)', math.rint(2.5), 2)
testit('rint(3.5)', math.rint(3.5), 4)
print 'sin' print 'sin'
testit('sin(0)', math.sin(0), 0) testit('sin(0)', math.sin(0), 0)
testit('sin(pi/2)', math.sin(math.pi/2), 1) testit('sin(pi/2)', math.sin(math.pi/2), 1)
......
...@@ -58,7 +58,42 @@ def test_both(): ...@@ -58,7 +58,42 @@ def test_both():
assert start == PAGESIZE assert start == PAGESIZE
assert end == PAGESIZE + 6 assert end == PAGESIZE + 6
# test seeking around (try to overflow the seek implementation)
m.seek(0,0)
print ' Seek to zeroth byte'
assert m.tell() == 0
m.seek(42,1)
print ' Seek to 42nd byte'
assert m.tell() == 42
m.seek(0,2)
print ' Seek to last byte'
assert m.tell() == len(m)
print ' Try to seek to negative position...'
try:
m.seek(-1)
except ValueError:
pass
else:
assert 0, 'expected a ValueError but did not get it'
print ' Try to seek beyond end of mmap...'
try:
m.seek(1,2)
except ValueError:
pass
else:
assert 0, 'expected a ValueError but did not get it'
print ' Try to seek to negative position...'
try:
m.seek(-len(m)-1,2)
except ValueError:
pass
else:
assert 0, 'expected a ValueError but did not get it'
m.close() m.close()
os.unlink("foo") os.unlink("foo")
print ' Test passed' print ' Test passed'
......
# Python test set -- part 3, built-in operations. import operator
import sys
def test(name, input, output, *args):
print 'testing:', name
f = getattr(operator, name)
params = (input,) + args
try:
val = apply(f, params)
except:
val = sys.exc_type
if val <> output:
print '%s%s = %s: %s expected' % (f.__name__, params, `val`, `output`)
print '3. Operations' test('abs', -1, 1)
print 'XXX Not yet implemented' test('add', 3, 7, 4)
test('and_', 0xf, 0xa, 0xa)
test('concat', 'py', 'python', 'thon')
test('countOf', [1, 2, 1, 3, 1, 4], 1, 3)
a = [4, 3, 2, 1]
test('delitem', a, None, 1)
if a <> [4, 2, 1]:
print 'delitem() failed'
a = range(10)
test('delslice', a, None, 2, 8)
if a <> [0, 1, 8, 9]:
print 'delslice() failed'
a = range(10)
test('div', 5, 2, 2)
test('getitem', a, 2, 2)
test('getslice', a, [4, 5], 4, 6)
test('indexOf', [4, 3, 2, 1], 1, 3)
test('inv', 4, -5)
test('isCallable', 4, 0)
test('isCallable', operator.isCallable, 1)
test('isMappingType', operator.isMappingType, 0)
test('isMappingType', operator.__dict__, 1)
test('isNumberType', 8.3, 1)
test('isNumberType', dir(), 0)
test('isSequenceType', dir(), 1)
test('isSequenceType', 'yeahbuddy', 1)
test('isSequenceType', 3, 0)
test('lshift', 5, 10, 1)
test('mod', 5, 1, 2)
test('mul', 5, 10, 2)
test('neg', 5, -5)
test('or_', 0xa, 0xf, 0x5)
test('pos', -5, -5)
a = range(3)
test('repeat', a, a+a, 2)
test('rshift', 5, 2, 1)
test('sequenceIncludes', range(4), 1, 2)
test('sequenceIncludes', range(4), 0, 5)
test('setitem', a, None, 0, 2)
if a <> [2, 1, 2]:
print 'setitem() failed'
a = range(4)
test('setslice', a, None, 1, 3, [2, 1])
if a <> [0, 2, 1, 3]:
print 'setslice() failed:', a
test('sub', 5, 2, 3)
test('truth', 5, 1)
test('truth', [], 0)
test('xor', 0xb, 0x7, 0xc)
# some negative tests
test('indexOf', [4, 3, 2, 1], ValueError, 9)
...@@ -10,10 +10,10 @@ import pyexpat ...@@ -10,10 +10,10 @@ import pyexpat
class Outputter: class Outputter:
def StartElementHandler(self, name, attrs): def StartElementHandler(self, name, attrs):
print 'Start element:\n\t', name, attrs print 'Start element:\n\t', repr(name), attrs
def EndElementHandler(self, name): def EndElementHandler(self, name):
print 'End element:\n\t', name print 'End element:\n\t', repr(name)
def CharacterDataHandler(self, data): def CharacterDataHandler(self, data):
data = string.strip(data) data = string.strip(data)
...@@ -22,13 +22,13 @@ class Outputter: ...@@ -22,13 +22,13 @@ class Outputter:
print '\t', repr(data) print '\t', repr(data)
def ProcessingInstructionHandler(self, target, data): def ProcessingInstructionHandler(self, target, data):
print 'PI:\n\t', target, data print 'PI:\n\t', repr(target), repr(data)
def StartNamespaceDeclHandler(self, prefix, uri): def StartNamespaceDeclHandler(self, prefix, uri):
print 'NS decl:\n\t', prefix, uri print 'NS decl:\n\t', repr(prefix), repr(uri)
def EndNamespaceDeclHandler(self, prefix): def EndNamespaceDeclHandler(self, prefix):
print 'End of NS decl:\n\t', prefix print 'End of NS decl:\n\t', repr(prefix)
def StartCdataSectionHandler(self): def StartCdataSectionHandler(self):
print 'Start of CDATA section' print 'Start of CDATA section'
...@@ -51,8 +51,9 @@ class Outputter: ...@@ -51,8 +51,9 @@ class Outputter:
print 'Not standalone' print 'Not standalone'
return 1 return 1
def ExternalEntityRefHandler(self, context, base, sysId, pubId): def ExternalEntityRefHandler(self, *args):
print 'External entity ref:', context, base, sysId, pubId context, base, sysId, pubId = args
print 'External entity ref:', args
return 1 return 1
def DefaultHandler(self, userData): def DefaultHandler(self, userData):
...@@ -64,7 +65,14 @@ class Outputter: ...@@ -64,7 +65,14 @@ class Outputter:
out = Outputter() out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!') parser = pyexpat.ParserCreate(namespace_separator='!')
for name in ['StartElementHandler', 'EndElementHandler',
# Test getting/setting returns_unicode
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
parser.returns_unicode = 1 ; assert parser.returns_unicode == 1
parser.returns_unicode = 2 ; assert parser.returns_unicode == 1
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
HANDLER_NAMES = ['StartElementHandler', 'EndElementHandler',
'CharacterDataHandler', 'ProcessingInstructionHandler', 'CharacterDataHandler', 'ProcessingInstructionHandler',
'UnparsedEntityDeclHandler', 'NotationDeclHandler', 'UnparsedEntityDeclHandler', 'NotationDeclHandler',
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
...@@ -73,7 +81,8 @@ for name in ['StartElementHandler', 'EndElementHandler', ...@@ -73,7 +81,8 @@ for name in ['StartElementHandler', 'EndElementHandler',
'DefaultHandler', 'DefaultHandlerExpand', 'DefaultHandler', 'DefaultHandlerExpand',
#'NotStandaloneHandler', #'NotStandaloneHandler',
'ExternalEntityRefHandler' 'ExternalEntityRefHandler'
]: ]
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) ) setattr(parser, name, getattr(out, name) )
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
...@@ -88,7 +97,7 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> ...@@ -88,7 +97,7 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
%unparsed_entity; %unparsed_entity;
]> ]>
<root> <root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace"> <myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements Contents of subelements
</myns:subelement> </myns:subelement>
...@@ -97,6 +106,8 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> ...@@ -97,6 +106,8 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
</root> </root>
""" """
# Produce UTF-8 output
parser.returns_unicode = 0
try: try:
parser.Parse(data, 1) parser.Parse(data, 1)
except pyexpat.error: except pyexpat.error:
...@@ -105,3 +116,33 @@ except pyexpat.error: ...@@ -105,3 +116,33 @@ except pyexpat.error:
print '** Column', parser.ErrorColumnNumber print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex print '** Byte', parser.ErrorByteIndex
# Try the parse again, this time producing Unicode output
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
try:
parser.Parse(data, 1)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try parsing a file
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
import StringIO
file = StringIO.StringIO(data)
try:
parser.ParseFile(file)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
...@@ -97,7 +97,7 @@ try: ...@@ -97,7 +97,7 @@ try:
if not canfork or os.fork(): if not canfork or os.fork():
# parent is server # parent is server
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((hostname, PORT)) s.bind(("127.0.0.1", PORT))
s.listen(1) s.listen(1)
if verbose: if verbose:
print 'parent accepting' print 'parent accepting'
...@@ -133,7 +133,7 @@ try: ...@@ -133,7 +133,7 @@ try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if verbose: if verbose:
print 'child connecting' print 'child connecting'
s.connect((hostname, PORT)) s.connect(("127.0.0.1", PORT))
msg = 'socket test' msg = 'socket test'
s.send(msg) s.send(msg)
data = s.recv(1024) data = s.recv(1024)
......
from test_support import verbose from test_support import verbose
import strop, sys import string, sys
# XXX: kludge... short circuit if strings don't have methods
try:
''.join
except AttributeError:
raise ImportError
def test(name, input, output, *args): def test(name, input, output, *args):
if verbose: if verbose:
print 'string.%s%s =? %s... ' % (name, (input,) + args, output), print 'string.%s%s =? %s... ' % (name, (input,) + args, output),
f = getattr(strop, name)
try: try:
value = apply(f, (input,) + args) # Prefer string methods over string module functions
try:
f = getattr(input, name)
value = apply(f, args)
except AttributeError:
f = getattr(string, name)
value = apply(f, (input,) + args)
except: except:
value = sys.exc_type value = sys.exc_type
if value != output: if value != output:
...@@ -34,7 +45,52 @@ test('find', 'abcdefghiabc', 9, 'abc', 1) ...@@ -34,7 +45,52 @@ test('find', 'abcdefghiabc', 9, 'abc', 1)
test('find', 'abcdefghiabc', -1, 'def', 4) test('find', 'abcdefghiabc', -1, 'def', 4)
test('rfind', 'abcdefghiabc', 9, 'abc') test('rfind', 'abcdefghiabc', 9, 'abc')
test('lower', 'HeLLo', 'hello') test('lower', 'HeLLo', 'hello')
test('lower', 'hello', 'hello')
test('upper', 'HeLLo', 'HELLO') test('upper', 'HeLLo', 'HELLO')
test('upper', 'HELLO', 'HELLO')
test('title', ' hello ', ' Hello ')
test('title', 'hello ', 'Hello ')
test('title', "fOrMaT thIs aS titLe String", 'Format This As Title String')
test('title', "fOrMaT,thIs-aS*titLe;String", 'Format,This-As*Title;String')
test('title', "getInt", 'Getint')
test('expandtabs', 'abc\rab\tdef\ng\thi', 'abc\rab def\ng hi')
test('expandtabs', 'abc\rab\tdef\ng\thi', 'abc\rab def\ng hi', 8)
test('expandtabs', 'abc\rab\tdef\ng\thi', 'abc\rab def\ng hi', 4)
test('expandtabs', 'abc\r\nab\tdef\ng\thi', 'abc\r\nab def\ng hi', 4)
test('islower', 'a', 1)
test('islower', 'A', 0)
test('islower', '\n', 0)
test('islower', 'abc', 1)
test('islower', 'aBc', 0)
test('islower', 'abc\n', 1)
test('isupper', 'a', 0)
test('isupper', 'A', 1)
test('isupper', '\n', 0)
test('isupper', 'ABC', 1)
test('isupper', 'AbC', 0)
test('isupper', 'ABC\n', 1)
test('istitle', 'a', 0)
test('istitle', 'A', 1)
test('istitle', '\n', 0)
test('istitle', 'A Titlecased Line', 1)
test('istitle', 'A\nTitlecased Line', 1)
test('istitle', 'A Titlecased, Line', 1)
test('istitle', 'Not a capitalized String', 0)
test('istitle', 'Not\ta Titlecase String', 0)
test('istitle', 'Not--a Titlecase String', 0)
test('splitlines', "abc\ndef\n\rghi", ['abc', 'def', '', 'ghi'])
test('splitlines', "abc\ndef\n\r\nghi", ['abc', 'def', '', 'ghi'])
test('splitlines', "abc\ndef\r\nghi", ['abc', 'def', 'ghi'])
test('splitlines', "abc\ndef\r\nghi\n", ['abc', 'def', 'ghi'])
test('splitlines', "abc\ndef\r\nghi\n\r", ['abc', 'def', 'ghi', ''])
test('splitlines', "\nabc\ndef\r\nghi\n\r", ['', 'abc', 'def', 'ghi', ''])
test('splitlines', "\nabc\ndef\r\nghi\n\r", ['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'], 1)
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
...@@ -49,8 +105,9 @@ test('split', 'a b c d', ['a', 'b c d'], None, 1) ...@@ -49,8 +105,9 @@ test('split', 'a b c d', ['a', 'b c d'], None, 1)
test('split', 'a b c d', ['a', 'b', 'c d'], None, 2) test('split', 'a b c d', ['a', 'b', 'c d'], None, 2)
test('split', 'a b c d', ['a', 'b', 'c', 'd'], None, 3) test('split', 'a b c d', ['a', 'b', 'c', 'd'], None, 3)
test('split', 'a b c d', ['a', 'b', 'c', 'd'], None, 4) test('split', 'a b c d', ['a', 'b', 'c', 'd'], None, 4)
test('split', 'a b c d', ['a', 'b', 'c', 'd'], None, 0) test('split', 'a b c d', ['a b c d'], None, 0)
test('split', 'a b c d', ['a', 'b', 'c d'], None, 2) test('split', 'a b c d', ['a', 'b', 'c d'], None, 2)
test('split', 'a b c d ', ['a', 'b', 'c', 'd'])
# join now works with any sequence type # join now works with any sequence type
class Sequence: class Sequence:
...@@ -61,27 +118,68 @@ class Sequence: ...@@ -61,27 +118,68 @@ class Sequence:
test('join', ['a', 'b', 'c', 'd'], 'a b c d') test('join', ['a', 'b', 'c', 'd'], 'a b c d')
test('join', ('a', 'b', 'c', 'd'), 'abcd', '') test('join', ('a', 'b', 'c', 'd'), 'abcd', '')
test('join', Sequence(), 'w x y z') test('join', Sequence(), 'w x y z')
test('join', 7, TypeError)
class BadSeq(Sequence):
def __init__(self): self.seq = [7, 'hello', 123L]
test('join', BadSeq(), TypeError)
# try a few long ones # try a few long ones
print strop.join(['x' * 100] * 100, ':') print string.join(['x' * 100] * 100, ':')
print strop.join(('x' * 100,) * 100, ':') print string.join(('x' * 100,) * 100, ':')
test('strip', ' hello ', 'hello') test('strip', ' hello ', 'hello')
test('lstrip', ' hello ', 'hello ') test('lstrip', ' hello ', 'hello ')
test('rstrip', ' hello ', ' hello') test('rstrip', ' hello ', ' hello')
test('strip', 'hello', 'hello')
test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS') test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS')
test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def') test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def')
table = string.maketrans('a', 'A')
test('translate', 'abc', 'Abc', table)
test('translate', 'xyz', 'xyz', table)
test('replace', 'one!two!three!', 'one@two!three!', '!', '@', 1) test('replace', 'one!two!three!', 'one@two!three!', '!', '@', 1)
test('replace', 'one!two!three!', 'onetwothree', '!', '')
test('replace', 'one!two!three!', 'one@two@three!', '!', '@', 2) test('replace', 'one!two!three!', 'one@two@three!', '!', '@', 2)
test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 3) test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 3)
test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 4) test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 4)
test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 0) test('replace', 'one!two!three!', 'one!two!three!', '!', '@', 0)
test('replace', 'one!two!three!', 'one@two@three@', '!', '@') test('replace', 'one!two!three!', 'one@two@three@', '!', '@')
test('replace', 'one!two!three!', 'one!two!three!', 'x', '@') test('replace', 'one!two!three!', 'one!two!three!', 'x', '@')
test('replace', 'one!two!three!', 'one!two!three!', 'x', '@', 2) test('replace', 'one!two!three!', 'one!two!three!', 'x', '@', 2)
strop.whitespace test('startswith', 'hello', 1, 'he')
strop.lowercase test('startswith', 'hello', 1, 'hello')
strop.uppercase test('startswith', 'hello', 0, 'hello world')
test('startswith', 'hello', 1, '')
test('startswith', 'hello', 0, 'ello')
test('startswith', 'hello', 1, 'ello', 1)
test('startswith', 'hello', 1, 'o', 4)
test('startswith', 'hello', 0, 'o', 5)
test('startswith', 'hello', 1, '', 5)
test('startswith', 'hello', 0, 'lo', 6)
test('startswith', 'helloworld', 1, 'lowo', 3)
test('startswith', 'helloworld', 1, 'lowo', 3, 7)
test('startswith', 'helloworld', 0, 'lowo', 3, 6)
test('endswith', 'hello', 1, 'lo')
test('endswith', 'hello', 0, 'he')
test('endswith', 'hello', 1, '')
test('endswith', 'hello', 0, 'hello world')
test('endswith', 'helloworld', 0, 'worl')
test('endswith', 'helloworld', 1, 'worl', 3, 9)
test('endswith', 'helloworld', 1, 'world', 3, 12)
test('endswith', 'helloworld', 1, 'lowo', 1, 7)
test('endswith', 'helloworld', 1, 'lowo', 2, 7)
test('endswith', 'helloworld', 1, 'lowo', 3, 7)
test('endswith', 'helloworld', 0, 'lowo', 4, 7)
test('endswith', 'helloworld', 0, 'lowo', 3, 8)
test('endswith', 'ab', 0, 'ab', 0, 1)
test('endswith', 'ab', 0, 'ab', 0, 0)
string.whitespace
string.lowercase
string.uppercase
import time from test_support import verbose
import timing
time.altzone r = range(100000)
time.clock() if verbose:
t = time.time() print 'starting...'
time.asctime(time.gmtime(t)) timing.start()
if time.ctime(t) <> time.asctime(time.localtime(t)): for i in r:
print 'time.ctime(t) <> time.asctime(time.localtime(t))'
time.daylight
if long(time.mktime(time.localtime(t))) <> long(t):
print 'time.mktime(time.localtime(t)) <> t'
time.sleep(1.2)
tt = time.gmtime(t)
for directive in ('a', 'A', 'b', 'B', 'c', 'd', 'H', 'I',
'j', 'm', 'M', 'p', 'S',
'U', 'w', 'W', 'x', 'X', 'y', 'Y', 'Z', '%'):
format = ' %' + directive
try:
time.strftime(format, tt)
except ValueError:
print 'conversion specifier:', format, ' failed.'
time.timezone
time.tzname
# expected errors
try:
time.asctime(0)
except TypeError:
pass pass
timing.finish()
if verbose:
print 'finished'
try: secs = timing.seconds()
time.mktime((999999, 999999, 999999, 999999, milli = timing.milli()
999999, 999999, 999999, 999999, micro = timing.micro()
999999))
except OverflowError: if verbose:
pass print 'seconds:', secs
print 'milli :', milli
print 'micro :', micro
""" Test script for the Unicode implementation. """ Test script for the unicodedata module.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
""" """#"
from test_support import verbose from test_support import verbose
import sys import sys
def test(method, input, output, *args): # Test Unicode database APIs
if verbose: import unicodedata
print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
try:
f = getattr(input, method)
value = apply(f, args)
except:
value = sys.exc_type
exc = sys.exc_info()[:2]
else:
exc = None
if value != output:
if verbose:
print 'no'
print '*',f, `input`, `output`, `value`
if exc:
print ' value == %s: %s' % (exc)
else:
if verbose:
print 'yes'
test('capitalize', u' hello ', u' hello ') print 'Testing unicodedata module...',
test('capitalize', u'hello ', u'Hello ')
test('title', u' hello ', u' Hello ') assert unicodedata.digit(u'A',None) is None
test('title', u'hello ', u'Hello ') assert unicodedata.digit(u'9') == 9
test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String') assert unicodedata.digit(u'\u215b',None) is None
test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String') assert unicodedata.digit(u'\u2468') == 9
test('title', u"getInt", u'Getint')
test('find', u'abcdefghiabc', 0, u'abc') assert unicodedata.numeric(u'A',None) is None
test('find', u'abcdefghiabc', 9, u'abc', 1) assert unicodedata.numeric(u'9') == 9
test('find', u'abcdefghiabc', -1, u'def', 4) assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0
test('rfind', u'abcdefghiabc', 9, u'abc') assert unicodedata.decimal(u'A',None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b',None) is None
assert unicodedata.decimal(u'\u2468',None) is None
test('lower', u'HeLLo', u'hello') assert unicodedata.category(u'\uFFFE') == 'Cn'
test('lower', u'hello', u'hello') assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'
test('upper', u'HeLLo', u'HELLO') assert unicodedata.bidirectional(u'\uFFFE') == ''
test('upper', u'HELLO', u'HELLO') assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'
if 0: assert unicodedata.decomposition(u'\uFFFE') == ''
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
test('maketrans', u'abc', transtable, u'xyz') assert unicodedata.mirrored(u'\uFFFE') == 0
test('maketrans', u'abc', ValueError, u'xyzq') assert unicodedata.mirrored(u'a') == 0
assert unicodedata.mirrored(u'\u2201') == 1
test('split', u'this is the split function', assert unicodedata.combining(u'\uFFFE') == 0
[u'this', u'is', u'the', u'split', u'function']) assert unicodedata.combining(u'a') == 0
test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|') assert unicodedata.combining(u'\u20e1') == 230
test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2)
test('split', u'a b c d', [u'a', u'b c d'], None, 1)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3)
test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4)
test('split', u'a b c d', [u'a b c d'], None, 0)
test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2)
test('split', u'a b c d ', [u'a', u'b', u'c', u'd'])
# join now works with any sequence type
class Sequence:
    """Bare-bones sequence used to show that join accepts any sequence type.

    Only __len__ and __getitem__ are provided; both delegate to the
    string stored in self.seq.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Delegating to the string means an out-of-range index raises
        # IndexError, which ends index-based iteration.
        return self.seq[i]
test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd'])
test('join', u'', u'abcd', (u'a', u'b', u'c', u'd'))
test('join', u' ', u'w x y z', Sequence())
test('join', u' ', TypeError, 7)
# Sequence whose items are not all strings (an int and a long sit next to
# the unicode item); the join test that follows expects TypeError for it.
class BadSeq(Sequence):
    def __init__(self): self.seq = [7, u'hello', 123L]
test('join', u' ', TypeError, BadSeq())
result = u''
for i in range(10):
if i > 0:
result = result + u':'
result = result + u'x'*10
test('join', u':', result, [u'x' * 10] * 10)
test('join', u':', result, (u'x' * 10,) * 10)
test('strip', u' hello ', u'hello')
test('lstrip', u' hello ', u'hello ')
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
if 0:
test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')
table = string.maketrans('a', u'A')
test('translate', u'abc', u'Abc', table)
test('translate', u'xyz', u'xyz', table)
test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
test('replace', u'one!two!three!', u'onetwothree', '!', '')
test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
test('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0)
test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@')
test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2)
test('startswith', u'hello', 1, u'he')
test('startswith', u'hello', 1, u'hello')
test('startswith', u'hello', 0, u'hello world')
test('startswith', u'hello', 1, u'')
test('startswith', u'hello', 0, u'ello')
test('startswith', u'hello', 1, u'ello', 1)
test('startswith', u'hello', 1, u'o', 4)
test('startswith', u'hello', 0, u'o', 5)
test('startswith', u'hello', 1, u'', 5)
test('startswith', u'hello', 0, u'lo', 6)
test('startswith', u'helloworld', 1, u'lowo', 3)
test('startswith', u'helloworld', 1, u'lowo', 3, 7)
test('startswith', u'helloworld', 0, u'lowo', 3, 6)
test('endswith', u'hello', 1, u'lo')
test('endswith', u'hello', 0, u'he')
test('endswith', u'hello', 1, u'')
test('endswith', u'hello', 0, u'hello world')
test('endswith', u'helloworld', 0, u'worl')
test('endswith', u'helloworld', 1, u'worl', 3, 9)
test('endswith', u'helloworld', 1, u'world', 3, 12)
test('endswith', u'helloworld', 1, u'lowo', 1, 7)
test('endswith', u'helloworld', 1, u'lowo', 2, 7)
test('endswith', u'helloworld', 1, u'lowo', 3, 7)
test('endswith', u'helloworld', 0, u'lowo', 4, 7)
test('endswith', u'helloworld', 0, u'lowo', 3, 8)
test('endswith', u'ab', 0, u'ab', 0, 1)
test('endswith', u'ab', 0, u'ab', 0, 0)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi')
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4)
if 0:
test('capwords', u'abc def ghi', u'Abc Def Ghi')
test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi')
test('capwords', u'abc\t def \nghi', u'Abc Def Ghi')
# Comparisons:
print 'Testing Unicode comparisons...',
assert u'abc' == 'abc'
assert 'abc' == u'abc'
assert u'abc' == u'abc'
assert u'abcd' > 'abc'
assert 'abcd' > u'abc'
assert u'abcd' > u'abc'
assert u'abc' < 'abcd'
assert 'abc' < u'abcd'
assert u'abc' < u'abcd'
print 'done.'
test('ljust', u'abc', u'abc ', 10)
test('rjust', u'abc', u' abc', 10)
test('center', u'abc', u' abc ', 10)
test('ljust', u'abc', u'abc ', 6)
test('rjust', u'abc', u' abc', 6)
test('center', u'abc', u' abc ', 6)
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
test('islower', u'a', 1)
test('islower', u'A', 0)
test('islower', u'\n', 0)
test('islower', u'\u1FFc', 0)
test('islower', u'abc', 1)
test('islower', u'aBc', 0)
test('islower', u'abc\n', 1)
test('isupper', u'a', 0)
test('isupper', u'A', 1)
test('isupper', u'\n', 0)
test('isupper', u'\u1FFc', 0)
test('isupper', u'ABC', 1)
test('isupper', u'AbC', 0)
test('isupper', u'ABC\n', 1)
test('istitle', u'a', 0)
test('istitle', u'A', 1)
test('istitle', u'\n', 0)
test('istitle', u'\u1FFc', 1)
test('istitle', u'A Titlecased Line', 1)
test('istitle', u'A\nTitlecased Line', 1)
test('istitle', u'A Titlecased, Line', 1)
test('istitle', u'Greek \u1FFcitlecases ...', 1)
test('istitle', u'Not a capitalized String', 0)
test('istitle', u'Not\ta Titlecase String', 0)
test('istitle', u'Not--a Titlecase String', 0)
test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi'])
test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u''])
test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1)
test('translate', u"abababc", u'bbbc', {ord('a'):None})
test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')})
test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
# Contains:
print 'Testing Unicode contains method...',
assert ('a' in u'abdb') == 1
assert ('a' in u'bdab') == 1
assert ('a' in u'bdaba') == 1
assert ('a' in u'bdba') == 1
assert ('a' in u'bdba') == 1
assert (u'a' in u'bdba') == 1
assert (u'a' in u'bdb') == 0
assert (u'a' in 'bdb') == 0
assert (u'a' in 'bdba') == 1
assert (u'a' in ('a',1,None)) == 1
assert (u'a' in (1,None,'a')) == 1
assert (u'a' in (1,None,u'a')) == 1
assert ('a' in ('a',1,None)) == 1
assert ('a' in (1,None,'a')) == 1
assert ('a' in (1,None,u'a')) == 1
assert ('a' in ('x',1,u'y')) == 0
assert ('a' in ('x',1,None)) == 0
print 'done.'
# Formatting:
print 'Testing Unicode formatting strings...',
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
assert u"%c" % (u"abc",) == u'a'
assert u"%c" % ("abc",) == u'a'
assert u"%c" % (34,) == u'"'
assert u"%c" % (36,) == u'$'
assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
assert u"%(x)s, %()s" % {'x':u"abc", u''.encode('utf-8'):"def"} == u'abc, def'
# formatting jobs delegated from the string implementation:
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
assert '...%s...' % u"abc" == u'...abc...'
print 'done.'
# Test builtin codecs
print 'Testing builtin codecs...',
# Plain ASCII text must decode to the same Unicode string under each codec.
assert unicode('hello','ascii') == u'hello'
assert unicode('hello','utf-8') == u'hello'
assert unicode('hello','utf8') == u'hello'
assert unicode('hello','latin-1') == u'hello'
# Encoding a non-ASCII character to ASCII must raise ValueError, both with
# no errors argument and with 'strict'.  (If the first call raises, the
# second one is never reached.)
try:
    u'Andr\202 x'.encode('ascii')
    u'Andr\202 x'.encode('ascii','strict')
except ValueError:
    pass
else:
    raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
# 'ignore' drops the offending character, 'replace' substitutes '?'.
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
# Same three error modes for decoding a non-ASCII byte as ASCII.
try:
    unicode('Andr\202 x','ascii')
    unicode('Andr\202 x','ascii','strict')
except ValueError:
    pass
else:
    raise AssertionError, "unicode('Andr\202') failed to raise an exception"
# When decoding, 'replace' substitutes U+FFFD instead of '?'.
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
# Encoding plain ASCII text; the UTF-16 variants show the expected byte order.
assert u'hello'.encode('ascii') == 'hello'
assert u'hello'.encode('utf-8') == 'hello'
assert u'hello'.encode('utf8') == 'hello'
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
assert u'hello'.encode('latin-1') == 'hello'
# Round-trip (encode, then decode) the first 1024 code points through the
# codecs listed here; a mismatch is a hard assertion failure.
u = u''.join(map(unichr, range(1024)))
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
    assert unicode(u.encode(encoding),encoding) == u
# Round-trip the first 256 code points through latin-1.  From here on a
# failure is only reported on stdout instead of aborting the script.
u = u''.join(map(unichr, range(256)))
for encoding in (
    'latin-1',
    ):
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)
# Round-trip the first 128 code points through ascii, reported the same way.
u = u''.join(map(unichr, range(128)))
for encoding in (
    'ascii',
    ):
    try:
        assert unicode(u.encode(encoding),encoding) == u
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)
print 'done.'
# Round-trip 8-bit strings through the mapping codecs: decode the bytes
# with the codec, encode the result with the same codec, and compare with
# the input.  Problems are printed, not raised.
print 'Testing standard mapping codecs...',
# First pass: bytes 0..127.
print '0-127...',
s = ''.join(map(chr, range(128)))
for encoding in (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',
    'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    'cp1256', 'cp1257', 'cp1258',
    'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
    'cp1006', 'cp875', 'iso8859_8',
    ### These have undefined mappings:
    #'cp424',
    ):
    try:
        assert unicode(s,encoding).encode(encoding) == s
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)
# Second pass: bytes 128..255.  The codecs commented out in this list are
# skipped for the reasons given in the '###' notes.
print '128-255...',
s = ''.join(map(chr, range(128,256)))
for encoding in (
    'cp037', 'cp1026',
    'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866',
    'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
    'mac_cyrillic', 'mac_latin2',
    ### These have undefined mappings:
    #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    #'cp1256', 'cp1257', 'cp1258',
    #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
    #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
    ### These fail the round-trip:
    #'cp1006', 'cp875', 'iso8859_8',
    ):
    try:
        assert unicode(s,encoding).encode(encoding) == s
    except AssertionError:
        print '*** codec "%s" failed round-trip' % encoding
    except ValueError,why:
        print '*** codec for "%s" failed: %s' % (encoding, why)
print 'done.'
print 'Testing Unicode string concatenation...',
assert (u"abc" u"def") == u"abcdef"
assert ("abc" u"def") == u"abcdef"
assert (u"abc" "def") == u"abcdef"
assert (u"abc" u"def" "ghi") == u"abcdefghi"
assert ("abc" "def" u"ghi") == u"abcdefghi"
print 'done.' print 'done.'
# Check every path through every method of UserDict #!/usr/bin/env python
import sys, string
from UserDict import UserDict from test_support import verbose
# UserString is a wrapper around the native builtin string type.
d0 = {} # UserString instances should behave similar to builtin string objects.
d1 = {"one": 1} # The test cases were in part derived from 'test_string.py'.
d2 = {"one": 1, "two": 2} from UserString import UserString
# Test constructors if __name__ == "__main__":
verbose = 0
u = UserDict()
u0 = UserDict(d0) tested_methods = {}
u1 = UserDict(d1)
u2 = UserDict(d2) def test(methodname, input, *args):
global tested_methods
uu = UserDict(u) tested_methods[methodname] = 1
uu0 = UserDict(u0) if verbose:
uu1 = UserDict(u1) print '%s.%s(%s) ' % (input, methodname, args),
uu2 = UserDict(u2) u = UserString(input)
objects = [input, u, UserString(u)]
# Test __repr__ res = [""] * 3
for i in range(3):
assert str(u0) == str(d0) object = objects[i]
assert repr(u1) == repr(d1) try:
assert `u2` == `d2` f = getattr(object, methodname)
res[i] = apply(f, args)
# Test __cmp__ and __len__ except:
res[i] = sys.exc_type
all = [d0, d1, d2, u, u0, u1, u2, uu, uu0, uu1, uu2] if res[0] != res[1]:
for a in all: if verbose:
for b in all: print 'no'
assert cmp(a, b) == cmp(len(a), len(b)) print `input`, f, `res[0]`, "<>", `res[1]`
else:
# Test __getitem__ if verbose:
print 'yes'
assert u2["one"] == 1 if res[1] != res[2]:
try: if verbose:
u1["two"] print 'no'
except KeyError: print `input`, f, `res[1]`, "<>", `res[2]`
pass else:
else: if verbose:
assert 0, "u1['two'] shouldn't exist" print 'yes'
# Test __setitem__ test('capitalize', ' hello ')
test('capitalize', 'hello ')
u3 = UserDict(u2)
u3["two"] = 2 test('center', 'foo', 0)
u3["three"] = 3 test('center', 'foo', 3)
test('center', 'foo', 16)
# Test __delitem__
test('ljust', 'foo', 0)
del u3["three"] test('ljust', 'foo', 3)
try: test('ljust', 'foo', 16)
del u3["three"]
except KeyError: test('rjust', 'foo', 0)
pass test('rjust', 'foo', 3)
else: test('rjust', 'foo', 16)
assert 0, "u3['three'] shouldn't exist"
test('count', 'abcabcabc', 'abc')
# Test clear test('count', 'abcabcabc', 'abc', 1)
test('count', 'abcabcabc', 'abc', -1)
u3.clear() test('count', 'abcabcabc', 'abc', 7)
assert u3 == {} test('count', 'abcabcabc', 'abc', 0, 3)
test('count', 'abcabcabc', 'abc', 0, 333)
# Test copy()
test('find', 'abcdefghiabc', 'abc')
u2a = u2.copy() test('find', 'abcdefghiabc', 'abc', 1)
assert u2a == u2 test('find', 'abcdefghiabc', 'def', 4)
test('rfind', 'abcdefghiabc', 'abc')
class MyUserDict(UserDict):
def display(self): print self test('index', 'abcabcabc', 'abc')
test('index', 'abcabcabc', 'abc', 1)
m2 = MyUserDict(u2) test('index', 'abcabcabc', 'abc', -1)
m2a = m2.copy() test('index', 'abcabcabc', 'abc', 7)
assert m2a == m2 test('index', 'abcabcabc', 'abc', 0, 3)
test('index', 'abcabcabc', 'abc', 0, 333)
# Test keys, items, values
test('rindex', 'abcabcabc', 'abc')
assert u2.keys() == d2.keys() test('rindex', 'abcabcabc', 'abc', 1)
assert u2.items() == d2.items() test('rindex', 'abcabcabc', 'abc', -1)
assert u2.values() == d2.values() test('rindex', 'abcabcabc', 'abc', 7)
test('rindex', 'abcabcabc', 'abc', 0, 3)
# Test has_key test('rindex', 'abcabcabc', 'abc', 0, 333)
for i in u2.keys():
assert u2.has_key(i) == 1 test('lower', 'HeLLo')
assert u1.has_key(i) == d1.has_key(i) test('lower', 'hello')
assert u0.has_key(i) == d0.has_key(i) test('upper', 'HeLLo')
test('upper', 'HELLO')
# Test update
test('title', ' hello ')
t = UserDict() test('title', 'hello ')
t.update(u2) test('title', "fOrMaT thIs aS titLe String")
assert t == u2 test('title', "fOrMaT,thIs-aS*titLe;String")
test('title', "getInt")
# Test get
test('expandtabs', 'abc\rab\tdef\ng\thi')
for i in u2.keys(): test('expandtabs', 'abc\rab\tdef\ng\thi', 8)
assert u2.get(i) == u2[i] test('expandtabs', 'abc\rab\tdef\ng\thi', 4)
assert u1.get(i) == d1.get(i) test('expandtabs', 'abc\r\nab\tdef\ng\thi', 4)
assert u0.get(i) == d0.get(i)
test('islower', 'a')
test('islower', 'A')
test('islower', '\n')
test('islower', 'abc')
test('islower', 'aBc')
test('islower', 'abc\n')
test('isupper', 'a')
test('isupper', 'A')
test('isupper', '\n')
test('isupper', 'ABC')
test('isupper', 'AbC')
test('isupper', 'ABC\n')
test('isdigit', ' 0123456789')
test('isdigit', '56789')
test('isdigit', '567.89')
test('isdigit', '0123456789abc')
test('isspace', '')
test('isspace', ' ')
test('isspace', ' \t')
test('isspace', ' \t\f\n')
test('istitle', 'a')
test('istitle', 'A')
test('istitle', '\n')
test('istitle', 'A Titlecased Line')
test('istitle', 'A\nTitlecased Line')
test('istitle', 'A Titlecased, Line')
test('istitle', 'Not a capitalized String')
test('istitle', 'Not\ta Titlecase String')
test('istitle', 'Not--a Titlecase String')
test('splitlines', "abc\ndef\n\rghi")
test('splitlines', "abc\ndef\n\r\nghi")
test('splitlines', "abc\ndef\r\nghi")
test('splitlines', "abc\ndef\r\nghi\n")
test('splitlines', "abc\ndef\r\nghi\n\r")
test('splitlines', "\nabc\ndef\r\nghi\n\r")
test('splitlines', "\nabc\ndef\r\nghi\n\r")
test('splitlines', "\nabc\ndef\r\nghi\n\r")
test('split', 'this is the split function')
test('split', 'a|b|c|d', '|')
test('split', 'a|b|c|d', '|', 2)
test('split', 'a b c d', None, 1)
test('split', 'a b c d', None, 2)
test('split', 'a b c d', None, 3)
test('split', 'a b c d', None, 4)
test('split', 'a b c d', None, 0)
test('split', 'a b c d', None, 2)
test('split', 'a b c d ')
# join now works with any sequence type
class Sequence:
    """Bare-bones sequence used to show that join accepts any sequence type.

    Only __len__ and __getitem__ are provided; both delegate to the
    string stored in self.seq.
    """

    def __init__(self):
        self.seq = 'wxyz'

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        # Delegating to the string means an out-of-range index raises
        # IndexError, which ends index-based iteration.
        return self.seq[i]
test('join', '', ('a', 'b', 'c', 'd'))
test('join', '', Sequence())
test('join', '', 7)
# Sequence whose items are not all strings (an int and a long sit next to
# the string item), used as a deliberately bad argument for join.
class BadSeq(Sequence):
    def __init__(self): self.seq = [7, 'hello', 123L]
test('join', '', BadSeq())
test('strip', ' hello ')
test('lstrip', ' hello ')
test('rstrip', ' hello ')
test('strip', 'hello')
test('swapcase', 'HeLLo cOmpUteRs')
transtable = string.maketrans("abc", "xyz")
test('translate', 'xyzabcdef', transtable, 'def')
transtable = string.maketrans('a', 'A')
test('translate', 'abc', transtable)
test('translate', 'xyz', transtable)
test('replace', 'one!two!three!', '!', '@', 1)
test('replace', 'one!two!three!', '!', '')
test('replace', 'one!two!three!', '!', '@', 2)
test('replace', 'one!two!three!', '!', '@', 3)
test('replace', 'one!two!three!', '!', '@', 4)
test('replace', 'one!two!three!', '!', '@', 0)
test('replace', 'one!two!three!', '!', '@')
test('replace', 'one!two!three!', 'x', '@')
test('replace', 'one!two!three!', 'x', '@', 2)
test('startswith', 'hello', 'he')
test('startswith', 'hello', 'hello')
test('startswith', 'hello', 'hello world')
test('startswith', 'hello', '')
test('startswith', 'hello', 'ello')
test('startswith', 'hello', 'ello', 1)
test('startswith', 'hello', 'o', 4)
test('startswith', 'hello', 'o', 5)
test('startswith', 'hello', '', 5)
test('startswith', 'hello', 'lo', 6)
test('startswith', 'helloworld', 'lowo', 3)
test('startswith', 'helloworld', 'lowo', 3, 7)
test('startswith', 'helloworld', 'lowo', 3, 6)
test('endswith', 'hello', 'lo')
test('endswith', 'hello', 'he')
test('endswith', 'hello', '')
test('endswith', 'hello', 'hello world')
test('endswith', 'helloworld', 'worl')
test('endswith', 'helloworld', 'worl', 3, 9)
test('endswith', 'helloworld', 'world', 3, 12)
test('endswith', 'helloworld', 'lowo', 1, 7)
test('endswith', 'helloworld', 'lowo', 2, 7)
test('endswith', 'helloworld', 'lowo', 3, 7)
test('endswith', 'helloworld', 'lowo', 4, 7)
test('endswith', 'helloworld', 'lowo', 3, 8)
test('endswith', 'ab', 'ab', 0, 1)
test('endswith', 'ab', 'ab', 0, 0)
# TODO: test cases for: int, long, float, complex, +, * and cmp
# Coverage report: print every name from dir() of a plain string that has
# no entry in tested_methods.
s = ""
for builtin_method in dir(s):
    if not tested_methods.has_key(builtin_method):
        print "no regression test case for method '"+builtin_method+"'"
# Test the windows specific win32reg module. # Ridiculously simple test of the winsound module for Windows.
# Only win32reg functions not hit here: FlushKey, LoadKey and SaveKey
from winreg import * import winsound
import os, sys for i in range(100, 2000, 100):
winsound.Beep(i, 75)
test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me" print "Hopefully you heard some sounds increasing in frequency!"
# Values written to, read back from, and deleted from the test sub-key.
# Each entry is a (value name, value data, registry value type) tuple; the
# entries mix plain and Unicode strings for names and data.
test_data = [
    ("Int Value", 45, REG_DWORD),
    ("String Val", "A string value", REG_SZ,),
    (u"Unicode Val", u"A Unicode value", REG_SZ,),
    ("StringExpand", "The path is %path%", REG_EXPAND_SZ),
    ("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ),
    ("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ),
    ("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ),
    ("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ),
    ("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY),
]
def WriteTestData(root_key):
    """Create the test key below root_key and fill it with test_data.

    The test key gets a default value and one sub-key, "sub_key", which
    receives every entry of test_data.  The key counts are verified and
    both handles are closed -- one with CloseKey(), one with the Close()
    method -- checking each time that the underlying handle is really
    gone.
    """
    # The default (unnamed) value of the test key.
    SetValue(root_key, test_key_name, REG_SZ, "Default value")
    key = CreateKey(root_key, test_key_name)
    sub_key = CreateKey(key, "sub_key")
    # Store every named value in the sub-key.
    for name, data, kind in test_data:
        SetValueEx(sub_key, name, 0, kind, data)

    # The test key holds one sub-key and one (default) value ...
    subkey_count, value_count, modified = QueryInfoKey(key)
    assert subkey_count==1, "Not the correct number of sub keys"
    assert value_count==1, "Not the correct number of values"
    # ... and the sub-key holds exactly the values written above.
    subkey_count, value_count, modified = QueryInfoKey(sub_key)
    assert subkey_count==0, "Not the correct number of sub keys"
    assert value_count==len(test_data), "Not the correct number of values"

    # Close the sub-key with the module-level function.  Keep the integer
    # handle around first so a query on it can prove the key was closed.
    raw_sub_handle = int(sub_key)
    CloseKey(sub_key)
    try:
        QueryInfoKey(raw_sub_handle)
    except EnvironmentError:
        pass
    else:
        raise RuntimeError("It appears the CloseKey() function does not close the actual key!")

    # Close the main key with the handle's own method, same check.
    raw_handle = int(key)
    key.Close()
    try:
        QueryInfoKey(raw_handle)
    except EnvironmentError:
        pass
    else:
        raise RuntimeError("It appears the key.Close() function does not close the actual key!")
def ReadTestData(root_key):
    """Read back everything stored under the test key and verify it.

    Checks the default value of the test key, enumerates the values of
    "sub_key" against test_data, reads each value directly by name, and
    confirms that "sub_key" is the only sub-key.
    """
    default_value = QueryValue(root_key, test_key_name)
    assert default_value=="Default value", "Registry didnt give back the correct value"

    key = OpenKey(root_key, test_key_name)
    sub_key = OpenKey(key, "sub_key")

    # Enumerate values by index until the registry reports no more items.
    seen = 0
    while 1:
        try:
            entry = EnumValue(sub_key, seen)
        except EnvironmentError:
            break
        assert entry in test_data, "didnt read back the correct test data."
        seen = seen + 1
    assert seen==len(test_data), "Didnt read the correct number of items"

    # Every value must also be reachable directly by its name.
    for name, expected_data, expected_type in test_data:
        actual_data, actual_type = QueryValueEx(sub_key, name)
        assert actual_data==expected_data and actual_type == expected_type, \
               "Could not directly read the value"
    sub_key.Close()

    # The main key has exactly one sub-key: index 0 exists, index 1 fails.
    first_child = EnumKey(key, 0)
    assert first_child == "sub_key", "Read subkey value wrong"
    try:
        EnumKey(key, 1)
    except EnvironmentError:
        pass
    else:
        assert 0, "Was able to get a second key when I only have one!"

    key.Close()
def DeleteTestData(root_key):
    """Delete the registry test data under root_key and verify it is gone.

    Removes the values of "sub_key", then "sub_key" itself, then the test
    key, checking along the way that a second delete fails and that the
    test key can no longer be opened afterwards.
    """
    main_key = OpenKey(root_key, test_key_name, 0, KEY_ALL_ACCESS)
    values_key = OpenKey(main_key, "sub_key", 0, KEY_ALL_ACCESS)

    # A key can be deleted while it still holds values (only sub-keys
    # have to be gone first); the values are removed one by one anyway,
    # simply to exercise DeleteValue.
    for name, _data, _type in test_data:
        DeleteValue(values_key, name)

    subkey_count, value_count, modified = QueryInfoKey(values_key)
    assert subkey_count==0 and value_count==0, "subkey not empty before delete"
    values_key.Close()
    DeleteKey(main_key, "sub_key")

    # A second delete of the same sub-key has to fail.
    try:
        DeleteKey(main_key, "sub_key")
    except EnvironmentError:
        pass
    else:
        assert 0, "Deleting the key twice succeeded"
    main_key.Close()
    DeleteKey(root_key, test_key_name)

    # The test key no longer exists, so opening it has to fail.
    # WindowsError is caught here on purpose, instead of EnvironmentError,
    # so that this exception name gets exercised as well.
    try:
        main_key = OpenKey(root_key, test_key_name)
    except WindowsError:
        pass
    else:
        assert 0, "Could open the non-existent key"
def TestAll(root_key):
    """Run the write, read and delete phases, in that order, on root_key."""
    for phase in (WriteTestData, ReadTestData, DeleteTestData):
        phase(root_key)
# Always exercise the registry of the machine the test runs on.
TestAll(HKEY_CURRENT_USER)
print("Local registry tests worked")

# An optional "--remote <machine_name>" argument names a second machine
# whose registry should be tested too.
try:
    remote_name = sys.argv[sys.argv.index("--remote")+1]
except (IndexError, ValueError):
    # No "--remote" flag, or nothing following it.
    remote_name = None

if remote_name is None:
    print("%s %s" % ("Remote registry calls can be tested using",
                     "'test_winreg.py --remote \\\\machine_name'"))
else:
    try:
        remote_key = ConnectRegistry(remote_name, HKEY_CURRENT_USER)
    except EnvironmentError:
        # Fetch the exception through sys.exc_info() so that this handler
        # parses the same way on old and new Python versions.
        exc = sys.exc_info()[1]
        print("%s %s" % ("Could not connect to the remote machine -",
                         exc.strerror))
        remote_key = None
    if remote_key is not None:
        TestAll(remote_key)
        print("Remote registry tests worked")
...@@ -11,7 +11,7 @@ except ImportError: ...@@ -11,7 +11,7 @@ except ImportError:
buf = file.read() * 8 buf = file.read() * 8
file.close() file.close()
# test the chucksums (hex so the test doesn't break on 64-bit machines) # test the checksums (hex so the test doesn't break on 64-bit machines)
print hex(zlib.crc32('penguin')), hex(zlib.crc32('penguin', 1)) print hex(zlib.crc32('penguin')), hex(zlib.crc32('penguin', 1))
print hex(zlib.adler32('penguin')), hex(zlib.adler32('penguin', 1)) print hex(zlib.adler32('penguin')), hex(zlib.adler32('penguin', 1))
......
...@@ -18,6 +18,7 @@ del time ...@@ -18,6 +18,7 @@ del time
_start_new_thread = thread.start_new_thread _start_new_thread = thread.start_new_thread
_allocate_lock = thread.allocate_lock _allocate_lock = thread.allocate_lock
_get_ident = thread.get_ident _get_ident = thread.get_ident
ThreadError = thread.error
del thread del thread
_print_exc = traceback.print_exc _print_exc = traceback.print_exc
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment