Commit 31961621 authored by Barry Warsaw

Two changes:

- All constructors grow an optional argument `factory' which is a
  callable used when new message instances are created by the next()
  methods.  Defaults to the rfc822.Message class.

- A new subclass of UnixMailbox is added, called PortableUnixMailbox.
  It's identical to UnixMailbox, but uses a more portable test for
  From_ delimiter lines.  With PortableUnixMailbox, any line that
  starts with "From " is considered a delimiter (this should really
  check for two newlines before the F, but it doesn't).
parent ba7d73a3
...@@ -9,9 +9,10 @@ import os ...@@ -9,9 +9,10 @@ import os
__all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox"] __all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox"]
class _Mailbox: class _Mailbox:
def __init__(self, fp): def __init__(self, fp, factory=rfc822.Message):
self.fp = fp self.fp = fp
self.seekp = 0 self.seekp = 0
self.factory = factory
def seek(self, pos, whence=0): def seek(self, pos, whence=0):
if whence==1: # Relative to current position if whence==1: # Relative to current position
...@@ -34,7 +35,7 @@ class _Mailbox: ...@@ -34,7 +35,7 @@ class _Mailbox:
self.seekp = stop = self.fp.tell() self.seekp = stop = self.fp.tell()
if start != stop: if start != stop:
break break
return rfc822.Message(_Subfile(self.fp, start, stop)) return self.factory(_Subfile(self.fp, start, stop))
class _Subfile: class _Subfile:
...@@ -117,22 +118,50 @@ class UnixMailbox(_Mailbox): ...@@ -117,22 +118,50 @@ class UnixMailbox(_Mailbox):
self.fp.seek(pos) self.fp.seek(pos)
return return
# An overridable mechanism to test for From-line-ness. # An overridable mechanism to test for From-line-ness. You can either
# You can either specify a different regular expression # specify a different regular expression or define a whole new
# or define a whole new _isrealfromline() method. # _isrealfromline() method. Note that this only gets called for lines
# Note that this only gets called for lines starting with # starting with the 5 characters "From ".
# the 5 characters "From ". #
# BAW: According to
#http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
# the only portable, reliable way to find message delimiters in a BSD (i.e.
# Unix mailbox) style folder is to search for "\n\nFrom .*\n", or at the
# beginning of the file, "^From .*\n". While _fromlinepattern below seems
# like a good idea, in practice, there are too many variations for more
# strict parsing of the line to be completely accurate.
#
# _strict_isrealfromline() is the old version which tries to do stricter
# parsing of the From_ line. _portable_isrealfromline() simply returns
# true, since it's never called if the line doesn't already start with
# "From ".
#
# This algorithm, and the way it interacts with _search_start() and
# _search_end() may not be completely correct, because it doesn't check
# that the two characters preceding "From " are \n\n or the beginning of
# the file. Fixing this would require a more extensive rewrite than is
# necessary. For convenience, we've added a PortableUnixMailbox class which
# uses the lenient _portable_isrealfromline() test, while UnixMailbox keeps
# the older, more strict _fromlinepattern regular expression.
_fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \ _fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \
r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$" r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$"
_regexp = None _regexp = None
def _isrealfromline(self, line): def _strict_isrealfromline(self, line):
if not self._regexp: if not self._regexp:
import re import re
self._regexp = re.compile(self._fromlinepattern) self._regexp = re.compile(self._fromlinepattern)
return self._regexp.match(line) return self._regexp.match(line)
def _portable_isrealfromline(self, line):
return 1
_isrealfromline = _strict_isrealfromline
class PortableUnixMailbox(UnixMailbox):
_isrealfromline = UnixMailbox._portable_isrealfromline
class MmdfMailbox(_Mailbox): class MmdfMailbox(_Mailbox):
def _search_start(self): def _search_start(self):
...@@ -155,7 +184,7 @@ class MmdfMailbox(_Mailbox): ...@@ -155,7 +184,7 @@ class MmdfMailbox(_Mailbox):
class MHMailbox: class MHMailbox:
def __init__(self, dirname): def __init__(self, dirname, factory=rfc822.Message):
import re import re
pat = re.compile('^[1-9][0-9]*$') pat = re.compile('^[1-9][0-9]*$')
self.dirname = dirname self.dirname = dirname
...@@ -168,6 +197,7 @@ class MHMailbox: ...@@ -168,6 +197,7 @@ class MHMailbox:
# This only works in Python 1.6 or later; # This only works in Python 1.6 or later;
# before that str() added 'L': # before that str() added 'L':
self.boxes = map(str, list) self.boxes = map(str, list)
self.factory = factory
def next(self): def next(self):
if not self.boxes: if not self.boxes:
...@@ -175,14 +205,15 @@ class MHMailbox: ...@@ -175,14 +205,15 @@ class MHMailbox:
fn = self.boxes[0] fn = self.boxes[0]
del self.boxes[0] del self.boxes[0]
fp = open(os.path.join(self.dirname, fn)) fp = open(os.path.join(self.dirname, fn))
return rfc822.Message(fp) return self.factory(fp)
class Maildir: class Maildir:
# Qmail directory mailbox # Qmail directory mailbox
def __init__(self, dirname): def __init__(self, dirname, factory=rfc822.Message):
self.dirname = dirname self.dirname = dirname
self.factory = factory
# check for new mail # check for new mail
newdir = os.path.join(self.dirname, 'new') newdir = os.path.join(self.dirname, 'new')
...@@ -202,7 +233,7 @@ class Maildir: ...@@ -202,7 +233,7 @@ class Maildir:
fn = self.boxes[0] fn = self.boxes[0]
del self.boxes[0] del self.boxes[0]
fp = open(fn) fp = open(fn)
return rfc822.Message(fp) return self.factory(fp)
class BabylMailbox(_Mailbox): class BabylMailbox(_Mailbox):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment