Added the -D/--docstrings option for extraction of unmarked module,
class, method, and function docstrings.

Added the -D/--docstrings option for extraction of unmarked module,
class, method, and function docstrings.
08a8a355 · Barry Warsaw · 3aecfc96 · 08a8a355
Commit 08a8a355 authored Oct 27, 2000 by Barry Warsaw
Hide whitespace changes
Inline Side-by-side

Showing with 82 additions and 40 deletions

Tools/i18n/pygettext.py Tools/i18n/pygettext.py +82 -40

No files found.
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -4,15 +4,7 @@
 # minimally patched to make it even more xgettext compatible 
 # by Peter Funk <pf@artcom-gmbh.de>
-# for selftesting
+"""pygettext -- Python equivalent of xgettext(1)
-try:
-    import fintl
-    _ = fintl.gettext
-except ImportError:
-    def _(s): return s
-__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
 Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
 internationalization of C programs.  Most of these tools are independent of
@@ -65,7 +57,13 @@ Options:
    -E
    --escape
-        replace non-ASCII characters with octal escape sequences.
+        Replace non-ASCII characters with octal escape sequences.
+    -D
+    --docstrings
+        Extract module, class, method, and function docstrings.  These do not
+        need to be wrapped in _() markers, and in fact cannot be for Python to
+        consider them docstrings.
    -h
    --help
@@ -93,6 +91,15 @@ Options:
        each msgid.  The style of comments is controlled by the -S/--style
        option.  This is the default.
+    -o filename
+    --output=filename
+        Rename the default output file from messages.pot to filename.  If
+        filename is `-' then the output is sent to standard out.
+    -p dir
+    --output-dir=dir
+        Output files will be placed in directory dir.
    -S stylename
    --style stylename
        Specify which style to use for location comments.  Two styles are
@@ -103,15 +110,6 @@ Options:
        The style name is case insensitive.  GNU style is the default.
-    -o filename
-    --output=filename
-        Rename the default output file from messages.pot to filename.  If
-        filename is `-' then the output is sent to standard out.
-    -p dir
-    --output-dir=dir
-        Output files will be placed in directory dir.
    -v
    --verbose
        Print the names of the files being processed.
@@ -132,7 +130,7 @@ Options:
 If `inputfile' is -, standard input is read.
-""")
+"""
 import os
 import sys
@@ -140,7 +138,14 @@ import time
 import getopt
 import tokenize
-__version__ = '1.1'
+# for selftesting
+try:
+    import fintl
+    _ = fintl.gettext
+except ImportError:
+    def _(s): return s
+__version__ = '1.2'
 default_keywords = ['_']
 DEFAULTKEYWORDS = ', '.join(default_keywords)
@@ -171,9 +176,9 @@ msgstr ""
 def usage(code, msg=''):
-    print __doc__ % globals()
+    print >> sys.stderr, _(__doc__) % globals()
    if msg:
-        print msg
+        print >> sys.stderr, msg
    sys.exit(code)
@@ -239,15 +244,48 @@ class TokenEater:
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
+        self.__freshmodule = 1
    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
+##        import token
+##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+##              'tstring:', tstring
        self.__state(ttype, tstring, stup[0])
    def __waiting(self, ttype, tstring, lineno):
+        # Do docstring extractions, if enabled
+        if self.__options.docstrings:
+            # module docstring?
+            if self.__freshmodule:
+                if ttype == tokenize.STRING:
+                    self.__addentry(safe_eval(tstring), lineno)
+                    self.__freshmodule = 0
+                elif ttype not in (tokenize.COMMENT, tokenize.NL):
+                    self.__freshmodule = 0
+                return
+            # class docstring?
+            if ttype == tokenize.NAME and tstring in ('class', 'def'):
+                self.__state = self.__suiteseen
+                return
        if ttype == tokenize.NAME and tstring in self.__options.keywords:
            self.__state = self.__keywordseen
+    def __suiteseen(self, ttype, tstring, lineno):
+        # ignore anything until we see the colon
+        if ttype == tokenize.OP and tstring == ':':
+            self.__state = self.__suitedocstring
+    def __suitedocstring(self, ttype, tstring, lineno):
+        # ignore any intervening noise
+        if ttype == tokenize.STRING:
+            self.__addentry(safe_eval(tstring), lineno)
+            self.__state = self.__waiting
+        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
+                           tokenize.COMMENT):
+            # there was no class docstring
+            self.__state = self.__waiting
    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
@@ -263,29 +301,28 @@ class TokenEater:
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
-                msg = EMPTYSTRING.join(self.__data)
+                self.__addentry(EMPTYSTRING.join(self.__data))
-                if not msg in self.__options.toexclude:
-                    entry = (self.__curfile, self.__lineno)
-                    linenos = self.__messages.get(msg)
-                    if linenos is None:
-                        self.__messages[msg] = [entry]
-                    else:
-                        linenos.append(entry)
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        # TBD: should we warn if we seen anything else?
+    def __addentry(self, msg, lineno=None):
+        if lineno is None:
+            lineno = self.__lineno
+        if not msg in self.__options.toexclude:
+            entry = (self.__curfile, lineno)
+            self.__messages.setdefault(msg, []).append(entry)
    def set_filename(self, filename):
        self.__curfile = filename
    def write(self, fp):
        options = self.__options
        timestamp = time.ctime(time.time())
-        # common header
+        # The time stamp in the header doesn't have the same format as that
-        # The time stamp in the header doesn't have the same format
+        # generated by xgettext...
-        # as that generated by xgettext...
+        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
-        print >>fp, pot_header % {'time': timestamp, 'version': __version__}
        for k, v in self.__messages.items():
            if not options.writelocations:
                pass
@@ -304,13 +341,14 @@ class TokenEater:
                    if len(locline) + len(s) <= options.width:
                        locline = locline + s
                    else:
-                        print >>fp, locline
+                        print >> fp, locline
                        locline = "#:" + s
                if len(locline) > 2:
-                    print >>fp, locline
+                    print >> fp, locline
            # TBD: sorting, normalizing
-            print >>fp, 'msgid', normalize(k)
+            print >> fp, 'msgid', normalize(k)
-            print >>fp, 'msgstr ""\n'
+            print >> fp, 'msgstr ""\n'
 def main():
@@ -318,11 +356,12 @@ def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
-            'ad:Ehk:Kno:p:S:Vvw:x:',
+            'ad:DEhk:Kno:p:S:Vvw:x:',
            ['extract-all', 'default-domain', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
+             'docstrings',
             ])
    except getopt.error, msg:
        usage(1, msg)
@@ -343,6 +382,7 @@ def main():
        verbose = 0
        width = 78
        excludefilename = ''
+        docstrings = 0
    options = Options()
    locations = {'gnu' : options.GNU,
@@ -359,6 +399,8 @@ def main():
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
+        elif opt in ('-D', '--docstrings'):
+            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):