Commit b5ec5e4b authored by Jack Jansen's avatar Jack Jansen

Contributed code that converts Python source files to any combination of tabs/spaces for indenting.

parent f65ab1bf
Original README for Tabcleaner.py
tabcleaner.py is a utility that reformats leading whitespace in a Python source.
It uses tokenize.py (from the std distribution) to detect INDENTs and DEDENTs,
then reformats according to the user's options (tabs-only, spaces-only with
indent size n, or mixed with tab worth m and indent level of n).
Python does not care about the indent of comments and multi-linestrings.
tabcleaner places these at what Python considers the current indentlevel. About
half the time, this is correct; the rest of the time it is probably one indent
level less than what was desired. It is pretty much guaranteed to be
syntactically correct, (earlier versions broke on some triple-quoted strings).
With no args, (or "-h") prints usage text.
Contact: gmcm@hypernet.com
Additional comments: I have made a few slight changes. It was written to take
command line arguments, so that you can set parameters like the size of indents,
and whether you want the result to be all tabs, or all spaces, or a mixture of
both (an evil combination, if you ask me). It is set, be default, to change your
indentation to all tabs.
In the current version of Python, all the code in the standard library is
indented with only spaces. This is a somewhat awkward standard on the mac, so
most MacPython code is indented with only tabs. This script can be used to do any
version, but all tabs is the default, which seems to be the best option for the
Mac.
How to use it on a Mac:
The script is set up to take filenames (or directory names) on the command line.
To simulate this behaviour with MacPython, you can build an applet out of it
(with BuildApplet, which should be in your Python folder). Any files draggged and
dropped onto the resulting applet will be converted to all tabs, with a backup
copy havning been saved.
If you want the script to convert to space based indentation, your best bet is
probably to change the default on line 46 of the file.
-Chris Barker cbarker@jps.net
#!/usr/bin/python
import tokenize
import string
TABSONLY = 'TABSONLY'
SPACESONLY = 'SPACESONLY'
MIXED = 'MIXED'
class PyText:
def __init__(self, fnm, optdict):
self.optdict = optdict
self.fnm = fnm
self.txt = open(self.fnm, 'r').readlines()
self.indents = [(0, 0, )]
self.lnndx = 0
self.indentndx = 0
def getline(self):
if self.lnndx < len(self.txt):
txt = self.txt[self.lnndx]
self.lnndx = self.lnndx + 1
else:
txt = ''
return txt
def tokeneater(self, type, token, start, end, line):
if type == tokenize.INDENT:
(lvl, s) = self.indents[-1]
self.indents[-1] = (lvl, s, start[0]-1)
self.indents.append((lvl+1, start[0]-1,))
elif type == tokenize.DEDENT:
(lvl, s) = self.indents[-1]
self.indents[-1] = (lvl, s, start[0]-1)
self.indents.append((lvl-1, start[0]-1,))
elif type == tokenize.ENDMARKER:
(lvl, s) = self.indents[-1]
self.indents[-1] = (lvl, s, len(self.txt))
def split(self, ln):
content = string.lstrip(ln)
if not content:
return ('', '\n')
lead = ln[:len(ln) - len(content)]
lead = string.expandtabs(lead)
return (lead, content)
def process(self):
style = self.optdict.get('style', TABSONLY)
indent = string.atoi(self.optdict.get('indent', '4'))
tabsz = string.atoi(self.optdict.get('tabs', '8'))
print 'file %s -> style %s, tabsize %d, indent %d' % (self.fnm, style, tabsz, indent)
tokenize.tokenize(self.getline, self.tokeneater)
#import pprint
#pprint.pprint(self.indents)
new = []
for (lvl, s, e) in self.indents:
if s >= len(self.txt):
break
if s == e:
continue
oldlead, content = self.split(self.txt[s])
#print "oldlead", len(oldlead), `oldlead`
if style == TABSONLY:
newlead = '\t'*lvl
elif style == SPACESONLY:
newlead = ' '*(indent*lvl)
else:
sz = indent*lvl
t,spcs = divmod(sz, tabsz)
newlead = '\t'*t + ' '*spcs
new.append(newlead + content)
for ln in self.txt[s+1:e]:
lead, content = self.split(ln)
#print "lead:", len(lead)
new.append(newlead + lead[len(oldlead):] + content)
self.save(new)
#print "---", self.fnm
#for ln in new:
# print ln,
#print
def save(self, txt):
bakname = os.path.splitext(self.fnm)[0]+'.bak'
print "backing up", self.fnm, "to", bakname
#print os.getcwd()
try:
os.rename(self.fnm, bakname)
except os.error:
os.remove(bakname)
os.rename(self.fnm, bakname)
open(self.fnm, 'w').writelines(txt)
def test():
tc = PyText('test1.py')
tc.process()
tc = PyText('test1.py')
tc.process(style=TABSONLY)
tc = PyText('test1.py')
tc.process(style=MIXED, indent=4, tabs=8)
tc = PyText('test1.py')
tc.process(style=MIXED, indent=2, tabs=8)
def cleanfile(fnm, d):
if os.path.isdir(fnm) and not os.path.islink(fnm):
names = os.listdir(fnm)
for name in names:
fullnm = os.path.join(fnm, name)
if (os.path.isdir(fullnm) and not os.path.islink(fullnm)) or \
os.path.normcase(fullnm[-3:]) == ".py":
cleanfile(fullnm, d)
return
tc = PyText(fnm, d)
tc.process()
usage="""\
%s [options] [path...]
options
-T : reformat to TABS ONLY
-S : reformat to SPACES ONLY ( -i option is important)
-M : reformat to MIXED SPACES / TABS ( -t and -i options important)
-t<n> : tab is worth <n> characters
-i<n> : indents should be <n> characters
-h : print this text
path is file or directory
"""
if __name__ == '__main__':
import sys, getopt, os
opts, args = getopt.getopt(sys.argv[1:], "TSMht:i:")
d = {}
print `opts`
for opt in opts:
if opt[0] == '-T':
d['style'] = TABSONLY
elif opt[0] == '-S':
d['style'] = SPACESONLY
elif opt[0] == '-M':
d['style'] = MIXED
elif opt[0] == '-t':
d['tabs'] = opt[1]
elif opt[0] == '-i':
d['indent'] = opt[1]
elif opt[0] == '-h':
print usage % sys.argv[0]
sys.exit(0)
if not args:
print usage % sys.argv[0]
for arg in args:
cleanfile(arg, d)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment