Commit 2b10c869 authored by Jim Fulton

initial

parent 6bdedcbb
database_type='Gadfly'
###########################################################################
#
# Copyright
#
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
# rights reserved.
#
###########################################################################
__doc__='''%s Database Connection
$Id: DA.py,v 1.1 1998/04/15 15:10:37 jim Exp $''' % database_type
__version__='$Revision: 1.1 $'[11:-2]
from db import DB, manage_DataSources
import sys, DABase, Globals
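# _connections maps a connection string to the _p_oid of the Connection
# object using it.  A Gadfly database directory supports a single writer,
# so a database already claimed by another connection object is treated
# as "in use" (see connect() below) and is hidden from the add form
# (see data_sources() below).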
_connections={}
def data_sources():
return filter(lambda ds, used=_connections.has_key: not used(ds[0]),
manage_DataSources())
addConnectionForm=Globals.HTMLFile('connectionAdd',globals())
def manage_addAqueductGadflyConnection(
self, id, title, connection, check=None, REQUEST=None):
"""Add a DB connection to a folder"""
self._setObject(id, Connection(
id, title, connection, check))
    if REQUEST is not None: return self.manage_main(self, REQUEST)
class Connection(DABase.Connection):
database_type=database_type
id='%s_database_connection' % database_type
meta_type=title='Aqueduct %s Database Connection' % database_type
icon='misc_/Aqueduct%s/conn' % database_type
def factory(self): return DB
def connect(self,s):
c=_connections
if c.has_key(s) and c[s] != self._p_oid:
raise 'In Use', (
'The database <em>%s</em> is in use.' % s)
c[s]=self._p_oid
return Connection.inheritedAttribute('connect')(self, s)
def __del__(self):
s=self.connection_string
c=_connections
if c.has_key(s) and c[s] == self._p_oid: del c[s]
def manage_close_connection(self, REQUEST):
" "
s=self.connection_string
c=_connections
if c.has_key(s) and c[s] == self._p_oid: del c[s]
return Connection.inheritedAttribute('manage_close_connection')(self, REQUEST)
##############################################################################
#
# $Log: DA.py,v $
# Revision 1.1 1998/04/15 15:10:37 jim
# initial
#
###########################################################################
#
# Copyright
#
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
# rights reserved.
#
###########################################################################
__doc__='''Database Connection
$Id: DABase.py,v 1.1 1998/04/15 15:10:38 jim Exp $'''
__version__='$Revision: 1.1 $'[11:-2]
from db import manage_DataSources
from string import find, split, join
import AqueductDA.Connection, sys
from Globals import HTMLFile
from ImageFile import ImageFile
from ExtensionClass import Base
import Acquisition
class Connection(AqueductDA.Connection.Connection):
_isAnSQLConnection=1
manage_options=AqueductDA.Connection.Connection.manage_options+(
{'label': 'Browse', 'action':'manage_browse'},
{'label': 'Design', 'action':'manage_tables'},
)
manage_tables=HTMLFile('tables',globals())
manage_browse=HTMLFile('browse',globals())
info=None
def tpValues(self):
if hasattr(self, '_v_tpValues'): return self._v_tpValues
r=[]
self._v_tables=tables=TableBrowserCollection()
tables=tables.__dict__
c=self._v_database_connection
try:
for d in c.tables(rdb=0):
try:
name=d['TABLE_NAME']
b=TableBrowser()
b._d=d
b._columns=c.columns(name)
try: b.icon=table_icons[d['TABLE_TYPE']]
except: pass
r.append(b)
tables[name]=b
except:
# print d['TABLE_NAME'], sys.exc_type, sys.exc_value
pass
finally: pass #print sys.exc_type, sys.exc_value
self._v_tpValues=r
return r
def __getitem__(self, name):
if name=='tableNamed':
if not hasattr(self, '_v_tables'): self.tpValues()
return self._v_tables.__of__(self)
raise KeyError, name
def manage_wizard(self, tables):
" "
def manage_join(self, tables, select_cols, join_cols, REQUEST=None):
"""Create an SQL join"""
def manage_insert(self, table, cols, REQUEST=None):
"""Create an SQL insert"""
def manage_update(self, table, keys, cols, REQUEST=None):
"""Create an SQL update"""
class TableBrowserCollection(Acquisition.Implicit):
"Helper class for accessing tables via URLs"
class Browser(Base):
def __getattr__(self, name):
try: return self._d[name]
except KeyError: raise AttributeError, name
class TableBrowser(Browser, Acquisition.Implicit):
icon='what'
Description=check=''
info=HTMLFile('table_info',globals())
menu=HTMLFile('table_menu',globals())
def tpValues(self):
r=[]
for d in self._columns:
b=ColumnBrowser()
b._d=d
try: b.icon=field_icons[d['Type']]
except: pass
b.TABLE_NAME=self._d['TABLE_NAME']
r.append(b)
return r
def tpId(self): return self._d['TABLE_NAME']
def tpURL(self): return "Table/%s" % self._d['TABLE_NAME']
def Name(self): return self._d['TABLE_NAME']
def Type(self): return self._d['TABLE_TYPE']
manage_designInput=HTMLFile('designInput',globals())
def manage_buildInput(self, id, source, default, REQUEST=None):
"Create a database method for an input form"
args=[]
values=[]
names=[]
columns=self._columns
for i in range(len(source)):
s=source[i]
if s=='Null': continue
c=columns[i]
d=default[i]
t=c['Type']
n=c['Name']
names.append(n)
if s=='Argument':
values.append("<!--#sql-value %s type=%s-->'" %
(n, vartype(t)))
a='%s%s' % (n, boboType(t))
if d: a="%s=%s" % (a,d)
args.append(a)
elif s=='Property':
values.append("<!--#sql-value %s type=%s-->'" %
(n, vartype(t)))
else:
if isStringType(t):
if find(d,"\'") >= 0: d=join(split(d,"\'"),"''")
values.append("'%s'" % d)
elif d:
values.append(str(d))
else:
raise ValueError, (
'no default was given for <em>%s</em>' % n)
class ColumnBrowser(Browser):
icon='field'
def check(self):
return ('\t<input type=checkbox name="%s.%s">' %
(self.TABLE_NAME, self._d['Name']))
def tpId(self): return self._d['Name']
def tpURL(self): return "Column/%s" % self._d['Name']
def Description(self):
d=self._d
if d['Scale']:
return " %(Type)s(%(Precision)s,%(Scale)s) %(Nullable)s" % d
else:
return " %(Type)s(%(Precision)s) %(Nullable)s" % d
table_icons={
'TABLE': 'table',
'VIEW':'view',
'SYSTEM_TABLE': 'stable',
}
field_icons={
'BIGINT': 'int',
'BINARY': 'bin',
'BIT': 'bin',
'CHAR': 'text',
'DATE': 'date',
'DECIMAL': 'float',
'DOUBLE': 'float',
'FLOAT': 'float',
'INTEGER': 'int',
'LONGVARBINARY': 'bin',
'LONGVARCHAR': 'text',
'NUMERIC': 'float',
'REAL': 'float',
'SMALLINT': 'int',
'TIME': 'time',
'TIMESTAMP': 'datetime',
'TINYINT': 'int',
'VARBINARY': 'bin',
'VARCHAR': 'text',
}
##############################################################################
#
# $Log: DABase.py,v $
# Revision 1.1 1998/04/15 15:10:38 jim
# initial
#
# Revision 1.8 1998/01/29 16:26:44 brian
# Added eval support
#
# Revision 1.7 1998/01/16 18:35:34 jim
# Updated with new da protocols.
#
# Revision 1.5 1998/01/07 16:28:27 jim
# Brought up to date with latest Principia models, and got rid of DA objects and search interface.
#
# Revision 1.4 1997/12/02 19:35:34 jim
# changed to get rid of DA folders.
#
# Revision 1.5 1997/11/26 20:04:22 jim
# New Architecture, note that backward compatibility tools are needed
#
# Revision 1.4 1997/09/22 18:46:12 jim
# Got rid of ManageHTML
#
# Revision 1.3 1997/08/06 18:20:50 jim
# Renamed description->title and name->id and other changes
#
# Revision 1.2 1997/07/28 21:32:24 jim
# Added add method.
#
# Revision 1.1 1997/07/25 16:52:44 jim
# initial
#
# Revision 1.1 1997/07/25 15:49:40 jim
# initial
#
#
#install DA.py
#install DABase.py
#install __init__.py
#install browse.dtml
#install connectionAdd.dtml
#install db.py
#install gadfly
#install icons
#install table_info.dtml
#install table_menu.dtml
#install tables.dtml
##############################################################################
#
# Copyright
#
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
# rights reserved.
#
##############################################################################
__doc__='''Generic Database Adapter Package Registration
$Id: __init__.py,v 1.1 1998/04/15 15:10:39 jim Exp $'''
__version__='$Revision: 1.1 $'[11:-2]
import Globals, ImageFile
classes=('DA.Connection',)
database_type='Gadfly'
misc_={'conn':
ImageFile.ImageFile('AqueductDA/www/DBAdapterFolder_icon.gif')}
for icon in ('table', 'view', 'stable', 'what',
'field', 'text','bin','int','float',
'date','time','datetime'):
misc_[icon]=ImageFile.ImageFile('icons/%s.gif' % icon, globals())
meta_types=(
{'name':'Aqueduct %s Database Connection' % database_type,
'action':'manage_addAqueduct%sConnectionForm' % database_type,
},
)
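# The DA module (and, through it, the gadfly package) is imported lazily:
# sqlwhere.filename must be patched to point at the marshalled SQL grammar
# in this package's gadfly/ subdirectory before the parser is loaded.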
DA=None
def getDA():
global DA
if DA is None:
home=Globals.package_home(globals())
from gadfly import sqlwhere
sqlwhere.filename="%s/gadfly/sql.mar" % home
import DA
return DA
getDA()
def manage_addAqueductGadflyConnectionForm(self, REQUEST, *args, **kw):
" "
DA=getDA()
return DA.addConnectionForm(
self,REQUEST,
database_type=database_type,
data_sources=DA.data_sources)
def manage_addAqueductGadflyConnection(
self, id, title, connection, check=None, REQUEST=None):
" "
return getDA().manage_addAqueductGadflyConnection(
self, id, title, connection, check, REQUEST)
methods={
'manage_addAqueductGadflyConnection':
manage_addAqueductGadflyConnection,
'manage_addAqueductGadflyConnectionForm':
manage_addAqueductGadflyConnectionForm,
}
##############################################################################
#
# $Log: __init__.py,v $
# Revision 1.1 1998/04/15 15:10:39 jim
# initial
#
# Revision 1.4 1998/01/29 16:26:45 brian
# Added eval support
#
# Revision 1.3 1998/01/16 18:35:08 jim
# Updated with new da protocols.
#
# Revision 1.2 1997/11/26 20:04:22 jim
# New Architecture, note that backward compatibility tools are needed
#
# Revision 1.1 1997/07/25 16:52:46 jim
# initial
#
# Revision 1.1 1997/07/25 15:49:41 jim
# initial
#
#
<html>
<head><title><!--#var title_or_id--> tables</title></head>
<body bgcolor="#FFFFFF" link="#000099" vlink="#555555" alink="#77003B">
<!--#var manage_tabs-->
<!--#tree header=info-->
<IMG SRC="<!--#var SCRIPT_NAME-->/misc_/AqueductGadfly/<!--#var icon-->"
ALT="<!--#var Type-->" BORDER="0">
<!--#var Name--><!--#var Description-->
<!--#/tree-->
</body>
</html>
<html>
<head>
<title>Add Aqueduct <!--#var database_type--> Database Connection</title>
</head>
<body bgcolor="#FFFFFF" link="#000099" vlink="#555555" alink="#77003B">
<h2>Add Aqueduct <!--#var database_type--> Database Connection</h2>
<!--#if data_sources-->
<form action="manage_addAqueduct<!--#var database_type-->Connection"
method="POST">
<table cellspacing="2">
<tr>
<th align="LEFT" valign="TOP">Id</th>
<td align="LEFT" valign="TOP">
<input type="TEXT" name="id" size="40"
value="<!--#var database_type-->_database_connection">
</td>
</tr>
<tr>
<th align="LEFT" valign="TOP"><em>Title</em></th>
<td align="LEFT" valign="TOP">
<input type="TEXT" name="title" size="40"
value="Aqueduct <!--#var database_type--> Database Connection">
</td>
</tr>
<tr>
<th align="LEFT" valign="TOP">Select a Data Source</th>
<td align="LEFT" valign="TOP"><select name=connection size=7>
<!--#in data_sources-->
<option value="<!--#var sequence-key-->">
<!--#var sequence-key--><!--#if
sequence-item-->, <!--#var sequence-item--><!--#/if-->
</option>
<!--#/in-->
</select></td>
</tr>
<tr>
<th align="LEFT" valign="TOP">Connect immediately</th>
<td align="LEFT" valign="TOP">
<input name="check" type="CHECKBOX" value="YES" CHECKED>
</td>
</tr>
<tr>
<td></td>
<td><br><input type="SUBMIT" value="Add"></td>
</tr>
</table>
</form>
<!--#else-->
Sorry, you cannot create any Aqueduct <!--#var database_type--> Database
Connections because no <!--#var database_type--> databases exist, or
all of the existing databases are in use.
<!--#/if-->
</body>
</html>
'''$Id: db.py,v 1.1 1998/04/15 15:10:41 jim Exp $'''
# Copyright
#
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
# rights reserved.
#
__version__='$Revision: 1.1 $'[11:-2]
import string, sys, os
from string import strip, split, find, join
from gadfly import gadfly
import Globals
data_dir=os.path.join(Globals.data_dir,'gadfly')
def manage_DataSources():
return map(
lambda d: (d,''),
filter(lambda f, i=os.path.isdir, d=data_dir, j=os.path.join:
i(j(d,f)),
os.listdir(data_dir))
)
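# DB wraps a single gadfly connection and presents table and column
# metadata in the dictionary shapes the DABase browser code expects
# (TABLE_NAME/TABLE_TYPE and Name/Type/Precision/Scale/Nullable).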
class DB:
database_error=gadfly.error
def tables(self,*args,**kw):
return map(lambda name: {
'TABLE_NAME': name,
'TABLE_TYPE': 'TABLE',
}, self.db.table_names())
def columns(self, table_name):
return map(lambda col: {
'Name': col.colid, 'Type': col.datatype, 'Precision': 0,
'Scale': 0, 'Nullable': 'with Null'
}, self.db.database.datadefs[table_name].colelts)
def __init__(self,connection):
path=os.path
dir=path.join(data_dir,connection)
if not path.isdir(dir):
            raise self.database_error, 'invalid database, ' + connection
if not path.exists(path.join(dir,connection+".gfd")):
db=gadfly.gadfly()
db.startup(connection,dir)
else: db=gadfly.gadfly(connection,dir)
self.connection=connection
self.db=db
self.cursor=db.cursor()
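    # query() executes one or more statements (separated by null
    # characters) and renders any single select result in tab-delimited
    # RDB format: a line of column names, a line of type codes, then the
    # data rows (see the maybe_* type sniffers below).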
def query(self,query_string, max_rows=9999999):
self._register()
c=self.db.cursor()
queries=filter(None, map(strip,split(query_string, '\0')))
if not queries: raise 'Query Error', 'empty query'
names=None
result='cool\ns\n'
for qs in queries:
c.execute(qs)
d=c.description
if d is None: continue
if names is not None:
raise 'Query Error', (
'select in multiple sql-statement query'
)
names=map(lambda d: d[0], d)
results=c.fetchmany(max_rows)
nv=len(names)
indexes=range(nv)
row=['']*nv
defs=[maybe_int]*nv
j=join
rdb=[j(names,'\t'),None]
append=rdb.append
for result in results:
for i in indexes:
try: row[i]=defs[i](result[i])
except NewType, v: row[i], defs[i] = v
append(j(row,'\t'))
rdb[1]=j(map(lambda d, Defs=Defs: Defs[d], defs),'\t')
rdb.append('')
result=j(rdb,'\n')
return result
class _p_jar:
# This is place holder for new transaction machinery 2pc
def __init__(self, db=None): self.db=db
def begin_commit(self, *args): pass
def finish_commit(self, *args): pass
_p_jar=_p_jar(_p_jar())
_p_oid=_p_changed=_registered=None
def _register(self):
if not self._registered:
try:
get_transaction().register(self)
self._registered=1
except: pass
def __inform_commit__(self, *ignored):
self.db.commit()
self._registered=0
def __inform_abort__(self, *ignored):
self.db.rollback()
self.db.checkpoint()
self._registered=0
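# Type sniffing for RDB output: each converter below either returns a
# string rendering of the value or raises NewType with a pair
# (converted_value, replacement_converter), promoting the column from
# int to float to string to escaped text as wider values are seen.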
NewType="Excecption to raise when sniffing types, blech"
def maybe_int(v, int_type=type(0), float_type=type(0.0), t=type):
t=t(v)
if t is int_type: return str(v)
if v is None or v=='': return ''
if t is float_type: raise NewType, (maybe_float(v), maybe_float)
raise NewType, (maybe_string(v), maybe_string)
def maybe_float(v, int_type=type(0), float_type=type(0.0), t=type):
t=t(v)
if t is int_type or t is float_type: return str(v)
if v is None or v=='': return ''
raise NewType, (maybe_string(v), maybe_string)
def maybe_string(v):
v=str(v)
    if find(v,'\t') >= 0 or find(v,'\n') >= 0:
raise NewType, (must_be_text(v), must_be_text)
return v
def must_be_text(v, f=find, j=join, s=split):
    if f(v,'\\') >= 0:
v=j(s(v,'\\'),'\\\\')
v=j(s(v,'\t'),'\\t')
v=j(s(v,'\n'),'\\n')
return v
Defs={maybe_int: 'i', maybe_float:'n', maybe_string:'s', must_be_text:'t'}
##########################################################################
#
# $Log: db.py,v $
# Revision 1.1 1998/04/15 15:10:41 jim
# initial
#
# Revision 1.3 1997/08/06 18:21:27 jim
# Renamed description->title and name->id and other changes
#
# Revision 1.2 1997/08/06 14:29:35 jim
# Changed to use abstract dbi base.
#
The following copyright is modified from the Python copyright.
Copyright Notice
----------------
The kjParsing source is copyrighted, but you can freely use and copy it
as long as you don't change or remove the copyright:
Copyright Aaron Robert Watters, 1994
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appears in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation.
AARON ROBERT WATTERS DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL AARON ROBERT WATTERS BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Signature
---------
Aaron Robert Watters
Department of Computer and Information Sciences
New Jersey Institute of Technology
University Heights
Newark, NJ 07102
phone (201)596-2666
fax (201)596-5777
home phone (908)545-3367
email: aaron@vienna.njit.edu
# Grammar generation
# for lisp lists with strings, ints, vars, print, and setq
# set this variable to regenerate the grammar on each load
REGENERATEONLOAD = 1
import string
GRAMMARSTRING ="""
Value :: ## indicates Value is the root nonterminal for the grammar
@R SetqRule :: Value >> ( setq var Value )
@R ListRule :: Value >> ( ListTail
@R TailFull :: ListTail >> Value ListTail
@R TailEmpty :: ListTail >> )
@R Varrule :: Value >> var
@R Intrule :: Value >> int
@R Strrule :: Value >> str
@R PrintRule :: Value >> ( print Value )
"""
COMPILEDFILENAME = "TESTLispG.py"
MARSHALLEDFILENAME = "TESTLispG.mar"
LISPCOMMENTREGEX = ";.*"
INTREGEX = "["+string.digits+"]+"
STRREGEX = '"[^\n"]*"'
VARREGEX = "["+string.letters+"]["+string.letters+string.digits+"]*"
### declare interpretation functions and regex's for terminals
def intInterp( str ):
return string.atoi(str)
def stripQuotes( str ):
return str[1:len(str)-1]
def echo(string):
return string
def DeclareTerminals(Grammar):
Grammar.Addterm("int", INTREGEX, intInterp)
Grammar.Addterm("str", STRREGEX, stripQuotes)
Grammar.Addterm("var", VARREGEX, echo)
### declare the rule reduction interpretation functions.
def EchoValue( list, Context ):
return list[0]
def VarValue( list, Context ):
varName = list[0]
if Context.has_key(varName):
return Context[varName]
else:
raise NameError, "no such lisp variable in context "+varName
def NilTail( list, Context ):
return []
def AddToList( list, Context ):
return [ list[0] ] + list[1]
def MakeList( list, Context ):
return list[1]
def DoSetq( list, Context):
Context[ list[2] ] = list[3]
return list[3]
def DoPrint( list, Context ):
print list[2]
return list[2]
def BindRules(Grammar):
Grammar.Bind( "Intrule", EchoValue )
Grammar.Bind( "Strrule", EchoValue )
Grammar.Bind( "Varrule", VarValue )
Grammar.Bind( "TailEmpty", NilTail )
Grammar.Bind( "TailFull", AddToList )
Grammar.Bind( "ListRule", MakeList )
Grammar.Bind( "SetqRule", DoSetq )
Grammar.Bind( "PrintRule", DoPrint )
# This function generates the grammar and dumps it to a file.
def GrammarBuild():
import kjParseBuild
LispG = kjParseBuild.NullCGrammar()
LispG.SetCaseSensitivity(0) # grammar is not case sensitive for keywords
DeclareTerminals(LispG)
LispG.Keywords("setq print")
LispG.punct("().")
LispG.Nonterms("Value ListTail")
LispG.comments([LISPCOMMENTREGEX])
LispG.Declarerules(GRAMMARSTRING)
LispG.Compile()
print "dumping as python to "+COMPILEDFILENAME
outfile = open(COMPILEDFILENAME, "w")
LispG.Reconstruct("LispG",outfile,"GRAMMAR")
outfile.close()
print "dumping as binary to "+MARSHALLEDFILENAME
outfile = open(MARSHALLEDFILENAME, "w")
LispG.MarshalDump(outfile)
outfile.close()
BindRules(LispG)
return LispG
# this function initializes the compiled grammar from the generated file.
def LoadLispG():
import TESTLispG
# reload to make sure we get the most recent version!
# (only needed when debugging the grammar).
reload(TESTLispG)
LispG = TESTLispG.GRAMMAR()
DeclareTerminals(LispG)
BindRules(LispG)
return LispG
def unMarshalLispG():
import kjParser
infile = open(MARSHALLEDFILENAME, "r")
LispG = kjParser.UnMarshalGram(infile)
infile.close()
DeclareTerminals(LispG)
BindRules(LispG)
return LispG
########## test the grammar generation
if REGENERATEONLOAD:
print "(re)generating the LispG grammar in file TESTLispG.py"
Dummy = GrammarBuild()
print "(re)generation done."
print "loading grammar as python"
LispG = LoadLispG()
### declare an initial context, and do some tests.
Context = { 'x':3 }
test1 = LispG.DoParse1( '()', Context)
test2 = LispG.DoParse1( '(123)', Context)
test3 = LispG.DoParse1( '(x)', Context)
test4 = LispG.DoParse1( '" a string "', Context)
test5 = LispG.DoParse1( '(setq y (1 2 3) )', Context )
test6 = LispG.DoParse1( '(SeTq x ("a string" "another" 0))', Context )
test7str = """
; this is a lisp comment
(setq abc (("a" x)
("b" (setq d 12))
("c" y) ) ; another lisp comment
)
"""
test7 = LispG.DoParse1( test7str, Context)
test8 = LispG.DoParse1( '(print (1 x d))', Context)
print "unmarshalling the grammar"
LispG2 = unMarshalLispG()
### declare an initial context, and do some tests.
Context = { 'x':3 }
test1 = LispG2.DoParse1( '()', Context)
test2 = LispG2.DoParse1( '(123)', Context)
test3 = LispG2.DoParse1( '(x)', Context)
test4 = LispG2.DoParse1( '" a string "', Context)
test5 = LispG2.DoParse1( '(setq y (1 2 3) )', Context )
test6 = LispG2.DoParse1( '(SeTq x ("a string" "another" 0))', Context )
test7str = """
; this is a lisp comment
(setq abc (("a" x)
("b" (setq d 12))
("c" y) ) ; another lisp comment
)
"""
test7 = LispG2.DoParse1( test7str, Context)
test8 = LispG2.DoParse1( '(print (1 x d))', Context)
#
# test for kjParseBuild module automatic parser generation
#
# lisp lists with strings, ints, vars, and setq
import string
### The string representation for the grammar.
### Since this is used only by GrammarBuild()
### it could be put in a separate file with GrammarBuild()
### to save space/load time after Grammar compilation.
###
GRAMMARSTRING ="""
Value :: ## indicates Value is the root nonterminal for the grammar
@R SetqRule :: Value >> ( setq var Value )
@R ListRule :: Value >> ( ListTail
@R TailFull :: ListTail >> Value ListTail
@R TailEmpty :: ListTail >> )
@R Varrule :: Value >> var
@R Intrule :: Value >> int
@R Strrule :: Value >> str
"""
### the name of the file in which to create the compiled
### grammar declarations
COMPILEDFILENAME = "TESTLispG2.py"
### declare comment form(s) as regular expressions
LISPCOMMENTREGEX = ";.*"
### declare regular expression string constants for terminals
#integer terminal:::::::
INTREGEX = "["+string.digits+"]+"
#string terminal::::::::
STRREGEX = '"[^\n"]*"'
#var terminal::::::::
VARREGEX = "["+string.letters+"]["+string.letters+string.digits+"]*"
### declare interpretation functions for terminals
# int interpretation function: translates string to int:
# Could use string.atoi without the extra level of indirection
# but for demo purposes here it is.
#
def intInterp( str ):
return string.atoi(str)
# interpretation function for strings strips off the surrounding quotes.
def stripQuotes( str ):
if len(str)<2:
TypeError, "string too short?"
return str[1:len(str)-1]
# interpretation function for vars just returns the recognized string
def echo(string):
return string
# This function declares the nonterminals both in the
# "grammar generation phase" and in loading the compiled
# grammar after generation
#
def DeclareTerminals(Grammar):
Grammar.Addterm("int", INTREGEX, intInterp)
Grammar.Addterm("str", STRREGEX, stripQuotes)
Grammar.Addterm("var", VARREGEX, echo)
### declare the rule reduction interpretation functions.
# EchoValue() serves for Intrule and Strrule, since
# we just want to echo the value returned by the
# respective terminal interpretation functions.
#
# Parser delivers list of form [ interpreted_value ]
def EchoValue( list, Context ):
if len(list)!=1:
raise TypeError, "this shouldn't happen! (1)"
return list[0]
# for Varrule interpreter must try to look up the value
# in the Context dictionary
#
# Parser delivers list of form [ var_name ]
def VarValue( list, Context ):
if len(list)!=1:
raise TypeError, "Huh? (2)"
varName = list[0]
if Context.has_key(varName):
return Context[varName]
else:
raise NameError, "no such lisp variable in context "+varName
# for an empty tail, return the empty list
#
# Parser delivers list of form [")"]
def NilTail( list, Context ):
if len(list) != 1 or list[0] != ")":
return TypeError, "Bad reduction?"
return []
# For a full tail, add the new element to the front of the list
#
# Parser delivers list of form [Value, TailValue]
def AddToList( list, Context ):
if len(list) !=2:
return TypeError, "Bad reduction?"
return [ list[0] ] + list[1]
# For a list, simply return the list determined by the tail
#
# Parser delivers list of form ["(", TailValue ]
def MakeList( list, Context ):
if len(list)!=2 or list[0]!="(":
raise TypeError, "Bad reduction? (3)"
return list[1]
# For a setq, declare a new variable in the Context dictionary
#
# Parser delivers list of form # ["(", "setq", varName, Value, ")"]
def DoSetq( list, Context):
if len(list) != 5\
or list[0] != "("\
or list[1] != "setq"\
or list[4] != ")":
print list
raise TypeError, "Bad reduction? (4)"
VarName = list[2]
if type(VarName) != type(''):
raise TypeError, "Bad var name? (5)"
Value = list[3]
# add or set the variable in the Context dictionary
Context[ VarName ] = Value
return Value
# This function Binds the named rules of the Grammar string to their
# interpretation functions in a Grammar.
#
def BindRules(Grammar):
Grammar.Bind( "Intrule", EchoValue )
Grammar.Bind( "Strrule", EchoValue )
Grammar.Bind( "Varrule", VarValue )
Grammar.Bind( "TailEmpty", NilTail )
Grammar.Bind( "TailFull", AddToList )
Grammar.Bind( "ListRule", MakeList )
Grammar.Bind( "SetqRule", DoSetq )
# This function generates the grammar and dumps it to a file.
# Since it will be used only once (after debugging),
# it probably should be put in another file to save memory/load-time.
#
# the result returned is a Grammar Object that can be used
# for testing/debugging purposes.
#
# (maybe this should be made into a generic function?)
def GrammarBuild():
import kjParseBuild
# initialize a Null compilable grammar to define
LispG = kjParseBuild.NullCGrammar()
# declare terminals for the grammar
DeclareTerminals(LispG)
# declare the keywords for the grammar
# defun is not used, included here for demo purposes only
LispG.Keywords("setq defun")
# Declare punctuations
# dot is not used here
LispG.punct("().")
# Declare Nonterms
LispG.Nonterms("Value ListTail")
# Declare comment forms
LispG.comments([LISPCOMMENTREGEX])
# Declare rules
LispG.Declarerules(GRAMMARSTRING)
# Compile the grammar
LispG.Compile()
# Write the grammar to a file except for
# the function bindings (which must be rebound)
outfile = open(COMPILEDFILENAME, "w")
LispG.Reconstruct("LispG",outfile,"GRAMMAR")
outfile.close()
# for debugging purposes only, bind the rules
# in the generated grammar
BindRules(LispG)
# return the generated Grammar
return LispG
# this function initializes the compiled grammar from
# the generated file.
def LoadLispG():
import TESTLispG2
# make sure we have most recent version (during debugging)
reload(TESTLispG2)
# evaluate the grammar function from generated file
LispG = TESTLispG2.GRAMMAR()
# bind the semantics functions
DeclareTerminals(LispG)
BindRules(LispG)
return LispG
########## test grammar generation
# do generation
Dummy = GrammarBuild()
# load the grammar from the file as LispG
LispG = LoadLispG()
# declare an initial context, and do some tests.
Context = { "x":3 }
test1 = LispG.DoParse1( "()", Context)
test2 = LispG.DoParse1( "(123)", Context)
test3 = LispG.DoParse1( "(x)", Context)
test4 = LispG.DoParse1( '" a string "', Context)
test5 = LispG.DoParse1( "(setq y (1 2 3) )", Context )
test6 = LispG.DoParse1( '(setq x ("a string" "another" 0))', Context )
test7str = """
; this is a lisp comment
(setq abc (("a" x)
("b" (setq d 12))
("c" y) ) ; another lisp comment
)
"""
test7 = LispG.DoParse1( test7str, Context)
# this was used for debugging null productions (a nearly full sql grammar
# is available on request).
#set this to automatically rebuild the grammar.
REBUILD = 1
MARSHALFILE = "SQLTEST.mar"
SELECTRULES = """
## highest level for select statement (not select for update)
select-statement ::
@R selectR :: select-statement >>
SELECT
from-clause
where-clause
group-by-clause
having-clause
## generalized to allow null from clause eg: select 2+2
@R fromNull :: from-clause >>
@R fromFull :: from-clause >> FROM
@R whereNull :: where-clause >>
@R whereFull :: where-clause >> WHERE
@R groupNull :: group-by-clause >>
@R groupFull :: group-by-clause >> GROUP BY
@R havingNull :: having-clause >>
@R havingFull :: having-clause >> HAVING
@R unionNull :: union-clause >>
@R unionFull :: union-clause >> UNION
"""
SELECTNONTERMS = """
select-statement
all-distinct select-list table-reference-list
where-clause group-by-clause having-clause union-clause
maybe-order-by
search-condition column-list maybe-all order-by-clause
column-name from-clause
"""
# of these the following need resolution
# (select-list) (table-reference-list)
# (search-condition) order-by-clause (column-name)
SELECTKEYWORDS = """
SELECT FROM WHERE GROUP BY HAVING UNION DISTINCT ALL AS
"""
# test generation of the grammar
def BuildSQLG():
import kjParseBuild
SQLG = kjParseBuild.NullCGrammar()
SQLG.SetCaseSensitivity(0)
SQLG.Keywords(SELECTKEYWORDS)
SQLG.Nonterms(SELECTNONTERMS)
# no comments yet
SQLG.Declarerules(SELECTRULES)
print "building"
SQLG.Compile()
print "marshaling"
outfile = open( MARSHALFILE, "w")
SQLG.MarshalDump(outfile)
outfile.close()
return SQLG
# load function
def LoadSQLG():
import kjParser
print "unmarshalling"
infile = open(MARSHALFILE, "r")
SQLG = kjParser.UnMarshalGram(infile)
infile.close()
return SQLG
#### for testing
if REBUILD:
SQLG0 = BuildSQLG()
print " rebuilt SQLG0 as compilable grammar"
SQLG = LoadSQLG()
print " build SQLG as reloaded grammar"
<html>
<head>
<title>
Gadfly: SQL Relational Database in Python, beta 0.2
</title>
</head>
<body bgcolor="#aaffff">
<h1>Gadfly: SQL Relational Database in Python, beta 0.2</h1>
<p>
<center>
<table border bgcolor="#ffffaa">
<tr><td><em>Time flies like an arrow. Fruit flies like a banana.</em></td></tr>
</table>
</center>
</p><p>
<blockquote>
Gadfly is a simple relational database system implemented in Python
based on the SQL Structured Query Language. This is a beta release.
The package requires installation before use, please see the section on
installation.
</blockquote>
</p>
<center>
<table bgcolor="#FFFFFF">
<tr><td><a href="http://www.pythonpros.com/arw">Aaron Watters</a></td></tr>
<tr><td><a href="mailto:arw@pythonpros.com">arw@pythonpros.com</a></td></tr>
<tr><td><a href="http://www.pythonpros.com/">not speaking for the "pros"</a></td></tr>
</table>
</center>
<h2>What is it?</h2>
Gadfly is a package that provides relational database functionality
entirely implemented in <a href="http://www.python.org">Python</a>.
It supports a subset of the intergalactic standard
RDBMS Structured Query Language SQL. Gadfly may be appropriate for
write-once, read-many database systems of moderate size.
It may also be useful for data preparation functions and as a data
analysis tool.
<p>
One of the most compelling aspects of Gadfly is that it
runs wherever Python runs and supports client/server on
any platform that supports the standard Python socket
interface. Even the file formats used by Gadfly for storage
are cross-platform -- a gadfly database directory can be
moved from Win95 to Linux using a binary copying mechanism
and gadfly will read and run the database.
<p>
It supports persistent databases consisting of a collection of
structured tables with indices, and a
<a href="gfSQL.html">large subset of SQL</a> for accessing
and modifying those tables. It supports
<a href="gfrecover.html">a log based recovery protocol</a>
which allows committed operations of a database to be recovered
even if the database was not shut down in a proper manner (i.e.,
in the event of a CPU or software crash, [but not in the event
of a disk crash]). It also supports a <a href="server.html">
TCP/IP Client/Server mode</a> where remote clients can access
a Gadfly database over a TCP/IP network (such as the Internet)
subject to configurable security mechanisms.
<p>
Because it
lacks (at this time) concurrency control and file-system based
indexing, it is not appropriate for very large
multiprocess transaction-based systems.
<p>
As a Python implementation (or mainly-Python if you install kjbuckets -- see
below) you might expect the system to be very slow. It's not.
Actually it uses some very sophisticated data structures to provide
a (I hope) correct and (I think) relatively
efficient implementation.
I hope to continue to improve the performance in future releases.
The system is really intended to be used with the kjbuckets
builtin for speed, see below.
<p>
Since Gadfly depends intimately on the
<a href="http://www.pythonpros.com/arw/kwParsing">kwParsing</a>
package it is distributed
as part of the kwParsing package, under the same generous copyright.
<p>
As a beta release I reserve the right to change anything about this implementation
as I see fit. However this software may be used for any purpose, provided
all users understand that there is no warranty, and that if you like it a
"thank you" or other constructive comment sent to
<a href="mailto:arw@pythonpros.com">the author</a>
would be appreciated.
<h2>Why?</h2>
<p>
Gadfly allows Python programs a convenient way
to store, retrieve and query tabular data without having to rely on any external
database engine or package. That is, Gadfly provides
a simple, easy, and relatively
efficient in-memory relational database style engine for Python programs,
complete with a notion of a "committed, recoverable transaction" and "aborts".
Eventually, it might evolve to provide indexed-file access, concurrency
control and other stuff too.
<p>
Looking at the /etc directory in unix or at the Registry under win32 or at
the buzzillions of configuration files one finds sitting around file systems
it becomes apparent that modern programs depend heavily on tabular data.
Furthermore as memory prices continue to drop and inexpensive machines keep
growing larger and larger memory capacity it is clear that more and more
database-style work
can be done on largish data sets in memory, and hence a simple in-memory
SQL implementation like Gadfly may be useful for serious work.
<p>
Gadfly uses relational style representations and the SQL query language
primarily because these are widely understood and familiar to many
programmers. I'd love to invent my own query language and database paradigm
and impose it on the rest of the world, but I have trouble imagining
how I might accomplish this. Especially that last part.
Truth be told, I also like the relational
paradigm, generally speaking, since it provides a more well defined, more
well thought out, and more useful
set of functionality than I've found elsewhere. SQL can't do everything,
but I believe it is successful in part because it can do a lot of important
things easily and well. (Python can do everything else...)
<h3>For Example</h3>
I once held a job where I extracted data out of various formats
and from multiple sources; processed it
to determine least-cost routing information; and generated configuration files
that controlled large telephony switches. Oracle as an SQL engine
to aid in this effort was complete overkill and something of an administrative
nightmare. Something like Gadfly might have worked better for the periodic
analysis/generation process I implemented.
<p>
Oracle is better for other purposes, of course, but there are many situations
where something simpler (and infinitely cheaper) like Gadfly might help out a
lot too.
<h3>Or it might be useful for education or research</h3>
I've also taught SQL many times and have often thought that it would be nice
to have a simple, cross platform, SQL implementation, without all the extra
junk associated with most capital D Database Engines. Gadfly may be the simplest
and most cross-platform SQL implementation in existence!
<h3>Yeah, but can't you just use Python without all this SQL junk?</h3>
Sure, but no matter how you slice it the analogue of
<pre>
select l.drinker, l.beer, count(*), sum(l.perday*f.perweek)
from likes l, frequents f
where l.drinker=f.drinker
group by l.drinker, l.beer
order by 4 desc, l.drinker, l.beer
</pre>
is a lot of Python code, and it's harder to get right. I might
be wrong, but your home grown implementation might also be slower
than using Gadfly's query planner and optimized data structures.
Also gadfly does the dirty work of maintaining persistent indices
and using them when appropriate.
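<p>
For comparison, here is a rough sketch (the variable names are invented
for illustration) of what a hand-rolled version of that query might look
like, assuming likes holds (drinker, beer, perday) tuples and frequents
holds (drinker, bar, perweek) tuples:
<pre>
# join likes and frequents on drinker, grouping by (drinker, beer)
groups = {}
for (ldrinker, beer, perday) in likes:
    for (fdrinker, bar, perweek) in frequents:
        if ldrinker == fdrinker:
            key = (ldrinker, beer)
            if groups.has_key(key):
                (count, total) = groups[key]
            else:
                (count, total) = (0, 0)
            groups[key] = (count + 1, total + perday * perweek)
# order by the sum descending, then by drinker and beer
rows = []
for ((drinker, beer), (count, total)) in groups.items():
    rows.append((-total, drinker, beer, count))
rows.sort()
for (negtotal, drinker, beer, count) in rows:
    print drinker, beer, count, -negtotal
</pre>
And this naive version does none of the query planning or index
selection that Gadfly performs automatically.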
<h2>Use</h2>
<p>
The main "gadfly.py" module attempts to faithfully adhere to
<a href="http://www.python.org/sigs/db-sig/DatabaseAPI.html">
Greg Stein's
Python Database API, as discussed and certified by the Python DB-SIG.</a>
That said, some of the API that I didn't really understand is not implemented
yet. Please look to gadfly.py to determine exactly what parts of the
interface are implemented or stubbed.
<p>
Concurrent database updates are not supported. The "databases" are
currently designed to be written/modified by one process in isolation.
Multiple processes can access a Gadfly database when accesses are
arbitrated by a
<a href="server.html">tcp/ip Gadfly server process</a>.
<h3>Creating a new database</h3>
<p>
Unlike most Python/database-engine interfaces you must create
a Gadfly database using Python (whereas with Oracle you'd use other
tools, for example). To accomplish this use
<pre>
import gadfly
connection = gadfly.gadfly()
</pre>
with no arguments and then startup a database using the startup
method.
<pre>
connection.startup("mydatabase", "mydirectory")
</pre>
Here "mydirectory" must be a directory which exists
and which can be written to in order to store the database files.
The startup will create some files (well, currently 1 file) in
"mydirectory". This will have the effect of clobbering any existing
Gadfly database called "mydatabase" in the directory "mydirectory".
Gadfly will prevent you from starting up the same connection twice,
however.
<p>
Note that the first "import gadfly" reads in and initializes some rather large
data structures used for parsing SQL, and thus may take longer than other
module imports.
<p>
Now with your new database you can create tables, populate them,
and commit the result when you are happy.
<pre>
cursor = connection.cursor()
cursor.execute("create table ph (nm varchar, ph varchar)")
cursor.execute("insert into ph(nm, ph) values ('arw', '3367')")
cursor.execute("select * from ph")
for x in cursor.fetchall():
print x
# prints ('arw', '3367')
connection.commit()
</pre>
<h3>Reconnecting to an existing database</h3>
Once a database exists you can reconnect to it as follows:
<pre>
import gadfly
connection = gadfly.gadfly("mydatabase", "mydirectory")
</pre>
This will read in the database tables with the most recently committed values.
The initialized database may now be queried and updated.
<pre>
cursor = connection.cursor()
cursor.execute("update ph set nm='aaron' where nm='arw'")
cursor.execute("select * from ph")
for x in cursor.fetchall():
print x
# prints ('aaron', '3367')
</pre>
If you do not wish to commit updates you may simply not execute a commit
on the connection object (which writes out the tables). If you wish to
restore the old values from the existing database use
<pre>
connection.abort()
</pre>
Updates are only stored upon a connection.commit(). [Actually, if
autocheckpoint is disabled, updates are only stored to table files
on checkpoint -- see the documentation on the recovery mechanism.]
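<p>
For example (a sketch based on the gadfly constructor arguments shown in
gadfly.py; the inserted row is just for illustration), you can disable
automatic checkpointing to batch several commits into a single dump of
the table files:
<pre>
import gadfly
connection = gadfly.gadfly("mydatabase", "mydirectory", autocheckpoint=0)
cursor = connection.cursor()
cursor.execute("insert into ph(nm, ph) values ('pat', '1234')")
connection.commit()      # recorded in the log; table files not rewritten
connection.checkpoint()  # now dump the mutated tables to disk
</pre>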
<p>
Use
<pre>
print cursor.pp()
</pre>
to "pretty print" the result of any evaluation (which might be None for a non-select).
<h2>Features</h2>
In this version all tables are read into memory
upon "connecting" to the database and "touched" tables are written out upon
checkpoint. Each table is represented
as a separate file in the destination directory, and there is a "data definition"
file as well (a list of data definition declarations). During active use
a log file appears in the active directory as well, and if the process
crashes this log file is used to recover committed operations.
<h3>The SELECT statement</h3>
At this point Gadfly supports quite a lot of the SQL semantics requested
in the ODBC 2.0 specification. Please see
<a href="gfSQL.html">the SQL constructs page</a> for a more
detailed presentation.
SQL statements supported
include the SELECT:
<pre>
SELECT [DISTINCT|ALL] expressions or *
FROM tables
[WHERE condition]
[GROUP BY group-expressions]
[HAVING aggregate-condition]
[union-clause]
[ORDER BY columns]
</pre>
This statement is quite powerful. It reads intuitively as follows:
<pre>
1) Make all combinations of rows from the tables (FROM line)
2) Eliminate those combinations not satisfying condition (WHERE line)
3) (if GROUP present) form aggregate groups that match on group-expressions
4) (if HAVING present) eliminate aggregate groups that don't satisfy
the aggregate-condition.
5) compute the columns to keep (SELECT line).
6) (if union-clause present) combine (union, difference, intersect)
the result with the result of another select statement.
7) if DISTINCT, throw out redundant entries.
8) (if ORDER present) order the result by the columns (ascending
or descending as specified, with precedence as listed).
</pre>
The actual implementation in gadfly is much more efficient than the
intuitive reading, particularly at steps 1 and 2 (which are combined
via optimizing transformations and hash join algorithms).
<p>
Conditions may include equalities, and inequalities of expressions.
Conditions may also be combined using AND, OR, NOT.
Expressions include column names, constants, and standard arithmetic
operations over them.
<p>
Embedded queries supported include subquery expressions, expr IN
(subselect),
quantified comparisons, and the
EXISTS (subselect) predicate.
<p>
Aggregate tests and computations can only be applied after the GROUPing
and before the columns are selected (steps 3,4,5).
Aggregate operations include COUNT(*), COUNT(expression), AVG(expression), SUM(expression),
MAX(expression), MIN(expression),
and the non-standard MEDIAN(expression). These may be applied to DISTINCT values (throwing
out redundancies, as in COUNT(DISTINCT drinker)). If no GROUPing is present the
aggregate computations apply to the entire result after step 2.
<p>
There is much more to know about the SELECT statement.
The test suite (gftest.py) gives numerous examples of SELECT statements, including:
<pre>
select * from frequents
where drinker = 'norm'
select drinker from likes
union
select drinker from frequents
select drinker from likes
except
select drinker from frequents
select * from frequents
where drinker>'norm' or drinker<'b'
select *
from frequents as f, serves as s
where f.bar = s.bar
select *
from frequents as f, serves as s
where f.bar = s.bar and
not exists(
select l.drinker, l.beer
from likes l
where l.drinker=f.drinker and s.beer=l.beer)
select sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves
select bar, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves
where beer&lt;&gt;'bud'
group by bar
having sum(quantity)>500 or count(*)>3
order by 2 desc
select l.drinker, l.beer, count(*), sum(l.perday*f.perweek)
from likes l, frequents f
where l.drinker=f.drinker
group by l.drinker, l.beer
order by 4 desc, l.drinker, l.beer
</pre>
Please examine sqlgram.py for a precise definition of the supported syntax.
Please find any of the 500 books on SQL for a description of the meaning of
these constructs. Please inform me if any of them give the wrong result
when executed in Gadfly!
<h3>Table creation and "data types"</h3>
Create tables using the CREATE TABLE statement
<pre>
CREATE TABLE name (colname datatype [, colname datatype...])
</pre>
Data types currently "supported" are integer, float, and varchar.
They are ignored by the implementation; anything that is hashable
and marshallable can currently go in any column (but that is
likely to change).
For example
<pre>
create table frequents
(drinker varchar,
bar varchar,
perweek integer)
</pre>
At present you can put tuples, complexes, or anything else
into a column specified as "varchar". Don't count on that
always being true, please.
<h3>Other supported statements</h3>
Gadfly also supports the searched DELETE and UPDATE; INSERT VALUES and INSERT subselect;
CREATE/DROP INDEX, and DROP TABLE. These have the informal syntax
<pre>
DELETE FROM table WHERE condition
UPDATE table SET col=expr [, col=expr...] WHERE condition
INSERT INTO table [(column [, column...])] values (value [, value...])
INSERT INTO table [(column [, column...])] subselect
CREATE INDEX name ON table (column [, column...])
DROP TABLE table
DROP INDEX name
</pre>
For example
<pre>
delete from templikes where be='rollingrock'
update templikes set dr='norman' where dr='norm'
insert into ph(nm,ph) values ('nan', '0356')
insert into templikes(dr, be)
select drinker, beer from likes
create index sbb on serves (beer, bar)
drop table templikes
drop index tdindex
</pre>
Multiple statements may be executed in one cursor.execute(S)
by separating the statements with semicolons in S, for example
S might have the string value
<pre>
drop index tdindex;
drop table templikes
</pre>
(no final semicolon please!).
<p>
Please see gftest.py for examples of most of these. Remember that
SQL is case insensitive (capitalization of keywords doesn't matter).
Please see sqlgram.py for a precise definition of all supported constructs.
<h3>Dynamic Values</h3>
Expressions also include the special expression '?' (the ODBC-style
dynamic expression)
as in
<pre>
insertstat = "insert into ph(nm,ph) values (?, ?)"
cursor.execute(insertstat, ('nan', "0356"))
cursor.execute(insertstat, ('bill', "2356"))
cursor.execute(insertstat, ('tom', "4356"))
</pre>
Dynamic values allow the cursor to use the same parsed
expression many times for a similar operation. Above
the insertstat is parsed and bound to the database only
once. Using dynamic attributes should speed up accesses.
Thus the above should run much faster than the equivalent
<pre>
cursor.execute("insert into ph(nm,ph) values ('nan', '0356')");
cursor.execute("insert into ph(nm,ph) values ('bill', '2356')");
cursor.execute("insert into ph(nm,ph) values ('tom', '4356')");
</pre>
Dynamic attributes can appear in other statements containing expressions (such
as SELECTs, UPDATEs and DELETEs too).
<p>
For SELECT, UPDATE, and DELETE the dynamic expression substitutions
must consist of a single tuple, as in
<pre>
stat = "select * from ph where nm=?"
cursor.execute(stat, ("nan",))
...
cursor.execute(stat, ("bob",))
...
</pre>
Since the dynamic substitution eliminates the need for parsing and
binding (expensive operations!) the above should run faster than
the equivalent
<pre>
cursor.execute("select * from ph where nm='nan'")
...
cursor.execute("select * from ph where nm='bob'")
...
</pre>
If you repeat several similar queries multiple times, associate each query
"template string"
with a unique cursor object so that each template must be parsed and bound
only once. Note that some relatively complex queries from the test suite
run 2 to 3 times faster after they have been parsed and bound, even
without the kjbuckets builtin. With kjbuckets the same ran 5 to 10 times
faster.
<h3>Multiple Batch Inserts and Dynamic Values</h3>
For the special case of INSERT VALUES a list of substitution tuples
allows the query engine to perform the inserts in optimized
batch mode. Thus the fastest way to perform the three inserts
given earlier is
<pre>
data = [('nan', "0356"), ('bill', "2356"), ('tom', "4356")]
stat = "insert into ph(nm,ph) values (?, ?)"
cursor.execute(stat, data)
</pre>
...and it would be even faster if the cursor had previously executed
the stat with different data (since then no parsing or binding would
occur).
<h2>Installation</h2>
To guarantee correct installation, please follow this procedure. Of
course you must have
<a href="http://www.python.org/">Python</a>
in order to use this package!
<p>
<strong>Unpack</strong> the package into a directory on the Python search path.
<p>
<strong>Install</strong>: In the installation directory run the command
<pre>
% python gfinstall.py
</pre>
This creates sqlwhere.py, which aids in locating the parser
data file required for parsing SQL.
In some circumstances this may
also create the data file. If it does, the parser generation process
may take several minutes,
but it only needs to run during installation.
<p>
<strong>If you think there is a problem</strong> Use
<pre>
% python gfinstall.py force
</pre>
to force regeneration of the parser structures (for
example if you modify the grammar or suspect the grammar
data file sql.mar is corrupt).
<p>
<strong>Test/compile</strong>: Now (possibly in a different directory) run
<pre>
% mkdir dbtest
% python gftest.py dbtest
</pre>
This will create a test database and exercise the system to make sure it works.
It also should have the side effect of byte compiling all *.py files required
by gadfly.
Run gftest.py as a user with write permission to the installation directory in order
to guarantee byte compilation. It should just work ;c).
<h3>Interactive testing</h3>
After installation, you may interactively test the created database from the
same directory using
the interactive interpreter, for example, as in
<pre>
Python 1.4 (Feb 4 1997) [MSC 32 bit (Intel)]
Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam
&gt;&gt;&gt; from gadfly import gadfly
&gt;&gt;&gt; connection = gadfly("test", "dbtest")
&gt;&gt;&gt; cursor = connection.cursor()
&gt;&gt;&gt; cursor.execute("select * from frequents")
&gt;&gt;&gt; cursor.description
(('DRINKER', None, None, None, None, None, None), ('PERWEEK', None, None, None,
None, None, None), ('BAR', None, None, None, None, None, None))
&gt;&gt;&gt; print cursor.pp()
DRINKER | PERWEEK | BAR
============================
adam | 1 | lolas
woody | 5 | cheers
sam | 5 | cheers
norm | 3 | cheers
wilt | 2 | joes
norm | 1 | joes
lola | 6 | lolas
norm | 2 | lolas
woody | 1 | lolas
pierre | 0 | frankies
&gt;&gt;&gt;
</pre>
<h2>Architecture</h2>
The SQL grammar is described in sqlgram.py, the binding of the grammar constructs
to semantic objects is performed in sqlbind.py, and the semantic objects and their
execution strategies are defined in sqlsem.py. The semantics uses a lot of classical
and non-classical logic (cylindric logic, to be precise) as well as
optimization heuristics
to define a relatively efficient and hopefully correct implementation of SQL.
I recommend the brave have a look at sqlsem.py for a glimpse of the 12 years of
research into databases, logic, and programming languages that contributed bits to
this work. The underlying logic (in a different framework) is given in
<pre>
A. Watters, "Interpreting a Reconstructed Relational Calculus",
ACM SIGMOD Proceedings, 1993, Washington DC, pp. 367-376.
</pre>
The most basic data structures of the implementation are given in either
kjbuckets0.py or the faster
<a href="http://www.pythonpros.com/arw/kjbuckets/">kjbucketsmodule.c</a>,
which implement the same
data type signatures in Python and in a C extension to Python respectively.
<p>
The gadfly.py module is a simple wrapper that provides a standard DBAPI interface
to the system. The installation script gfinstall.py attempts to install the system,
creating the grammar file sql.mar if needed (or if "forced"). The test suite
gftest.py (which requires a writeable directory argument) attempts to provide
a regression test and a demonstration of the system. The SQL parser also requires
the <a href="http://www.pythonpros.com/arw/kwParsing">kwParsing</a> parser
generation package, which consists of a number of additional python modules.
<h2>Comments</h2>
Please find bugs and report them to <a href="mailto:arw@pythonpros.com">me</a>.
<p>
The query engine should run faster if you have the builtin
module kjbuckets installed. Otherwise it will use a "python imitation"
kjbuckets0.py. In one test the test suite ran two times faster using kjbuckets.
I suspect it will have a higher payoff for larger data sets.
<p>
<a href="mailto:arw@pythonpros.com">Suggestions welcome.</a>
</body></html>
"""main entry point for gadfly sql."""
import sqlgen, sqlbind
sql = sqlgen.getSQL()
sql = sqlbind.BindRules(sql)
error = "gadfly_error"
verbosity = 0
class gadfly:
"""as per the DBAPI spec "gadfly" is the connection object."""
closed = 0
verbose = verbosity # debug!
def __init__(self, databasename=None, directory=None,
forscratch=0, autocheckpoint=1, verbose=0):
verbose = self.verbose = self.verbose or verbose
# checkpoint on each commit if set
self.autocheckpoint = autocheckpoint
if verbose:
print "initializing gadfly instance", (\
databasename, directory, forscratch, verbose)
self.is_scratch = forscratch
self.databasename=databasename
self.directory = directory
self.fs = None
self.database = None
# db global transaction id
self.transid = 0
if databasename is not None:
self.open()
def transaction_log(self):
from gfdb0 import Transaction_Logger
if self.verbose:
print "new transaction log for", self.transid
return Transaction_Logger(self.database.log, self.transid, self.is_scratch)
# causes problems in 1.5?
#def __del__(self):
# """clear database on deletion, just in case of circularities"""
# # implicit checkpoint
# if self.verbose:
# print "deleting gadfly instance", self.databasename
# if not self.closed:
# self.close()
def checkpoint(self):
"""permanently record committed updates"""
# note: No transactions should be active at checkpoint for this implementation!
# implicit abort of active transactions!
verbose = self.verbose
if verbose:
print "checkpointing gadfly instance", self.databasename
db = self.database
log = db.log
# dump committed db to fs
fs = self.fs
if fs and db and not db.is_scratch:
# flush the log
if log:
if verbose: print "gadfly: committing log"
log.commit()
elif verbose:
print "gadfly: no log to commit"
if verbose: print "gadfly: dumping mutated db structures"
fs.dump(db)
elif verbose:
print "gadfly: no checkpoint required"
if verbose:
print "gadfly: new transid, reshadowing"
self.transid = self.transid+1
self.working_db.reshadow(db, self.transaction_log())
def startup(self, databasename, directory, scratch=0, verbose=0):
from gfdb0 import Database0, File_Storage0
verbose = self.verbose
if verbose:
print "gadfly: starting up new ", databasename
if self.database:
raise error, "cannot startup, database bound"
self.databasename=databasename
self.directory = directory
db = self.database = Database0()
db.is_scratch = scratch or self.is_scratch
self.fs = File_Storage0(databasename, directory)
self.working_db = Database0(db, self.transaction_log())
# commit initializes database files and log structure
self.commit()
# for now: all transactions serialized
# working db shared among all transactions/cursors
self.transid = self.transid+1
self.working_db = Database0(db, self.transaction_log())
def restart(self):
"""reload and rerun committed updates from log, discard uncommitted"""
# mainly for testing recovery.
if self.verbose:
print "gadfly: restarting database", self.databasename
self.database.clear()
self.working_db.clear()
self.working_db = None
self.database = None
self.open()
def open(self):
"""(re)load existing database"""
if self.verbose:
print "gadfly: loading database", self.databasename
from gfdb0 import File_Storage0, Database0
if self.directory:
directory = self.directory
else:
directory = "."
fs = self.fs = File_Storage0(self.databasename, directory)
db = self.database = fs.load(sql)
self.transid = self.transid+1
self.working_db = Database0(db, self.transaction_log())
def close(self):
"""checkpoint and clear the database"""
if self.closed: return
if self.verbose:
print "gadfly: closing database", self.databasename
db = self.database
if not db.is_scratch:
self.checkpoint()
if db: db.clear()
wdb = self.working_db
if wdb:
wdb.clear()
self.working_db = None
self.closed = 1
def commit(self):
"""commit the working database+transaction, flush log, new transid"""
verbose = self.verbose
autocheckpoint = self.autocheckpoint
if self.verbose:
print "gadfly: committing", self.transid, self.databasename
self.transid = self.transid+1
fs = self.fs
db = self.database
wdb = self.working_db
wdblog = wdb.log
if wdblog: wdblog.commit()
wdb.commit()
if fs and db and not db.is_scratch:
if autocheckpoint:
if verbose:
print "gadfly: autocheckpoint"
# skips a transid?
self.checkpoint()
else:
if verbose:
print "gadfly: no autocheckpoint"
wdb.reshadow(db, self.transaction_log())
else:
if verbose:
print "gadfly: scratch db, no logging, just reshadow"
wdb.reshadow(db, self.transaction_log())
def rollback(self):
"""discard the working db, new transid, recreate working db"""
verbose = self.verbose
if verbose:
print "gadfly: rolling back", self.transid, self.databasename
if not (self.fs or self.database):
raise error, "unbound, cannot rollback"
# discard updates in working database
self.working_db.clear()
self.transid = self.transid+1
self.working_db.reshadow(self.database, self.transaction_log())
#self.open()
def cursor(self):
if self.verbose:
print "gadfly: new cursor", self.databasename
db = self.database
if db is None:
raise error, "not bound to database"
return GF_Cursor(self)
def dumplog(self):
log = self.database.log
if log:
log.dump()
else:
print "no log to dump"
def table_names(self):
return self.working_db.relations()
def DUMP_ALL(self):
print "DUMPING ALL CONNECTION DATA", self.databasename, self.directory
print
print "***** BASE DATA"
print
print self.database
print
print "***** WORKING DATA"
print
print self.working_db
class GF_Cursor:
verbose = verbosity
arraysize = None
description = None
EVAL_DUMP = 0 # only for extreme debugging!
def __init__(self, gadfly_instance):
verbose = self.verbose = self.verbose or gadfly_instance.verbose
if verbose:
print "GF_Cursor.__init__", id(self)
self.connection = gadfly_instance
self.results = None
self.resultlist = None
self.statement = None
# make a shadow of the shadow db! (in case of errors)
from gfdb0 import Database0
self.shadow_db = Database0()
self.reshadow()
def reshadow(self):
if self.verbose:
print "GF_Cursor.reshadow", id(self)
db = self.connection.working_db
shadow = self.shadow_db
shadow.reshadow(db, db.log)
if self.verbose:
print "rels", shadow.rels.keys()
def close(self):
if self.verbose:
print "GF_Cursor.close", id(self)
self.connection = None
def reset_results(self):
if self.verbose:
print "GF_Cursor.reset_results", id(self)
rs = self.results
if rs is None:
raise error, "must execute first"
if len(rs)!=1:
raise error, "cannot retrieve multiple results"
rel = rs[0]
rows = rel.rows()
atts = rel.attributes()
tupatts = tuple(atts)
resultlist = list(rows)
if len(tupatts)==1:
att = tupatts[0]
for i in xrange(len(resultlist)):
resultlist[i] = (resultlist[i][att],)
else:
for i in xrange(len(resultlist)):
resultlist[i] = resultlist[i].dump(tupatts)
self.resultlist = resultlist
def fetchone(self):
if self.verbose:
print "GF_Cursor.fetchone", id(self)
r = self.resultlist
if r is None:
self.reset_results()
r = self.resultlist
if len(r)<1:
raise error, "no more results"
result = r[0]
del r[0]
return result
def fetchmany(self, size=None):
if self.verbose:
print "GF_Cursor.fetchmany", id(self)
r = self.resultlist
if r is None:
self.reset_results()
r = self.resultlist
if size is None:
size = len(r)
result = r[:size]
del r[:size]
return result
def fetchall(self):
if self.verbose:
print "GF_Cursor.fetchall", id(self)
return self.fetchmany()
def execute(self, statement=None, params=None):
"""execute operations, commit results if no error"""
success = 0
verbose = self.verbose
if verbose:
print "GF_Cursor.execute", id(self)
if statement is None and self.statement is None:
raise error, "cannot execute, statement not bound"
if statement!=self.statement:
if verbose: print "GF_cursor: new statement: parsing"
# only reparse on new statement.
self.statement=statement
from sqlsem import Parse_Context
context = Parse_Context()
cs = self.commands = sql.DoParse1(statement, context)
else:
if verbose: print "GF_cursor: old statment, not parsing"
cs = self.commands
# always rebind! (db may have changed)
if verbose: print "GF_Cursor: binding to temp db"
# make a new shadow of working db
# (should optimize?)
self.reshadow()
# get shadow of working database
database = self.shadow_db
if self.EVAL_DUMP:
print "***"
print "*** dumping connection parameters before eval"
print "***"
print "*** eval scratch db..."
print
print database
print
print "*** connection data"
print
self.connection.DUMP_ALL()
print "********** end of eval dump"
for i in xrange(len(cs)):
if verbose:
print "GFCursor binding\n", cs[i]
print database.rels.keys()
cs[i] = cs[i].relbind(database)
cs = self.commands
self.results = results = list(cs)
# only unshadow results on no error
try:
for i in xrange(len(cs)):
results[i] = cs[i].eval(params)
success = 1
finally:
#print "in finally", success
# only on no error...
if success:
# commit updates in shadow of working db (not in real db)
if verbose: print "GFCursor: successful eval, storing results in wdb"
database.log.flush()
# database commit does not imply transaction commit.
database.commit()
else:
if verbose:
print \
"GFCursor: UNSUCCESSFUL EVAL, discarding results and log entries"
self.statement = None
self.results = None
self.resultlist = None
database.log.reset()
# handle curs.description
self.description = None
if len(results)==1:
result0 = results[0]
try:
atts = result0.attributes()
except:
pass
else:
descriptions = list(atts)
fluff = (None,) * 6
for i in xrange(len(atts)):
descriptions[i] = (atts[i],) + fluff
self.description = tuple(descriptions)
self.resultlist = None
def setoutputsize(self, *args):
# not implemented
pass
def setinputsizes(self, *args):
# not implemented
pass
def pp(self):
"""return pretty-print string rep of current results"""
from string import join
stuff = map(repr, self.results)
return join(stuff, "\n\n")
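# A minimal usage sketch (not part of the original module), assuming the
# enclosing connection class is instantiated directly and started on an
# existing directory; names and values here are illustrative only:
#
#   g = gadfly()
#   g.startup("testdb", "/tmp/testdir")   # directory must already exist
#   c = g.cursor()
#   c.execute("create table frequents (drinker varchar, bar varchar)")
#   c.execute("insert into frequents values ('norm', 'cheers')")
#   g.commit()       # commits; also checkpoints unless autocheckpoint=0
#   c.execute("select * from frequents")
#   print c.pp()     # pretty-print the query result
#   g.close()        # checkpoint and shut down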
<html>
<head>
<title>
Gadfly SQL constructs
</title>
</head>
<body bgcolor="#ffffdd">
<h1>Gadfly SQL constructs</h1>
<blockquote>
This document describes SQL constructs supported by Gadfly.
The presentation
does not define the complete syntax -- see sqlgram.py for
the precise syntax as BNF -- nor the complete semantics --
see a good book on SQL for more detailed coverage of semantics
(or use the source, Luke ;c) ).
Also, please have a look at my
<a href="http://mulder.rutgers.edu/~aaron/dbnotes.cgi">evolving
database course notes</a> for more coverage of SQL.
Examples of all supported constructs are also shown in the
test suite source file gftest.py.
This document is only
a very brief guide, primarily of use to those who already
understand something about SQL -- it is neither a tutorial
nor a detailed discussion of syntax and semantics.
</blockquote>
<h1>The Standard, with omissions</h1>
<p>
Gadfly supports a large subset of ODBC 2.0 SQL. ODBC 2.0
was chosen because it provides a fairly strong set of
constructs, but does not include some of the more obscure
features of other SQL standards which would be extremely
difficult and complex to implement correctly
(and which are perhaps not used very frequently).
<p>
Supported features include views, groupings, aggregates,
subquery expressions, quantified subquery comparisons,
EXISTS, IN, UNION, EXCEPT, INTERSECT, searched mutations and
indices, among others (see below).
<p>
Some important omissions from ODBC 2.0 at this point are
<pre>
Nulls.
Outer joins.
Primary key constraints.
Unique indices.
CHECK conditions.
Enforced data type constraints.
Alter table (can't implement until NULLs arrive).
Date, Time, and Interval data types
</pre>
It is hoped these will be implemented at some future time.
<p>
Less important omissions include
<pre>
Cursor based updates and deletes
(justification: if you really need them the db design
is flawed, and it's possible to use python instead).
LIKE string predicate
(justification: use Python regexes in python code).
Users and permissions
(justification: Gadfly is not intended for full multiuser
use at this time).
</pre>
These may or may not be implemented at some future time.
<h1>Statements</h1>
All interaction with SQL databases is mediated by
SQL statements, or statement sequences. Statement
sequences are statements separated by semicolons.
SQL keywords and user defined names are not case
sensitive (but string values are, of course).
<p>
SQL statements include the following.
<h3>Select Statement</h3>
The select statement derives a table from tables
in the database. Its general form is
<pre>
sub_query
optorder_by
</pre>
Where sub_query is given by
<pre>
SELECT alldistinct select_list
FROM table_reference_list
optwhere
optgroup
opthaving
optunion
</pre>
Read the statement:
<pre>
SELECT [DISTINCT|ALL] expressions or *
FROM tables
[WHERE condition]
[GROUP BY group-expressions]
[HAVING aggregate-condition]
[union-clause]
[ORDER BY columns]
</pre>
as follows:
<pre>
1) Make all combinations of rows from the tables (FROM line)
2) Eliminate those combinations not satisfying condition (WHERE line)
3) (if GROUP present) form aggregate groups that match on group-expressions
4) (if HAVING present) eliminate aggregate groups that don't satisfy
the aggregate-condition.
5) compute the columns to keep (SELECT line).
6) (if union-clause present) combine (union, except, intersect)
the result with the result of another select statement.
7) if DISTINCT, throw out redundant entries.
8) (if ORDER present) order the result by the columns (ascending
or descending as specified, with precedence as listed).
</pre>
This reading has little to do with the actual implementation,
but the answer produced should match this intuitive reading.
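<p>
For example (table and column names are illustrative), the following
query joins two tables and orders the result:
<pre>
select drinker, beer
from frequents, sells
where frequents.bar = sells.bar
order by drinker
</pre>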
<h3>Create and drop table</h3>
The create and drop table constructs
initialize and destroy a table structure, respectively.
<pre>
CREATE TABLE user_defined_name ( colelts )
DROP TABLE user_defined_name
</pre>
The colelts declare the names of the columns for
the table and their data types. The data types are
not checked or enforced in any way at this time.
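<p>
For example (names and types are illustrative, and remember the types
are not enforced):
<pre>
create table frequents (drinker varchar, bar varchar, perweek integer)
drop table frequents
</pre>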
<h3>Table mutations (INSERT, UPDATE, DELETE)</h3>
Insert, Update, and Delete statements insert rows
into tables, modify rows in tables in place, or
remove rows from tables respectively.
<pre>
INSERT INTO table_name optcolids insert_spec
DELETE FROM user_defined_name optwhere
UPDATE user_defined_name
SET assns
optwhere
</pre>
The insert statement has two variants (in this implementation):
INSERT sub-select and INSERT VALUES.
<pre>
insert into r (a,b,c) select a,b,c from s
insert into r (a,b,c) values (1,2,3)
</pre>
The first inserts
the result of a SELECT statement into the target table
and the other inserts explicit values (which may be dynamic
parameters, see below).
<P>
Cursor based updates are not supported at the SQL level,
eg
<pre>
update r set a=1 where current of curs
</pre>
is not supported.
<h3>Indices</h3>
The create and drop index statements initialize and
destroy index structures respectively.
<pre>
CREATE INDEX user_defined_name
ON user_defined_name
( namelist )
DROP INDEX user_defined_name
</pre>
Indices allow fast access to a table, based on values
for the indexed columns in the namelist.
<h3>Views</h3>
Create view and drop view statements initialize and
drop views, respectively.
<pre>
CREATE VIEW user_defined_name optnamelist
AS select_statement
DROP VIEW user_defined_name
</pre>
Views are "derived tables" which are defined
as stored SELECT statements. They can be used
as tables, except that they cannot be directly
mutated.
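<p>
For example, a view computing a grouped aggregate over an illustrative
table:
<pre>
create view busy (bar, patrons) as
  select bar, count(drinker)
  from frequents
  group by bar
drop view busy
</pre>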
<h1>Conditions</h1>
Conditions are truth valued boolean expressions
formed from basic conditions possibly combined using
NOT, AND, OR (where NOT has highest precedence and
OR has lowest precedence) and parentheses.
<p>
Basic conditions include simple comparisons
<pre>
expression = expression
expression &lt; expression
expression &gt; expression
expression &lt;= expression
expression &gt;= expression
expression &lt;&gt; expression
</pre>
Variants of the simple comparisons are the quantified
subquery comparisons
<pre>
expression = ANY ( subquery )
expression = ALL ( subquery )
</pre>
(and similarly for the other comparison operators).
The IN predicate tests membership (like =ANY)
<pre>
expression IN ( subquery )
expression NOT IN ( subquery )
</pre>
For all the quantified comparisons and IN, the
subquery must generate a single-column table.
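<p>
For example, using the illustrative tables frequents and sells:
<pre>
select drinker from frequents
where bar in (select bar from sells where beer = 'Bud')
</pre>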
<p>
Also included are the BETWEEN and NOT BETWEEN predicates
<pre>
expression BETWEEN expression AND expression
expression NOT BETWEEN expression AND expression
</pre>
<p>
The most general subquery predicates are EXISTS and NOT EXISTS,
which place no restriction on the subquery:
<pre>
EXISTS (subquery)
NOT EXISTS (subquery)
</pre>
<h1>Expressions</h1>
Expressions occur in conditions (WHERE, HAVING, etc.),
in UPDATE searched assignments,
and in the select list of select statements.
<p>
Expressions are formed from primary expressions,
possibly combined using the standard arithmetic operators
and parentheses with the normal precedence.
<p>
Primary expressions include numeric and string literals.
Numeric literals supported are the Python numeric literals.
String constants are set off by apostrophes, where two
apostrophes in sequence represent a single apostrophe in the
string:
<pre>
'SQL string literals ain''t pretty'
</pre>
Column name expressions may be unqualified if they are
unambiguous, or may be qualified with a table name
or table alias
<pre>
bar
frequents.bar
f.bar
</pre>
The rules for scoping of column names are not covered
here. Column names in subqueries may refer to bindings
in the query (or queries) that contain the sub-query.
<p>
Subquery expressions of form
<pre>
( select_statement )
</pre>
must produce a single column and single row table.
<p>
Aggregate operations are only permitted in the select
list or in the HAVING condition of SELECT statements
(including subselects).
<pre>
COUNT(*)
COUNT(expression)
AVG(expression)
MAX(expression)
SUM(expression)
MIN(expression)
</pre>
<em><strong>and also the non-standard extension MEDIAN</strong></em>
<pre>
MEDIAN(expression)
</pre>
Aggregate operations can be applied to distinct values
as in
<pre>
COUNT(DISTINCT expression)
</pre>
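<p>
For example, an illustrative grouped query combining aggregates:
<pre>
select bar, sum(quantity), count(distinct beer)
from sells
group by bar
having count(distinct beer) > 1
</pre>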
The Dynamic expression "?" is a placeholder for a value
bound at evaluation time (from Python values). See the
<a href="gadfly.html">
API discussions </a>
(Use) for more details on the use of
dynamic parameters.
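<p>
For example, from Python code (a sketch using the cursor interface;
the table is illustrative; note that a list of tuples is permitted
for INSERT VALUES, otherwise a tuple of values is expected):
<pre>
cursor.execute("insert into frequents values (?, ?)",
               [("norm", "cheers"), ("cliff", "cheers")])
cursor.execute("select * from frequents where drinker = ?", ("norm",))
</pre>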
</body>
</html>
"""client access to gadfly server. (gfserve.py)
Imported as a module this module provides interfaces
that remotely access a gadfly database server.
Remote connections: gfclient
connection = gfclient.gfclient(
policy, # the name of the connection policy ["admin" for admin]
port, # the port number the server is running on
password,# the password of the policy
[machine]) # (optional) the machine where server is running
# (defaults to localhost)
methods for gfclient connections:
gfclient.checkpoint() checkpoint the server (fails silently
if connection is not "admin").
gfclient.restart() restart the server (fails silently
if connection is not "admin").
gfclient.shutdown() shutdown the server (fails silently
if connection is not "admin").
cursor = gfclient.cursor() returns a cursor on this connection
methods for cursor objects:
cursor.execute(statement, dynamic_parameters=None)
execute the statement with optional dynamic parameters.
Dynamic parameters can be a list of tuples for INSERT
VALUES statements, otherwise they must be a tuple
of values.
cursor.execute_prepared(name, dynamic_parameters=None)
execute a named statement configured for this connection
policy, with optional dynamic parameters. Dynamic
parameters permitted as for execute depending on the
statement the name refers to.
cursor.fetchall()
return results of the last executed statement
(None for non-queries, or list of tuples).
See gfstest.py for example usage.
SCRIPT INTERPRETATION:
Server maintenance utilities
COMMAND LINE:
python gfclient.py action port admin_password [machine]
TEST EXAMPLE:
python gfclient.py shutdown 2222 admin
action: one of
shutdown: shut down the server with no checkpoint
restart: restart the server (re-read the database and recover)
checkpoint: force a database checkpoint now
port: the port the server is running on
admin_password: the administrative password for the server
machine: [optional] the machine the server runs on.
"""
import gfsocket
def main():
import sys
try:
done=0
argv = sys.argv
[action, port, admin_password] = argv[1:4]
from string import atoi
port = atoi(port)
if len(argv)>4:
machine = argv[4]
else:
machine = None
print action, port, admin_password, machine
if action not in ["shutdown", "restart", "checkpoint"]:
print "bad action", action
print
return
dosimple(action, port, admin_password, machine)
done=1
finally:
if not done:
print __doc__
def dosimple(action, port, pw, machine=None):
import socket
if machine is None:
machine = socket.gethostname()
conn = gfclient("admin", port, pw, machine)
action = getattr(conn, action)
print action()
# copied from gfserve
# shut down the server (admin policy only)
# arguments = ()
# shutdown the server with no checkpoint
SHUTDOWN = "SHUTDOWN"
# restart the server (admin only)
# arguments = ()
# restart the server (recover)
# no checkpoint
RESTART = "RESTART"
# checkpoint the server (admin only)
# arguments = ()
# checkpoint the server
CHECKPOINT = "CHECKPOINT"
# exec prepared statement
# arguments = (prepared_name_string, dyn=None)
# execute the prepared statement with dynamic args.
# autocommit.
EXECUTE_PREPARED = "EXECUTE_PREPARED"
# exec any statement (only if not disabled)
# arguments = (statement_string, dyn=None)
# execute the statement with dynamic args.
# autocommit.
EXECUTE_STATEMENT = "EXECUTE_STATEMENT"
class gfclient:
closed = 0
def __init__(self, policy, port, password, machine=None):
import socket
self.policy = policy
self.port = port
self.password = password
if machine is None:
machine = socket.gethostname()
self.machine = machine
def open_connection(self):
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#print type(sock), sock
sock.connect(self.machine, self.port)
return sock
def send_action(self, action, arguments, socket):
gfsocket.send_certified_action(
self.policy, action, arguments, self.password, socket)
def checkpoint(self):
return self.simple_action(CHECKPOINT)
def simple_action(self, action, args=()):
"""only valid for admin policy: force a server checkpoint"""
sock = self.open_connection()
self.send_action(action, args, sock)
data = gfsocket.recv_data(sock)
data = gfsocket.interpret_response(data)
return data
def restart(self):
"""only valid for admin policy: force a server restart"""
return self.simple_action(RESTART)
def shutdown(self):
"""only valid for admin policy: shut down the server"""
return self.simple_action(SHUTDOWN)
def close(self):
self.closed = 1
def commit(self):
# right now all actions autocommit
pass
# cannot rollback, autocommit on success
rollback = commit
def cursor(self):
"""return a cursor to this policy"""
if self.closed:
raise ValueError, "connection is closed"
return gfClientCursor(self)
class gfClientCursor:
statement = None
results = None
description = None
def __init__(self, connection):
self.connection = connection
# should add fetchone fetchmany
def fetchall(self):
return self.results
def execute(self, statement=None, params=None):
con = self.connection
data = con.simple_action(EXECUTE_STATEMENT, (statement, params))
(self.description, self.results) = data
def execute_prepared(self, name, params=None):
con = self.connection
data = con.simple_action(EXECUTE_PREPARED, (name, params))
if data is None:
self.description = self.results = None
else:
(self.description, self.results) = data
def setoutputsizes(self, *args):
pass # not implemented
    def setinputsizes(self, *args):
pass # not implemented
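# A usage sketch (not part of the original module): connect to a gfserve
# instance already running on port 2222 with admin password "admin" (as in
# the TEST EXAMPLE above) and run a query remotely; names are illustrative:
#
#   conn = gfclient("admin", 2222, "admin", "localhost")
#   cur = conn.cursor()
#   cur.execute("select * from frequents")
#   print cur.fetchall()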
if __name__=="__main__":
main()
"""storage objects"""
verbosity = 0
### NOTE: current recovery semantics
### assumes NO transactions are active
### at commit time (log file is deleted at checkpoint)
### next step: make sure commit records are logged and stored
### correctly: get thru test suite.
# need to test recovery
# need to connect shadow stuff to usage
### idea: put in hook to use backupdb if multiple instances active
# what needs to be done:
# Add a Logfile object which knows how to
# do logging and recovery from the log.
#
# Integrate Logfile with filestorage/db0/Rel0
#
# Add relation_checkout to update ops.
#
# Add transaction semantics to db CURSORS.
#
# Add global active db log to gadfly main
#
# Test the recovery subsystem with all
# operation types.
# use whatever kjbuckets sqlsem is using
#from sqlsem import kjbuckets, maketuple
# error on checking of data integrity
StorageError = "StorageError"
# use md5 checksum (stub if md5 unavailable?)
def checksum(string):
from md5 import new
return new(string).digest()
def recursive_dump(data, prefix="["):
"""for debugging"""
from types import StringType
if type(data) is StringType:
print prefix, data
return
p2 = prefix+"["
try:
for x in data:
recursive_dump(x, p2)
except:
print prefix, data
def checksum_dump(data, file):
"""checksum and dump marshallable data to file"""
#print "checksum_dump", file
#recursive_dump(data)
from marshal import dumps, dump
storage = dumps(data)
checkpair = (checksum(storage), storage)
dump(checkpair, file)
def checksum_undump(file):
"""undump marshallable data from file, checksum"""
from marshal import load, loads
checkpair = load(file)
(check, storage) = checkpair
if checksum(storage)!=check:
raise StorageError, "data load checksum fails"
data = loads(storage)
return data
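# A round-trip sketch (not part of the original module): any marshallable
# value can be stored with checksum_dump and verified on reload with
# checksum_undump; the file name is illustrative.
#
#   f = open("/tmp/check.dat", "wb")
#   checksum_dump(("atts", [1, 2, 3]), f)
#   f.close()
#   f = open("/tmp/check.dat", "rb")
#   print checksum_undump(f)    # prints ('atts', [1, 2, 3])
#   f.close()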
def backup_file(filename, backupname):
"""backup file, if unopenable ignore"""
try:
f = open(filename, "rb")
except:
return
data = f.read()
f.close()
f = open(backupname, "wb")
f.write(data)
f.close()
def del_file(filename):
"""delete file, ignore errors"""
from os import unlink
try:
unlink(filename)
except:
pass
class Database0:
"""quick and dirty in core database representation."""
# db.log is not None == use db.log to log modifications
# set for verbose prints
verbose = verbosity
# set for read only copy
readonly = 0
# set for temp/scratch db copy semantics
is_scratch = 0
def __init__(self, shadowing=None, log=None):
"""dictionary of relations."""
verbose = self.verbose
self.shadowing = shadowing
self.log = log
self.touched = 0
if log:
self.is_scratch = log.is_scratch
if shadowing and not log:
raise ValueError, "shadowing db requires log"
if verbose:
print "Database0 init"
if log:
log.verbose = 1
if shadowing:
# shadow structures of shadowed db
self.rels = shadow_dict(shadowing.rels, Relation0.unshadow)
self.datadefs = shadow_dict(shadowing.datadefs)
self.indices = shadow_dict(shadowing.indices)
else:
self.rels = {}
self.datadefs = {}
self.indices = {}
def reshadow(self, db, dblog):
"""(re)make self into shadow of db with dblog"""
self.shadowing = db
self.log = dblog
self.rels = shadow_dict(db.rels, Relation0.unshadow)
self.datadefs = shadow_dict(db.datadefs)
self.indices = shadow_dict(db.indices)
def clear(self):
"""I'm not sure if database has circular structure, so this added"""
self.shadowing = None
self.log = None
self.rels = {}
self.datadefs = {}
self.indices = {}
def commit(self):
"""commit shadowed changes"""
verbose = self.verbose
if self.shadowing and self.touched:
# log commit handled elsewhere
#log = self.log
#if log and not log.is_scratch:
#if verbose: print "committing log"
#self.log.commit(verbose)
if verbose: print "committing rels"
self.rels.commit(verbose)
if verbose: print "committing datadefs"
self.datadefs.commit(verbose)
if verbose: print "committing indices"
self.indices.commit(verbose)
st = self.shadowing.touched
if not st:
if verbose: "print setting touched", self.touched
self.shadowing.touched = self.touched
elif verbose:
print "shadowed database is touched"
elif verbose:
print "db0: commit on nonshadow instance"
def __setitem__(self, name, relation):
"""bind a name (uppercased) to tuples as a relation."""
from string import upper
if self.indices.has_key(name):
raise NameError, "cannot set index"
self.rels[ upper(name) ] = relation
if self.verbose: print "db0 sets rel", name
def add_index(self, name, index):
if self.rels.has_key(name):
raise NameError, `name`+": is relation"
self.indices[name] = index
if self.verbose: print "db0 sets index", name
def drop_index(self, name):
if self.verbose: print "db0 drops index", name
del self.indices[name]
def __getitem__(self, name):
if self.verbose: print "db0 gets rel", name
from string import upper
return self.rels[upper(name)]
def get_for_update(self, name):
"""note: does not imply updates, just possibility of them"""
verbose = self.verbose
if verbose: print "db0 gets rel for update", name
shadowing = self.shadowing
gotit = 0
from string import upper
name = upper(name)
rels = self.rels
if shadowing:
if rels.is_shadowed(name):
test = rels[name]
# do we really have a shadow or a db copy?
if test.is_shadow:
gotit = 1
if not gotit:
if shadowing.has_relation(name):
test = shadowing.get_for_update(name)
else:
# uncommitted whole relation
gotit = 1
else:
test = rels[name]
gotit = 1
if self.readonly:
raise ValueError, "cannot update, db is read only"
elif test.is_view:
raise ValueError, "VIEW %s cannot be updated" % name
elif shadowing and not gotit:
if verbose: print "db0: making shadow for", name
if test.is_shadow: return test
shadow = Relation0(())
shadow = shadow.shadow(test, self.log, name, self)
rels[name] = shadow
return shadow
else:
return test
def __delitem__(self, name):
if self.verbose: print "db0 drops rel", name
from string import upper
del self.rels[upper(name)]
def relations(self):
return self.rels.keys()
def has_relation(self, name):
return self.rels.has_key(name)
def getdatadefs(self):
result = self.datadefs.values()
# sort to make create tables first, eg
result.sort()
return result
def add_datadef(self, name, defn, logit=1):
"""only log the datadef if logit is set, else ignore redefinitions"""
dd = self.datadefs
if logit and dd.has_key(name):
raise KeyError, `name`+": already defined"
if logit:
self.touched = 1
dd[name] = defn
def has_datadef(self, name):
return self.datadefs.has_key(name)
def drop_datadef(self, name):
if self.verbose: print "db0 drops datadef",name
dd = self.datadefs
#print dd.keys()
if not dd.has_key(name):
raise KeyError, `name`+": no such element"
del dd[name]
def __repr__(self):
l = []
from string import join
l.append("INDICES: "+`self.indices.keys()`)
for (name, ddef) in self.datadefs.items():
l.append("data definition %s::\n%s" % (name, ddef))
for (name, rel) in self.rels.items():
l.append(name + ":")
l.append(rel.irepr())
return join(l, "\n\n")
def bindings(self, fromlist):
"""return (attdict, reldict, amb, ambatts) from fromlist = [(name,alias)...]
where reldict: alias --> tuplelist
attdict: attribute_name --> unique_relation
amb: dict of dottedname --> (rel, att)
ambatts: dict of ambiguous_name --> witness_alias
"""
from string import upper
rels = self.rels
ambiguous_atts = {}
ambiguous = {}
relseen = {}
attbindings = {}
relbindings = {}
for (name,alias) in fromlist:
name = upper(name)
alias = upper(alias)
if relseen.has_key(alias):
raise NameError, `alias` + ": bound twice in from list"
relseen[alias]=alias
try:
therel = rels[name]
except KeyError:
raise NameError, `name` + " no such relation in DB"
relbindings[alias] = therel
for attname in therel.attributes():
if not ambiguous_atts.has_key(attname):
if attbindings.has_key(attname):
oldrel = attbindings[attname]
oldbind = (oldrel, attname)
ambiguous[ "%s.%s" % oldbind] = oldbind
del attbindings[attname]
ambiguous_atts[attname]=alias
newbind = (alias, attname)
ambiguous[ "%s.%s" % newbind ] = newbind
else:
attbindings[attname] = alias
else:
newbind = (alias, attname)
ambiguous[ "%s.%s" % newbind ] = newbind
return (attbindings, relbindings, ambiguous, ambiguous_atts)
class File_Storage0:
"""quick and dirty file storage mechanism.
relation names in directory/dbname.gfd
    contains a whitespace-separated list of relation names
relations in directory/relname.grl
contains sequence of marshalled tuples reps
prefixed by marshalled list of atts
"""
verbose = verbosity
def __init__(self, dbname, directory):
"""directory must exist."""
if self.verbose: print "fs0 init:", dbname, directory
self.dbname = dbname
self.directory = directory
self.relation_implementation = Relation0
self.recovery_mode = 0
def load(self, parser=None, forscratch=0):
# if logfile is present, need to recover
# error condition: fail to load relation, ddf, but no log file!
logfile = self.logfilename()
blogfile = self.backup_logfilename()
verbose = self.verbose
if verbose: print "fs0 load, checking", logfile
try:
testlog = open(logfile, "rb")
if verbose: print "fs0: opened", testlog
testlog.close()
testlog = open(blogfile, "rb")
testlog.close()
testlog = None
except:
recovery_mode = self.recovery_mode = 0
if verbose: print "recovery not needed"
else:
recovery_mode = self.recovery_mode = 1
if verbose: print "FS0 RECOVERY MODE LOAD!"
resultdb = Database0()
resultdb.is_scratch = forscratch
commands = self.get_initstatements()
#commands = parser.DoParse1(initstatements)
for command in commands:
if verbose: print "fs0 evals", command
command.relbind(resultdb)
command.eval()
for name in resultdb.relations():
if verbose: print "fs0 loads rel", name
rel = resultdb[name]
if rel.is_view:
# don't need to load views
continue
rel.set_empty()
try:
data = self.get_relation(name)
except StorageError, detail:
raise StorageError, "load failure %s: %s" % (name, detail)
attsin = tuple(data.attributes())
attsout = tuple(rel.attributes())
if attsin!=attsout:
raise StorageError, "rel %s: atts %s don't match %s" % (
name, attsin, attsout)
rel.add_tuples( data.rows() )
# in sync!
rel.touched = 0
# db in sync
resultdb.touched = 0
# do recovery, if needed
if recovery_mode:
if verbose: print "fs0 recovering from logfile", logfile
# restart the log file only if db is not scratch
restart = not forscratch
Log = DB_Logger(logfile, blogfile)
if verbose: Log.verbose=1
Log.recover(resultdb, restart)
# do a checkpoint
self.recovery_mode = 0
if restart and not forscratch:
Log.shutdown()
Log = None
del_file(logfile)
if verbose: print "FS0: dumping database"
self.dump(resultdb)
Log = resultdb.log = DB_Logger(logfile, blogfile)
Log.startup()
elif not forscratch:
Log = DB_Logger(logfile, blogfile)
Log.startup()
resultdb.log = Log
return resultdb
def relfilename(self, name):
return "%s/%s.grl" % (self.directory, name)
def backup_relfilename(self, name):
return "%s/%s.brl" % (self.directory, name)
def relfile(self, name, mode="rb"):
if self.recovery_mode:
return self.getfile_fallback(
self.backup_relfilename(name), self.relfilename(name), mode)
else:
name = self.relfilename(name)
return open(name, mode)
def getfile_fallback(self, first, second, mode):
try:
return open(first, mode)
except:
return open(second, mode)
def get_relation(self, name):
f = self.relfile(name, "rb")
rel = self.relation_implementation(())
try:
rel.load(f)
except StorageError:
if self.recovery_mode:
f = open(self.relfilename(name), "rb")
rel.load(f)
else:
raise StorageError, \
"fs: could not unpack backup rel file or rel file in recovery mode: "+name
return rel
def dbfilename(self):
return "%s/%s.gfd" % (self.directory, self.dbname)
def backup_dbfilename(self):
return "%s/%s.bfd" % (self.directory, self.dbname)
def logfilename(self):
return "%s/%s.gfl" % (self.directory, self.dbname)
def backup_logfilename(self):
return "%s/%s.glb" % (self.directory, self.dbname)
def get_initstat_file(self, mode):
if self.recovery_mode:
return self.getfile_fallback(
self.backup_dbfilename(), self.dbfilename(), mode)
else:
return open(self.dbfilename(), mode)
def get_initstatements(self):
f = self.get_initstat_file("rb")
if self.verbose:
print "init statement from file", f
try:
data = checksum_undump(f)
except StorageError:
if self.recovery_mode:
                f = open(self.dbfilename(), "rb")
data = checksum_undump(f)
else:
raise StorageError, \
"could not unpack ddf backup or ddf file in recovery mode: "+self.dbname
f.close()
from sqlsem import deserialize
stats = map(deserialize, data)
return stats
def dump(self, db):
"""perform a checkpoint (no active transactions!)"""
# db should be non-shadowing db
# first thing: back up the log
backup_file(self.logfilename(), self.backup_logfilename())
verbose = self.verbose
if verbose: print "fs0: checkpointing db"
if db.is_scratch or db.readonly:
# don't need to do anything.
if verbose: print "fs0: scratch or readonly, returning"
return
log = db.log
if log:
log.commit()
if verbose:
print "DEBUG LOG TRACE"
log.dump()
log.shutdown()
if db.touched:
if verbose: print "fs0: db touched, backing up ddf file"
backup_file(self.dbfilename(),
self.backup_dbfilename())
relations = db.relations()
for r in relations:
rel = db[r]
#print r
if rel.touched:
if verbose: print "fs0: backing up touched rel", r
backup_file(self.relfilename(r),
self.backup_relfilename(r))
for r in relations:
if verbose: print "fs0: dumping relations now"
self.dumprelation(r, db[r])
if verbose: print "fs0: dumping datadefs now"
self.dumpdatadefs(db)
# del of logfile signals successful commit.
if verbose: print "fs0: successful dump, deleting log file"
logfilename = self.logfilename()
blogfilename = self.backup_logfilename()
del_file(logfilename)
del_file(blogfilename)
if db.touched:
if verbose: print "fs0: deleting backup ddf file"
del_file(self.backup_dbfilename())
db.touched = 0
for r in relations:
rel = db[r]
if rel.touched:
if verbose: print "fs0: deleting rel backup", r
del_file(self.backup_relfilename(r))
rel.touched = 0
if verbose: print "fs0: restarting db log"
log = db.log = DB_Logger(logfilename, blogfilename)
log.startup()
if verbose: print "fs0: dump complete"
self.recovery_mode = 0
def dumprelation(self, name, rel, force=0):
"""set force to ignore the "touch" flag."""
# ignore self.backup_mode
if (force or rel.touched) and not rel.is_view:
fn = self.relfilename(name)
if self.verbose:
print "dumping touched rel", name, "to", fn
f = open(fn, "wb")
rel.dump(f)
def dumpdatadefs(self, db, force=0):
"""set force to ignore the touch flag"""
# ignore self.backup_mode
if not (force or db.touched): return
#from marshal import dump, dumps
fn = self.dbfilename()
f = open(fn, "wb")
datadefs = db.getdatadefs()
from sqlsem import serialize
datadefsd = map(serialize, datadefs)
#for (defn, ser) in map(None, datadefs, datadefsd):
#print defn
#print ser
#dumps(ser) ### debug test
checksum_dump(datadefsd, f)
f.close()
class Relation0:
"""quick and dirty in core relation representation.
self.tuples contains tuples or 0 if erased.
tuples must not move (to preserve indices)
unless indices regenerate.
"""
is_view = 0 # Relation0 is not a view
def __init__(self, attribute_names, tuples=None, filter=None):
from sqlsem import kjbuckets
self.indices = kjbuckets.kjGraph()
self.index_list = []
self.attribute_names = attribute_names
if tuples is None:
tuples = []
self.filter = filter
self.set_empty()
self.add_tuples(tuples)
# indices map attname --> indices containing att
# relation to shadow and log (if non-null)
self.log = None
self.name = None # anonymous by default
self.is_shadow = 0
self.touched = 0
def shadow(self, otherrelation, log, name, inshadowdb):
"""return structural replica of otherrelation (as self)
for non-updatable relation (eg, view) may return otherrelation"""
if otherrelation.is_view:
# for now, assume VIEWS CANNOT BE UPDATED
return otherrelation
self.is_shadow = 1
self.shadow_of_shadow = otherrelation.is_shadow
self.log = log
self.name = name
# don't make any updates permanent if set.
self.tuples = otherrelation.tuples[:]
self.attribute_names = otherrelation.attribute_names
self.filter = otherrelation.filter
for index in otherrelation.index_list:
copy = index.copy()
name = copy.name
self.add_index(copy, recordtuples=0)
# record in shadowdb, but don't log it
inshadowdb.add_index(name, copy)
#inshadowdb.add_datadef(name, copy, logit=0)
self.touched = otherrelation.touched
return self
def unshadow(self):
"""make self into a replacement for shadowed, return self."""
if self.is_shadow:
self.log = None
self.is_shadow = self.shadow_of_shadow
return self
def dump(self, file):
attributes = tuple(self.attributes())
rows = self.rows()
newrows = rows[:]
count = 0
tt = type
from types import IntType
for i in xrange(len(rows)):
this = rows[i]
if this is not None and tt(this) is not IntType:
newrows[count] = rows[i].dump(attributes)
count = count + 1
newrows = newrows[:count]
newrows.append(attributes)
checksum_dump(newrows, file)
def load(self, file):
"""checksum must succeed."""
rows = checksum_undump(file)
attributes = rows[-1]
self.attribute_names = attributes
rows = rows[:-1]
from sqlsem import kjbuckets
undump = kjbuckets.kjUndump
for i in xrange(len(rows)):
rows[i] = undump(attributes, rows[i])
self.set_empty()
self.add_tuples(rows)
# in sync with disk copy!
self.touched = 0
def add_index(self, index, recordtuples=1):
"""unset recordtuples if the index is initialized already."""
# does not "touch" the relation
index_list = self.index_list
indices = self.indices
atts = index.attributes()
for a in atts:
indices[a] = index
if recordtuples:
(tuples, seqnums) = self.rows(1)
index.clear()
if tuples:
index.add_tuples(tuples, seqnums)
index_list.append(index)
def drop_index(self, index):
# does not "touch" the relation
name = index.name
if verbosity:
print "rel.drop_index", index
print "...", self.indices, self.index_list
indices = self.indices
for a in index.attributes():
            # contorted since one index may be a clone of the other.
aindices = indices.neighbors(a)
for ind in aindices:
if ind.name == name:
indices.delete_arc(a, ind)
theind = ind
# the (non-clone) index ought to have been found above...
self.index_list.remove(theind)
def choose_index(self, attributes):
"""choose an index including subset of attributes or None"""
from sqlsem import kjbuckets
kjSet = kjbuckets.kjSet
atts = kjSet(attributes)
#print "choosing index", atts
indices = (atts * self.indices).values()
choice = None
for index in indices:
indexatts = index.attributes()
#print "index atts", indexatts
iatts = kjSet(indexatts)
if iatts.subset(atts):
if choice is None:
#print "chosen", index.name
choice = index
else:
if len(choice.attributes())<len(indexatts):
choice = index
return choice
def __repr__(self):
rows = self.rows()
atts = self.attributes()
list_rep = [list(atts)]
for r in rows:
rlist = []
for a in atts:
try:
elt = r[a]
except KeyError:
elt = "NULL"
else:
elt = str(elt)
rlist.append(elt)
list_rep.append(rlist)
# compute maxen for formatting
maxen = [0] * len(atts)
for i in xrange(len(atts)):
for l in list_rep:
maxen[i] = max(maxen[i], len(l[i]))
for i in xrange(len(atts)):
mm = maxen[i]
for l in list_rep:
old = l[i]
l[i] = old + (" " * (mm-len(old)))
from string import join
for i in xrange(len(list_rep)):
list_rep[i] = join(list_rep[i], " | ")
first = list_rep[0]
list_rep.insert(1, "=" * len(first))
return join(list_rep, "\n")
def irepr(self):
List = [self] + list(self.index_list)
List = map(str, List)
from string import join
return join(List, "\n")
def set_empty(self):
self.tuples = []
for index in self.index_list:
index.clear()
def drop_indices(self, db):
for index in self.index_list:
name = index.name
db.drop_datadef(name)
db.drop_index(name)
self.index_list = []
from sqlsem import kjbuckets
self.indices = kjbuckets.kjGraph()
def regenerate_indices(self):
(tuples, seqnums) = self.rows(1)
#self.tuples = tuples
for index in self.index_list:
index.clear()
index.add_tuples(tuples, seqnums)
def add_tuples(self, tuples):
if not tuples: return
tuples = filter(self.filter, tuples)
oldtuples = self.tuples
first = len(oldtuples)
oldtuples[first:] = list(tuples)
last = len(oldtuples)
for index in self.index_list:
index.add_tuples(tuples, xrange(first,last))
self.touched = 1
def attributes(self):
return self.attribute_names
def rows(self, andseqnums=0):
tups = self.tuples
# short cut
if 0 not in tups:
if andseqnums:
return (tups, xrange(len(tups)))
else:
return tups
tt = type
from types import IntType
result = list(self.tuples)
if andseqnums: seqnums = result[:]
count = 0
for i in xrange(len(result)):
t = result[i]
if tt(t) is not IntType:
result[count] = t
if andseqnums: seqnums[count] = i
count = count+1
result = result[:count]
if andseqnums:
return (result, seqnums[:count])
else:
return result
def erase_tuples(self, seqnums):
if not seqnums: return
tups = self.tuples
# order important! indices first!
for index in self.index_list:
index.erase_tuples(seqnums, tups)
for i in seqnums:
tups[i] = 0
self.touched = 1
def reset_tuples(self, tups, seqnums):
# KISS for indices, maybe optimize someday...
if not tups: return
mytups = self.tuples
for index in self.index_list:
index.erase_tuples(seqnums, mytups)
for i in xrange(len(seqnums)):
seqnum = seqnums[i]
mytups[seqnum] = tups[i]
for index in self.index_list:
index.add_tuples(tups, seqnums)
self.touched = 1
# should views be here?
class View(Relation0):
"""view object, acts like relation, with addl operations."""
touched = 0
is_view = 1
is_shadow = 0
### must fix namelist!
def __init__(self, name, namelist, selection, indb):
"""set namelist to None for implicit namelist"""
self.name = name
self.namelist = namelist
self.selection = selection
# attempt a relbind, no outer bindings!
self.relbind(indb, {})
self.cached_rows = None
self.translate = None
def __repr__(self):
return "view %s as %s" % (self.name, self.selection)
irepr = __repr__
def uncache(self):
self.cached_rows = None
def UNDEFINED_OP_FOR_VIEW(*args, **kw):
raise ValueError, "operation explicitly undefined for view object"
shadow = dump = load = add_index = drop_index = set_empty = \
add_tuples = erase_tuples = reset_tuples = UNDEFINED_OP_FOR_VIEW
def ignore_op_for_view(*args, **kw):
"""ignore this op when applied to view"""
pass
drop_indices = regenerate_indices = ignore_op_for_view
def choose_index(s, a):
"""no indices on views (might change this?)"""
return None
def relbind(self, db, atts):
"""bind self to db, ignore atts"""
name = self.name
selection = self.selection
selection = self.selection = selection.relbind(db)
namelist = self.namelist
if namelist is not None:
from sqlsem import kjbuckets
target_atts = selection.attributes()
if len(namelist)!=len(target_atts):
raise "select list and namelist don't match in %s"%name
pairs = map(None, namelist, target_atts)
self.translate = kjbuckets.kjGraph(pairs)
return self
def attributes(self):
namelist = self.namelist
if self.namelist is None:
return self.selection.attributes()
return namelist
def rows(self, andseqs=0):
cached_rows = self.cached_rows
if cached_rows is None:
cached_rows = self.cached_rows = self.selection.eval().rows()
if self.namelist is not None:
# translate the attribute names
translate = self.translate
for i in range(len(cached_rows)):
cached_rows[i] = cached_rows[i].remap(translate)
if andseqs:
return (cached_rows[:], range(len(cached_rows)))
else:
return cached_rows[:]
class Index:
"""Index for tuples in relation. Tightly bound to relation rep."""
### should add "unique index" and check enforce uniqueness...
def __init__(self, name, attributes):
self.name = name
self.atts = tuple(attributes)
# values --> tuples
self.index = {}
self.dseqnums = {}
def __repr__(self):
return "index %s on %s" % (self.name, self.atts)
def copy(self):
"""make a fast structural copy of self"""
result = Index(self.name, self.atts)
rindex = result.index
rdseqnums = result.dseqnums
myindex = self.index
mydseqnums = self.dseqnums
for k in myindex.keys():
rindex[k] = myindex[k][:]
for k in mydseqnums.keys():
rdseqnums[k] = mydseqnums[k][:]
return result
def attributes(self):
return self.atts
def matches(self, tuple, translate=None):
"""return (tuples, seqnums) for tuples matching tuple
(with possible translations"""
if translate:
tuple = translate * tuple
atts = self.atts
dump = tuple.dump(atts)
index = self.index
if index.has_key(dump):
return (index[dump], self.dseqnums[dump])
else:
return ((), ())
def clear(self):
self.index = {}
self.dseqnums = {}
def add_tuples(self, tuples, seqnums):
atts = self.atts
index = self.index
dseqnums = self.dseqnums
test = index.has_key
for i in xrange(len(tuples)):
tup = tuples[i]
seqnum = seqnums[i]
dump = tup.dump(atts)
#print self.name, dump
if test(dump):
index[dump].append(tup)
dseqnums[dump].append(seqnum)
else:
index[dump] = [tup]
dseqnums[dump] = [seqnum]
def erase_tuples(self, seqnums, all_tuples):
# all_tuples must be internal rel tuple list
atts = self.atts
index = self.index
dseqnums = self.dseqnums
for seqnum in seqnums:
tup = all_tuples[seqnum]
dump = tup.dump(atts)
index[dump].remove(tup)
dseqnums[dump].remove(seqnum)
class shadow_dict:
"""shadow dictionary. defer & remember updates."""
verbose = verbosity
def __init__(self, shadowing, value_transform=None):
self.shadowed = shadowing
shadow = self.shadow = {}
self.touched = {}
for key in shadowing.keys():
shadow[key] = shadowing[key]
self.value_transform = value_transform
# defeats inheritance! careful!
self.values = shadow.values
self.items = shadow.items
self.keys = shadow.keys
self.has_key = shadow.has_key
def is_shadowed(self, name):
return self.touched.has_key(name)
def __len__(self):
return len(self.shadow)
def commit(self, verbose=0):
"""apply updates to shadowed."""
import sys
verbose = verbose or self.verbose
if self.touched:
shadowed = self.shadowed
shadow = self.shadow
value_transform = self.value_transform
keys = shadowed.keys()
if verbose:
print "shadowdict oldkeys", keys
for k in keys:
del shadowed[k]
keys = shadow.keys()
if verbose:
print "shadowdict newkeys", keys
for k in shadow.keys():
value = shadow[k]
if value_transform is not None:
try:
value = value_transform(value)
except:
raise "transform fails", (sys.exc_type, sys.exc_value, k, value)
shadowed[k] = value
self.touched = {}
def __getitem__(self, key):
return self.shadow[key]
def __setitem__(self, key, item):
from types import StringType
if type(key) is not StringType:
raise "nonstring", key
if item is None:
raise "none set", (key, item)
self.touched[key] = 1
self.shadow[key] = item
def __delitem__(self, key):
self.touched[key] = 1
del self.shadow[key]
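# A sketch of the deferred-update semantics (not part of the original
# module): updates stay in the shadow until commit() pushes them down to
# the shadowed dictionary.
#
#   base = {"A": 1}
#   sd = shadow_dict(base)
#   sd["B"] = 2                  # deferred: base is unchanged
#   print base.has_key("B")      # prints 0
#   sd.commit()
#   print base["B"]              # prints 2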
# stored mutations on relations
class Add_Tuples:
"""stored rel.add_tuples(tuples)"""
def __init__(self, name):
self.to_rel = name
self.indb = None
def initargs(self):
return (self.to_rel,)
def set_data(self, tuples, rel):
"""store self.data as tuple with tuple[-1] as to_rel, rest data"""
attributes = tuple(rel.attributes())
ltuples = len(tuples)
data = list(tuples)
for i in xrange(ltuples):
tdata = tuples[i].dump(attributes)
data[i] = tdata
self.data = tuple(data)
def __repr__(self):
from string import join
datarep = map(repr, self.data)
datarep = join(datarep, "\n ")
return "add tuples to %s\n %s\n\n" % (self.to_rel, datarep)
def marshaldata(self):
return self.data
def demarshal(self, data):
self.data = data
def relbind(self, db):
self.indb = db
def eval(self, dyn=None):
"""apply operation to db"""
db = self.indb
data = self.data
name = self.to_rel
rel = db[name]
attributes = tuple(rel.attributes())
tuples = list(data)
from sqlsem import kjbuckets
undump = kjbuckets.kjUndump
for i in xrange(len(tuples)):
tuples[i] = undump(attributes, tuples[i])
rel.add_tuples(tuples)
class Erase_Tuples(Add_Tuples):
"""stored rel.erase_tuples(seqnums)"""
def set_data(self, seqnums, rel):
seqnums = list(seqnums)
self.data = tuple(seqnums)
def __repr__(self):
return "Erase seqnums in %s\n %s\n\n" % (self.to_rel, self.data)
def eval(self, dyn=None):
db = self.indb
seqnums = self.data
name = self.to_rel
rel = db[name]
rel.erase_tuples(seqnums)
class Reset_Tuples(Add_Tuples):
"""stored rel.reset_tuples(tups, seqnums)"""
def set_data(self, tups, seqnums, rel):
attributes = tuple(rel.attributes())
dtups = list(tups)
for i in xrange(len(dtups)):
dtups[i] = dtups[i].dump(attributes)
self.data = (tuple(dtups), tuple(seqnums))
def __repr__(self):
(dtups, seqnums) = self.data
pairs = map(None, seqnums, dtups)
from string import join
datarep = map(repr, pairs)
datarep = join(datarep, " \n")
return "Reset tuples in %s\n %s\n\n" % (self.to_rel, datarep)
def eval(self, dyn=None):
db = self.indb
(dtups, seqnums) = self.data
tups = list(dtups)
rel = db[self.to_rel]
attributes = tuple(rel.attributes())
from sqlsem import kjbuckets
        undump = kjbuckets.kjUndump
for i in xrange(len(dtups)):
tups[i] = undump(attributes, dtups[i])
rel.reset_tuples(tups, seqnums)
# Log entry tags
START = "START"
COMMIT = "COMMIT"
ABORT = "ABORT"
UNREADABLE = "UNREADABLE"
class Transaction_Logger:
"""quick and dirty Log implementation per transaction."""
verbose = verbosity
def __init__(self, db_log, transactionid, is_scratch=0):
self.db_log = db_log
self.transactionid = transactionid
# ignore all operations if set
self.is_scratch = is_scratch
self.dirty = 0
self.deferred = []
def reset(self):
self.deferred = []
def __repr__(self):
return "Transaction_Logger(%s, %s, %s)" % (
self.db_log, self.transactionid, self.is_scratch)
def log(self, operation):
verbose = self.verbose
tid = self.transactionid
if not self.is_scratch:
self.deferred.append(operation)
if verbose:
print "tid logs", tid, operation
    def flush(self):
        verbose = self.verbose
        tid = self.transactionid
        if not self.is_scratch:
            deferred = self.deferred
            self.deferred = []
            db_log = self.db_log
            if db_log:
                for operation in deferred:
                    db_log.log(operation, tid)
            self.dirty = 1
        elif verbose:
            print "scratch log ignored", tid
def commit(self, verbose=0):
verbose = self.verbose or verbose
tid = self.transactionid
if verbose: print "committing trans log", tid
if self.is_scratch:
if verbose:
print "scratch commit ignored", tid
return
if not self.dirty:
if verbose:
print "nondirty commit", tid
return
self.flush()
db_log = self.db_log
db_log.commit(verbose, tid)
if verbose:
print "transaction is considered recoverable", tid
class DB_Logger:
"""quick and dirty global db logger."""
verbose = verbosity
is_scratch = 0
def __init__(self, filename, backupname):
self.filename = filename
# backup name is never kept open: existence indicates log in use.
self.backupname = backupname
self.file = None
self.dirty = 0
if self.verbose:
print id(self), "created DB_Logger on", self.filename
def __repr__(self):
return "DB_Logger(%s)" % self.filename
def startup(self):
if self.verbose:
print id(self), "preparing", self.filename
# open happens automagically
#self.file = open(self.filename, "wb")
self.clear()
self.dirty = 0
def shutdown(self):
if self.verbose:
print id(self), "shutting down log", self.filename
file = self.file
if file:
file.close()
self.file = None
def clear(self):
if self.verbose:
print id(self), "clearing"
self.shutdown()
del_file(self.filename)
def restart(self):
if self.verbose:
print id(self), "restarting log file", self.filename
if self.file is not None:
self.file.close()
self.file = open(self.filename, "ab")
dummy = open(self.backupname, "ab")
dummy.close()
self.dirty = 0
def clear_log_file(self):
if self.verbose:
print id(self), "clearing logfile", self.filename
if self.file is not None:
self.file.close()
self.file = None
del_file(self.filename)
del_file(self.backupname)
self.dirty = 0
def log(self, operation, transactionid=None):
"""transactionid of None means no transaction: immediate."""
file = self.file
if file is None:
self.restart()
file = self.file
verbose = self.verbose
from sqlsem import serialize
serial = serialize(operation)
data = (transactionid, serial)
if verbose:
print id(self), "logging:", transactionid
print operation
checksum_dump(data, file)
self.dirty = 1
def commit(self, verbose=0, transactionid=None):
"""add commit, if appropriate, flush."""
verbose = self.verbose or verbose
if not self.dirty and transactionid is None:
if verbose: print "commit not needed", transactionid
return
elif verbose:
print "attempting commit", transactionid
if transactionid is not None:
self.log( COMMIT, transactionid )
if verbose: print "committed", transactionid
if verbose: print "flushing", self.filename
self.file.flush()
self.dirty = 0
def recover(self, db, restart=1):
import sys
verbose = self.verbose
filename = self.filename
if verbose:
print "attempting recovery from", self.filename
file = self.file
if file is not None:
if verbose: print "closing file"
self.file.close()
self.file = None
if verbose:
print "opens should generate an error if no recovery needed"
try:
file = open(filename, "rb")
file2 = open(self.backupname, "rb")
except:
if verbose:
print "no recovery needed:", filename
print sys.exc_type, sys.exc_value
sys.exc_traceback = None
return
file2.close()
if verbose: print "log found, recovering from", filename
records = self.read_records(file)
if verbose: print "scan for commit records"
commits = {}
for (i, (tid, op)) in records:
if op==COMMIT:
if verbose: print "transaction", tid, "commit at", i
commits[tid] = i
elif verbose:
print i, tid, "operation\n", op
if verbose: print commits, "commits total"
if verbose: print "applying commited operations, in order"
committed = commits.has_key
from types import StringType
        for (i, (tid, op)) in records:
            if tid is None or (committed(tid) and commits[tid]>i):
                if type(op) is StringType:
                    # markers (eg COMMIT) are not executable operations
                    if verbose:
                        print "skipping marker", tid, op
                    continue
                if verbose:
                    print "executing for", tid, i
                    print op
                op.relbind(db)
                op.eval()
            elif verbose:
                print "uncommitted operation", tid, i
                print op
if verbose:
print "recovery successful: clearing log file"
self.clear()
if restart:
if verbose:
print "recreating empty log file"
self.startup()
def read_records(self, file):
"""return log record as (index, (tid, op)) list"""
verbose = self.verbose
if verbose: print "reading log records to error"
import sys
records = {}
from sqlsem import deserialize
count = 0
while 1:
try:
data = checksum_undump(file)
except:
if verbose:
print "record read terminated with error", len(records)
print sys.exc_type, sys.exc_value
break
(transactionid, serial) = data
operation = deserialize(serial)
records[count] = (transactionid, operation)
if verbose:
print count, ": read for", transactionid
print operation
count = count+1
if verbose: print len(records), "records total"
records = records.items()
records.sort()
return records
def dump(self):
verbose = self.verbose
self.shutdown()
print "dumping log"
self.verbose = 1
try:
file = open(self.filename, "rb")
except:
print "DUMP FAILED, cannot open", self.filename
else:
self.read_records(file)
self.verbose = verbose
self.restart()
#!/usr/local/bin/python
"""build the sql grammar.
usage
python <thismodule>
for a simple install or
python <thismodule> force
for a full rebuild (with grammar regeneration).
In the current directory find or create sql.mar and sqlwhere.py
where sql.mar has the marshalled grammar data structures
for parsing sql and sqlwhere.py is a module that indicates
where the grammar file is as value of sqlwhere.filename.
"""
marfile = "sql.mar"
modfile = "sqlwhere.py"
print __doc__
from os import getcwd
cwd = getcwd()
modtemplate ="""
'''this module indicates where the sql datastructures are marshalled
Auto generated on install: better not touch!
'''
filename = '%s'
"""
wheremod = cwd + "/" + modfile
where = cwd + "/" + marfile
print
print "now creating", wheremod
f = open(wheremod, "w")
f.write( modtemplate % (where,) )
f.close()
from sqlgen import BuildSQL, getSQL
import sys
argv = sys.argv
force = 0
#print argv
if len(argv)>1 and argv[1]=="force":
force = 1
if not force:
try:
sql = getSQL()
except:
print "exception", sys.exc_type, sys.exc_value
print "during load of SQL grammar structures."
print "Apparently the SQL grammar requires regeneration"
force = 1
if force:
print "now generating parser structures (this might take a while)..."
where = cwd + "/" + marfile
print "building in", where
sql = BuildSQL(cwd + "/" + marfile)
print
print "done."
<HTML>
<HEAD>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=windows-1252">
<META NAME="Generator" CONTENT="Microsoft Word 97">
<TITLE>Gadfly: recovery</TITLE>
<META NAME="Template" CONTENT="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
</HEAD>
<BODY LINK="#0000ff" VLINK="#800080" BGCOLOR="#ff5555">
<H1>Gadfly Recovery</H1>
<P>In the event of a software glitch or crash Gadfly may terminate without having stored committed updates.
A recovery strategy attempts to make sure
that any unapplied committed updates are applied when the database restarts.
It is always assumed that there is only one primary (server) process controlling the database (possibly with
multiple clients). </P>
<P>Gadfly uses a simple LOG with deferred updates recovery mechanism. Recovery should be possible in the
presence of non-disk failures (server crash, system crash). Recovery after a disk crash is not available
for Gadfly as yet, sorry. </P>
<P>Due to portability problems Gadfly does not prevent multiple processes from "controlling" the database at
once. For read only access multiple instances are not a problem, but for access with modification, the processes
may collide and corrupt the database. For a read-write database, make sure only one (server) process controls
the database at any given time. </P>
<P>The only concurrency control mechanism that provides serializability for Gadfly as yet is the trivial one --
the server serves all clients serially. This will likely change for some variant of the system at some point. </P>
<P>This section explains the basic recovery mechanism. </P>
<H1>Normal operation</H1>
<H3>Precommit</H3>
<P>During normal operations any active tables are in memory in the process.
Uncommitted updates for a transaction are kept in "shadow tables" until the transaction commits using
<pre>
connection.commit()
</pre>
The shadow tables remember the mutations that have been applied to them. The permanent table copies
are only modified after commit time. A commit commits all updates for all cursors for the connection.
Unless the autocheckpoint feature is disabled (see below), a
commit normally triggers a checkpoint too.</P>
<P>A rollback
<pre>
connection.rollback()
</pre>
explicitly discards all uncommitted updates and restores the connection to the previously
committed state.</p>
<P>There is a 3rd level of shadowing for statement sequences executed by a cursor.
In particular the design attempts to make sure that if
<pre>
cursor.execute(statement)
</pre>
fails with an error, then the shadow database will contain no updates from
the partially executed statement (which may be a sequence of statements)
but will reflect other completed updates that may not have been committed. </P>
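<p><em>For example, a sketch of statement level atomicity (assuming cursor is a
cursor on an open connection; table and column names are from the test suite):</em></p>
<pre>
cursor.execute("insert into likes(drinker, beer, perday) values ('sam', 'bud', 2)")
try:
    cursor.execute("select syntax error from likes")   # fails to parse
except:
    pass
# the shadow database still holds the completed (uncommitted) insert,
# and nothing from the failed statement.
</pre>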
<H3>Commit</H3>
<P>At commit, operations applied to shadow tables are written
out in order of application to a log file before being permanently
applied to the active database. Finally a commit record is written to
the log and the log is flushed. At this point the transaction is considered
committed and recoverable, and a new transaction begins.
Finally the values of the shadow tables replace
the values of the permanent tables in the active database,
(but not in the database disk files until checkpoint, if autocheckpoint
is disabled). </P>
<H3>Checkpoint</H3>
<P>A checkpoint operation brings the persistent copies of the tables on
disk in sync with the in-memory copies in the active database. Checkpoints
occur at server shut down or periodically during server operation.
The checkpoint operation runs in isolation (with no database access
allowed during checkpoint). </P>
<p><em>Note: database connections normally run a checkpoint
after every commit, unless you set
<pre>
connection.autocheckpoint = 0
</pre>
which asks that checkpoints be done explicitly by the program using
<pre>
connection.commit() # if appropriate
connection.checkpoint()
</pre>
Explicit checkpoints should make the database perform better,
since the disk files are written less frequently. However, to avoid
unneeded (possibly time consuming) recovery operations after a database
is shut down and restarted, always execute an explicit checkpoint at
server shutdown, and periodically during long server runs.</em></p>
<p><strong>Note that if any outstanding updates are uncommitted
at the time of a checkpoint (when autocheckpoint is disabled), those
updates will be lost (i.e., the checkpoint is equivalent to a rollback).
</strong></p>
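<p><em>For example, a minimal sketch (the database name "test" and directory
"dbtest" are placeholders):</em></p>
<pre>
from gadfly import gadfly
connection = gadfly("test", "dbtest")
connection.autocheckpoint = 0     # defer checkpoints
cursor = connection.cursor()
cursor.execute(
    "insert into frequents(drinker, bar, perweek) values ('sally', 'cheers', 2)")
connection.commit()       # logged and recoverable, but disk files not rewritten yet
# ... more transactions ...
connection.checkpoint()   # now bring the disk files up to date
</pre>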
<P>At checkpoint the old persistent value of each table that has been updated since
the last checkpoint is copied to a back up file, and the currently active value is
written to the permanent table file. Finally if the data definitions have changed
the old definitions are stored to a backup file and the new definitions are written
to the permanent data definition file. To signal successful checkpoint the
log file is then deleted.</P>
<P>
At this point (after log deletion) the database is considered
quiescent (no recovery required). Finally all back up table files are deleted.
[Note, it might be good to keep old logs around... Comments?] </P>
<P>Each table file representation is annotated with a checksum,
so the recovery system can check that the file was stored correctly. </P>
<H1>Recovery</H1>
<P>When a database restarts it automatically determines whether
the last active instance shut down normally and whether recovery
is required. Gadfly discovers the need for recovery by detecting
a non-empty current log file. </P>
<P>To recover the system Gadfly first scans the log file to determine committed transactions.
Then Gadfly rescans the log file applying the operations of committed
transactions to the in memory table values in the order recorded.
When reading in table values for the purpose of recovery Gadfly looks
for a backup file for the table first. If the backup is not corrupt,
its value is used, otherwise the permanent table file is used. </P>
<P>After recovery Gadfly runs a normal checkpoint before resuming
normal operation. </P>
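<p><em>Recovery can also be exercised by hand, as the test suite does
(a sketch):</em></p>
<pre>
connection.dumplog()   # inspect the pending log, if any
connection.restart()   # reread from disk, applying committed work from the log
</pre>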
<p>
<strong>
Please note: Although I have attempted to provide a robust
implementation
for this software I do not guarantee its correctness. I hope
it will work well for you but I do not assume any legal
responsibility for problems anyone may have during use
of these programs.
</strong>
</BODY>
</HTML>
"""gadfly server mode
script usage
python gfserve.py port database directory password [startup]
test example
python gfserve.py 2222 test dbtest admin gfstest
port is the port to listen to
database is the database to start up. (must exist!)
directory is the directory the database is in.
password is the administrative access password.
startup if present should be the name of a module to use
for startup. The Startup module must contain a function
Dict = startup(admin_policy, connection, Server_instance)
which performs any startup actions needed on the database
and returns either None or a Dictionary of
name --> policy objects
where the policy objects describe policies beyond the
admin policy. The startup function may also
modify the admin_policy (disabling queries for example).
The arguments passed to startup are:
admin_policy: the administrative policy
eg you could turn queries off for admin, using admin
only for server maintenance, or you could add prepared
queries to the admin_policy.
connection: the database connection
eg you could perform some inserts before server start
also needed to make policies.
Server_instance
Included for additional customization.
Create policies using
P = gfserve.Policy(name, password, connection, queries=0)
-- for a "secure" policy with only prepared queries allowed,
or
P = gfserve.Policy(name, password, connection, queries=1)
-- for a policy with full access and arbitrary statement
execution.
add a "named prepared statement" to a policy using
P[name] = statement
for example
P["updatenorm"] = '''
update frequents
set bar=?, perweek=?
where drinker='norm'
'''
in this case 'updatenorm' requires 2 dynamic parameters when
invoked from a client.
Script stdout lists server logging information.
Some server administration services (eg shutdown)
are implemented by the script interpretation of gfclient.py.
"""
import socket, gadfly
from gfsocket import \
reply_exception, reply_success, Packet_Reader, certify
def main():
"""start up the server."""
import sys
try:
done = 0
argv = sys.argv
nargs = len(argv)
#print nargs, argv
if nargs<5:
sys.stderr.write("gfserve: not enough arguments: %s\n\n" % argv)
sys.stderr.write(__doc__)
return
[port, db, dr, pw] = argv[1:5]
print "gfserve startup port=%s db=%s, dr=%s password omitted" % (
port, db, dr)
from string import atoi
port = atoi(port)
startup = None
if nargs>5:
startup = argv[5]
print "gfserve: load startup module %s" % startup
S = Server(port, db, dr, pw, startup)
S.init()
print "gfserve: server initialized, setting stderr=stdout"
sys.stderr = sys.stdout
print "gfserve: starting the server"
S.start()
done = 1
finally:
if not done:
print __doc__
# general error
ServerError = "ServerError"
# no such prepared name
PreparedNameError = "PreparedNameError"
# actions
# shut down the server (admin policy only)
# arguments = ()
# shutdown the server with no checkpoint
SHUTDOWN = "SHUTDOWN"
# restart the server (admin only)
# arguments = ()
# restart the server (recover)
# no checkpoint
RESTART = "RESTART"
# checkpoint the server (admin only)
# arguments = ()
# checkpoint the server
CHECKPOINT = "CHECKPOINT"
# exec prepared statement
# arguments = (prepared_name_string, dyn=None)
# execute the prepared statement with dynamic args.
# autocommit.
EXECUTE_PREPARED = "EXECUTE_PREPARED"
# exec any statement (only if not disabled)
# arguments = (statement_string, dyn=None)
# execute the statement with dynamic args.
# autocommit.
EXECUTE_STATEMENT = "EXECUTE_STATEMENT"
ACTIONS = [SHUTDOWN, RESTART, CHECKPOINT,
EXECUTE_PREPARED, EXECUTE_STATEMENT]
class Server:
"""database server: listen for commands"""
verbose = 1
# wait at most this many seconds on each select in the server loop
select_timeout = 60*5
# do a checkpoint each X times thru server loop
check_loop = 5
# for now works like finger/http
# == each command is a separate connection.
# all sql commands constitute separate transactions
# which are automatically committed upon success.
# for now commands come in as
#   1 length (newline terminated decimal string)
#   2 (actor_name, certificate, marshalled data) (marshalled tuple)
# responses come back as
#   1 length (newline terminated decimal string)
#   2 results (marshalled value)
def __init__(self, port, db, dr, pw, startup=None):
self.port = port
self.db = db
self.dr = dr
self.pw = pw
self.startup = startup
self.connection = None
self.socket = None
# prepared cursors dictionary.
self.cursors = {}
self.policies = {}
self.admin_policy = None
def start(self):
"""after init, listen for commands."""
from gfsocket import READY, ERROR, unpack_certified_data
import sys
verbose = self.verbose
socket = self.socket
connection = self.connection
policies = self.policies
admin_policy = self.admin_policy
from select import select
pending_connects = {}
while 1:
try:
# main loop
if self.check_loop<0: self.check_loop=5
for i in xrange(self.check_loop):
if verbose:
print "main loop on", socket, connection
# checkpoint loop
sockets = [socket]
if pending_connects:
sockets = sockets + pending_connects.keys()
# wait for availability
if verbose:
print "server: waiting for connection(s)"
(readables, dummy, errors) = select(\
sockets, [], sockets[:], self.select_timeout)
if socket in errors:
raise ServerError, \
"listening socket in error state: aborting"
# clean up error connection sockets
for s in errors:
del pending_connects[s]
s.close()
# get a new connection, if available
if socket in readables:
readables.remove(socket)
(conn, addr) = socket.accept()
if 1 or verbose:
print "connect %s" % (addr,)
reader = Packet_Reader(conn)
pending_connects[conn] = reader
# poll readable pending connections, if possible
for conn in readables:
reader = pending_connects[conn]
mode = reader.mode
if not mode==READY:
if mode == ERROR:
# shouldn't happen
try:
conn.close()
del pending_connects[conn]
except: pass
continue
else:
try:
reader.poll()
finally:
pass # AFTER DEBUG CHANGE THIS!
# in blocking mode, service ready request,
# commit on no error
for conn in pending_connects.keys():
reader = pending_connects[conn]
mode = reader.mode
if mode == ERROR:
try:
del pending_connects[conn]
conn.close()
except: pass
elif mode == READY:
try:
del pending_connects[conn]
data = reader.data
(actor_name, cert, md) = \
unpack_certified_data(data)
# find the policy for this actor
if not policies.has_key(actor_name):
if verbose:
print "no such policy: "+actor_name
reply_exception(NameError,
"no such policy: "+actor_name, conn)
policy = None
else:
if verbose:
print "executing for", actor_name
policy = policies[actor_name]
policy.action(cert, md, conn)
except SHUTDOWN:
if policy is admin_policy:
print \
"shutdown on admin policy: terminating"
connection.close()
socket.close()
# NORMAL TERMINATION:
return
except RESTART:
if policy is admin_policy:
print \
"restart from admin policy: restarting connection"
connection.restart()
except CHECKPOINT:
if policy is admin_policy:
print \
"checkpoint from admin policy: checkpointing now."
connection.checkpoint()
except:
tb = sys.exc_traceback
info = "%s %s" % (sys.exc_type,
str(sys.exc_value))
if verbose:
from traceback import print_tb
print_tb(tb)
print "error in executing action: "+info
reply_exception(
ServerError, "exception: "+info, conn)
#break # stop after first request serviced!
except:
# except of main while 1 try statement
tb = sys.exc_traceback
ty = sys.exc_type
va = sys.exc_value
print "UNEXPECTED EXCEPTION ON MAINLOOP"
from traceback import print_tb
print_tb(tb)
print "exception:", ty, va
if not pending_connects:
pending_connects = {}
print "server: checkpointing"
connection.checkpoint()
def init(self):
self.getconnection()
self.startup_load()
# get socket last in case of failure earlier
self.getsocket()
HOST = ""
BACKLOG = 5
def getsocket(self):
"""get the listening socket"""
verbose = self.verbose
import socket, sys
if verbose:
print "initializing listener socket"
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
if verbose:
print "trying to set REUSEADDR",\
sock.getsockopt(socket.SOL_SOCKET,
socket.SO_REUSEADDR)
sock.setsockopt(socket.SOL_SOCKET,
socket.SO_REUSEADDR, 1)
except:
if verbose:
print "set of REUSEADDR failed", sys.exc_type, sys.exc_value
pass
sock.bind((self.HOST, self.port))
sock.listen(self.BACKLOG)
self.socket = sock
return sock
def getconnection(self):
"""get the db connection"""
from gadfly import gadfly
c = self.connection = gadfly(self.db, self.dr)
# don't automatically checkpoint upon commit
c.autocheckpoint = 0
def startup_load(self):
"""setup the policies and load startup module"""
admin_policy = self.get_admin_policy()
module_name = self.startup
if module_name:
module = __import__(module_name)
# startup(admin_policy, connection, Server_instance)
test = module.startup(admin_policy, self.connection, self)
if test is not None:
self.policies = test
self.policies["admin"] = admin_policy
def get_admin_policy(self):
"""return the admin policy for priviledged access."""
p = self.admin_policy = Policy(
"admin", self.pw, self.connection, queries=1)
return p
class Policy:
"""security policy"""
verbose = 0
# allow arbitrary sql statements
general_queries = 0
# dictionary of named accesses as strings
named_accesses = None
# dictionary of prepared named accesses
prepared_cursors = None
def __init__(self, name, password, connection, queries=0):
"""create a policy (name, password, connection)
name is the name of the policy
password is the access policy (None for no password)
connection is the database connection.
set queries to allow general accesses (unrestricted)
"""
if self.verbose:
print "policy.__init__", name
self.general_queries = queries
self.name = name
self.password = password
self.connection = connection
self.socket = None
self.named_accesses = {}
self.prepared_cursors = {}
def __setitem__(self, name, value):
if self.verbose:
print "policy", self.name, ":", (name, value)
from types import StringType
if type(name) is not StringType or type(value) is not StringType:
raise ValueError, "cursor names and contents must be strings"
self.named_accesses[name] = value
def execute_named(self, name, params=None):
"""execute a named (prepared) sql statement"""
if self.verbose:
print "policy", self.name, "executes", name, params
na = self.named_accesses
pc = self.prepared_cursors
con = self.connection
if not na.has_key(name):
raise PreparedNameError, "unknown access name: %s" % name
stat = na[name]
if pc.has_key(name):
# get prepared query
cursor = pc[name]
else:
# prepare a new cursor
pc[name] = cursor = con.cursor()
return self.execute(cursor, stat, params)
def execute(self, cursor, statement, params=None):
"""execute a statement in a cursor"""
if self.verbose:
print "policy", self.name, "executes", statement, params
cursor.execute(statement, params)
# immediate commit!
self.connection.commit()
try:
result = cursor.fetchall()
description = cursor.description
result = (description, result)
except:
result = None
return result
def execute_any_statement(self, statement, params=None):
"""execute any statement."""
if self.verbose:
print "policy", self.name, "executes", statement, params
con = self.connection
cursor = con.cursor()
return self.execute(cursor, statement, params)
def action(self, certificate, datastring, socket):
"""perform a database/server action after checking certificate"""
verbose = self.verbose
if verbose:
print "policy", self.name, "action..."
# make sure the certificate checks out
if not self.certify(datastring, certificate, self.password):
raise ServerError, "password certification failure"
# unpack the datastring
from marshal import loads
test = loads(datastring)
#if verbose:
#print "data is", test
(action, moredata) = test
import sys
if action in ACTIONS:
action = "policy_"+action
myaction = getattr(self, action)
try:
data = apply(myaction, moredata+(socket,))
#self.reply_success(data)
# pass up server level requests as exceptions
except SHUTDOWN, detail:
raise SHUTDOWN, detail
except RESTART, detail:
raise RESTART, detail
except CHECKPOINT, detail:
raise CHECKPOINT, detail
except:
tb = sys.exc_traceback
exceptiondata = "%s\n%s" %(sys.exc_type,
str(sys.exc_value))
if verbose:
from traceback import print_tb
print_tb(tb)
self.reply_exception(ServerError,
"unexpected exception: "+exceptiondata, socket)
raise ServerError, exceptiondata
else:
raise ServerError, "unknown action: "+`action`
def certify(self, datastring, certificate, password):
# hook for subclassing
return certify(datastring, certificate, password)
def policy_SHUTDOWN(self, socket):
self.reply_success("attempting server shutdown", socket)
raise SHUTDOWN, "please shut down the server"
def policy_RESTART(self, socket):
self.reply_success("attempting server restart", socket)
raise RESTART, "please restart the server"
def policy_CHECKPOINT(self, socket):
self.reply_success("attempting server checkpoint", socket)
raise CHECKPOINT, "please checkpoint the server"
def policy_EXECUTE_PREPARED(self, name, dyn, socket):
try:
result = self.execute_named(name, dyn)
self.reply_success(result, socket)
except PreparedNameError, detail:
self.reply_exception(PreparedNameError,
"no such prepared statement: "+name,
socket)
def policy_EXECUTE_STATEMENT(self, stat, dyn, socket):
if not self.general_queries:
self.reply_exception(ServerError,
"general statements disallowed on this policy",
socket)
raise ServerError, "illegal statement attempt for: "+self.name
result = self.execute_any_statement(stat, dyn)
self.reply_success(result, socket)
def reply_exception(self, exc, info, socket):
# hook for subclassing
reply_exception(exc, info, socket)
def reply_success(self, data, socket):
# hook for subclassing
reply_success(data, socket)
if __name__=="__main__": main()
"""socket interactions for gadfly client and server"""
from select import select
# responses
SUCCESS = "SUCCESS"
EXCEPTION = "EXCEPTION"
def reply_exception(exception, info, socket):
"""send an exception back to the client"""
# any error is invisible to client
from gfserve import ServerError
import sys
try:
reply( (EXCEPTION, (exception, info)), socket)
except:
#info = "%s %s" % (sys.exc_type, sys.exc_value)
socket.close()
#raise ServerError, "reply_exception failed: "+`info`
def reply_success(data, socket):
"""report success with data back to client"""
reply( (SUCCESS, data), socket)
def reply(data, socket):
from marshal import dumps
marshaldata = dumps(data)
send_packet(socket, marshaldata)
socket.close()
def send_packet(socket, data):
"""blast out a length marked packet"""
send_len(data, socket)
socket.send(data)
def send_len(data, socket):
"""send length of data as cr terminated int rep"""
info = `len(data)`+"\n"
socket.send(info)
def send_certified_action(actor_name, action, arguments, password, socket):
from marshal import dumps
marshaldata = dumps( (action, arguments) )
cert = certificate(marshaldata, password)
#print actor_name, cert, marshaldata
marshaldata = dumps( (actor_name, cert, marshaldata) )
send_packet(socket, marshaldata)
def unpack_certified_data(data):
from marshal import loads
# sanity check
unpack = (actor_name, certificate, marshaldata) = loads(data)
return unpack
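# Wire format sketch (following send_packet/send_len above): each packet is
#     str(len(marshaldata)) + "\n" + marshaldata
# where, for a request, marshaldata = marshal.dumps((actor_name, cert, data)),
# cert = certificate(data, password), and data = marshal.dumps((action, arguments)).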
def recv_data(socket, timeout=10):
"""receive data or time out"""
from time import time
endtime = time() + timeout
reader = Packet_Reader(socket)
done = 0
while not done:
timeout = endtime - time()
if timeout<0:
raise IOError, "socket time out (1)"
(readable, dummy, error) = select([socket], [], [socket], timeout)
if error:
raise IOError, "socket in error state"
if not readable:
raise IOError, "socket time out (2)"
reader.poll()
done = (reader.mode==READY)
return reader.data
def interpret_response(data):
"""interpret response data, raise exception if needed"""
from marshal import loads
(indicator, data) = loads(data)
if indicator==SUCCESS:
return data
elif indicator==EXCEPTION:
# ???
raise EXCEPTION, data
else:
raise ValueError, "unknown indicator: "+`indicator`
# packet reader modes
LEN = "LEN"
DATA = "DATA"
READY = "READY"
ERROR = "ERROR"
BLOCK_SIZE = 4028
LEN_LIMIT = BLOCK_SIZE * 10
class Packet_Reader:
"""nonblocking pseudo-packet reader."""
# packets come in as decimal_len\ndata
# (note: newline terminated, not crlf)
# kick too large requests if set
limit_len = LEN_LIMIT
def __init__(self, socket):
self.socket = socket
self.length = None
self.length_remaining = None
self.len_list = []
self.data_list = []
self.received = ""
self.data = None
self.mode = LEN
def __len__(self):
if self.mode is LEN:
raise ValueError, "still reading length"
return self.length
def get_data(self):
if self.mode is not READY:
raise ValueError, "still reading"
return self.data
def poll(self):
mode = self.mode
if mode is READY:
raise ValueError, "data is ready"
if mode is ERROR:
raise ValueError, "socket error previously detected"
socket = self.socket
(readable, dummy, error) = select([socket], [], [socket], 0)
if error:
self.socket.close()
self.mode = ERROR
raise ValueError, "socket is in error state"
if readable:
if mode is LEN:
self.read_len()
# note: do not fall thru automatically
elif mode is DATA:
self.read_data()
def read_len(self):
"""assume socket is readable now, read length"""
socket = self.socket
received = self.received
len_list = self.len_list
if not received:
# 10 bytes at a time until len is read.
received = socket.recv(10)
while received:
# consume, test one char
input = received[0]
received = received[1:]
if input == "\n":
# done reading length
from string import join, atoi
try:
length = self.length = atoi(join(len_list, ""))
except:
self.mode = ERROR
socket.close()
raise ValueError, "bad len string? "+`len_list`
self.received = received
self.length_remaining = length
self.mode = DATA
limit_len = self.limit_len
if limit_len and length>limit_len:
raise ValueError, "Length too big: "+`(length, limit_len)`
return
if len(len_list)>10:
self.mode = ERROR
socket.close()
raise ValueError, "len_list too long: "+`len_list`
len_list.append(input)
if not received:
(readable, dummy, error) = select(\
[socket], [], [socket], 0)
if error:
self.mode = ERROR
socket.close()
raise ValueError, "socket in error state"
if readable:
received = socket.recv(10)
# remember extra data received.
self.received = received
def read_data(self):
# assume socket is readable
socket = self.socket
received = self.received
length_remaining = self.length_remaining
data_list = self.data_list
if received:
data_list.append(received)
self.received = ""
length_remaining = length_remaining - len(received)
recv_len = min(length_remaining, BLOCK_SIZE)   # read at most the remaining bytes
received = socket.recv(recv_len)
if received:
data_list.append(received)
length_remaining = length_remaining - len(received)
if length_remaining<1:
self.mode = READY
from string import join
self.data = join(data_list, "")
self.length_remaining = length_remaining
def certificate(String, password):
"""generate a certificate for a string, using a password"""
from md5 import new
if not String:
raise ValueError, "cannot generate certificate for empty string"
taggedstring = password + String
return new(taggedstring).digest()
def certify(String, cert, password):
"""check a certificate for a string"""
return certificate(String, password) == cert
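# Usage sketch: a certificate proves knowledge of the password for a payload:
#     cert = certificate(marshaldata, "secret")
#     certify(marshaldata, cert, "secret")   # -> 1 (true)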
"""test script for gadfly client and server
Usage: This script interacts with the test database generated
by gftest.py. To start the server from the directory containing
the dbtest directory use:
python gfstest.py start
THIS WILL ONLY WORK IF YOU CREATED THE test DATABASE IN
DIRECTORY dbtest FIRST USING
python gftest.py dbtest
UNLESS YOU RUN THE SERVER IN THE BACKGROUND THE SERVER WILL
HOG THE WINDOW YOU STARTED IT IN AND YOU WILL HAVE TO USE ANOTHER
WINDOW UNTIL THE SERVER IS SHUT DOWN (SEE BELOW).
Then from *anywhere* (on the same machine) access the database
using
python gfstest.py restart
- restart the server (reread the database)
python gfstest.py checkpoint
- force checkpoint the server
python gfstest.py queries
- run some example queries and updates
python gfstest.py policy_test
- test the policies test and test1 created by the startup
function in this module.
python gfstest.py bogusshutdown
- attempt to shut down the server with a bogus password
[should generate an exception]
...and finally
python gfstest.py shutdown
- shut down the server for real.
As mentioned the startup function of this module illustrates
how to create a "startup" function for a server and initialize policy
objects with named, prepared queries.
"""
PORT = 2222
DB = "test"
DBDIR = "dbtest"
PW = "admin"
STARTUP = "gfstest"
import sys, socket
def main():
argv = sys.argv
command = argv[1]
if command=="start":
print "attempting to start the server"
from gfserve import Server
print "making a server on", PORT, DB, DBDIR, PW, STARTUP
S = Server(PORT, DB, DBDIR, PW, STARTUP)
print "initializing the server"
S.init()
print "starting the server", S.connection
S.start()
elif command=="shutdown":
dosimple("shutdown", PW)
elif command=="bogusshutdown":
print "BOGUS shutdown attempt"
dosimple("shutdown", "bad password")
elif command=="restart":
dosimple("restart", PW)
elif command=="checkpoint":
dosimple("checkpoint", PW)
elif command=="queries":
doqueries()
elif command=="policy_test":
policy_test()
else:
print "unknown command", command
print __doc__
def policy_test():
"""test the test1 and test policies"""
print "testing non-admin policies test and test1"
from gfclient import gfclient
import sys
machine = socket.gethostname()
conn = gfclient("test", PORT, "test", machine)
cursor = conn.cursor()
print "testing test policy: nan values before:"
cursor.execute_prepared("getnan")
for x in cursor.fetchall():
print x
print "updating nan"
cursor.execute_prepared("updatenan", ("pabst", 4))
print "nan after"
cursor.execute_prepared("getnan")
for x in cursor.fetchall():
print x
print "updating nan again"
cursor.execute_prepared("updatenan", ("rollingrock", 1))
print "trying an illegal update"
try:
cursor.execute("delete from frequents")
except:
print "exception", sys.exc_type, sys.exc_value
print "as expected"
else:
raise "DAMN!", "illegal query apparently completed!!!"
print; print "testing policy test1"; print
conn = gfclient("test1", PORT, "test1", machine)
cursor = conn.cursor()
print "getting norm"
cursor.execute_prepared("qlike", ("norm",))
print cursor.description
for x in cursor.fetchall():
print x
print "trying an illegal query again"
try:
cursor.execute("create table test(name varchar)")
except:
print "exception", sys.exc_type, sys.exc_value
print "as expected"
else:
raise "Damn!(2)", "illegal query apparently completed"
def startup(admin_policy, connection, Server_instance):
"""example startup script.
add a policies test and test1 passwords same
test1 is allowed to query the frequents table by name
test is allowed to update likes where drinker='nan'
also add prepared query dumpwork to admin_policy.
"""
from gfserve import Policy
admin_policy["dumpwork"] = "select * from work"
test1 = Policy("test1", "test1", connection, queries=0)
test = Policy("test", "test", connection, queries=0)
test1["qlike"] = "select * from likes where drinker=?"
test["updatenan"] = """
update likes
set beer=?, perday=?
where drinker='nan'
"""
test["getnan"] = """
select * from likes where drinker='nan'
"""
return {"test": test, "test1": test1}
def doqueries():
print "executing queries and updates"
from gfclient import gfclient
import sys
machine = socket.gethostname()
conn = gfclient("admin", PORT, PW, machine)
cursor = conn.cursor()
for q in admin_queries:
print;print;print q;print
try:
cursor.execute(q)
except:
print "exception in execute"
print sys.exc_type
v = sys.exc_value
from types import TupleType, ListType
if type(v) in (TupleType, ListType):
for x in v: print x
else:
print v
else:
print "executed"
#print q
print "description"
print cursor.description
print "results"
try:
r = cursor.fetchall()
if r is None:
print "no results"
else:
for x in r:
print x
except:
print "exception in results"
print sys.exc_type, sys.exc_value
print dir(cursor)
# try dumpwork
print; print; print "dumpwork"; print
cursor.execute_prepared("dumpwork")
for x in cursor.fetchall():
print x
# try dynamic parameters
stat = """
select distinct drinker
from likes l, serves s
where l.beer = s.beer and s.bar=?
"""
print; print stat; print "dynamic query ?=cheers"
cursor.execute(stat, ("cheers",))
for x in cursor.fetchall():
print x
admin_queries = [
"""select count(*) from work""",
"""select * from frequents""",
"""select count(*) from frequents""",
"""select count(drinker) from frequents""",
"""insert into frequents(drinker, bar, perweek)
values ('sally', 'cheers', 2)""",
"""select * from frequents""",
"""select syntax error from work""",
"""select drinker, count(bar) from frequents
group by drinker""",
]
def dosimple(command, pw):
print "attempting remote %s (%s) for server on local machine" % (command, pw)
from gfclient import gfclient
machine = socket.gethostname()
conn = gfclient("admin", PORT, pw, machine)
action = getattr(conn, command)
print action()
if __name__=="__main__":
main()
"""test script for gadfly
usage: python gftest.py <directory>
creates a test database in <directory> in the files
test.dfs LIKES.grl SERVES.grl FREQUENTS.grl
"""
def test(directory):
print "testing"
from gadfly import gadfly
connect = gadfly()
connect.startup("test", directory)
curs = connect.cursor()
print
print "TABLE CREATES"
for x in table_creates:
print x
curs.execute(x)
curs.execute("create table empty (nothing varchar)")
C = """
CREATE TABLE work (
name VARCHAR,
hours INTEGER,
rate FLOAT)
"""
print C
curs.execute(C)
print
C = """
CREATE TABLE accesses (
page VARCHAR,
hits INTEGER,
month INTEGER)
"""
print C
curs.execute(C)
print
print "INSERTS"
C = """
INSERT INTO work(name, hours, rate) VALUES (?, ?, ?)
"""
D = [
("sam", 30, 40.2),
("norm", 45, 10.2),
("woody", 80, 5.4),
("diane", 3, 4.4),
("rebecca", 120, 12.9),
("cliff", 26, 200.00),
("carla", 9, 3.5),
]
for x in D: print x
curs.execute(C, D)
C = """
INSERT INTO accesses(page, month, hits) VALUES (?, ?, ?)
"""
D = [
("index.html", 1, 2100),
("index.html", 2, 3300),
("index.html", 3, 1950),
("products.html", 1, 15),
("products.html", 2, 650),
("products.html", 3, 98),
("people.html", 1, 439),
("people.html", 2, 12),
("people.html", 3, 665),
]
for x in D: print x
curs.execute(C, D)
for (table, stuff) in dpairs:
ins = "insert into %s values (?, ?, ?)" % table
if table!="frequents":
for parameters in dataseq(stuff):
print "singleinsert", table, parameters
curs.execute(ins, parameters)
else:
print
print "multiinsert", table
parameters = dataseq(stuff)
for p in parameters:
print p
print "multiinsert..."
curs.execute(ins, parameters)
print;print
print
print "INDICES"
for ci in indices:
print ci
curs.execute(ci)
print
print "QUERIES"
for x in workqueries:
print;print
print x
curs.execute(x)
print curs.pp()
statement = """select name, hours
from work"""
curs.execute(statement)
print "Hours worked this week"
print
for (name,hours) in curs.fetchall():
print "worker", name, "worked", hours, "hours"
print
print "end of work report"
#return
for x in queries:
print; print
print x
curs.execute(x)
#for x in curs.commands:
# print x
all = curs.fetchall()
if not all:
print "empty!"
else:
print curs.pp()
#for t in all:
#print t
#return
print
print "DYNAMIC QUERIES"
for (x,y) in dynamic_queries:
print; print
print x
print "dynamic=", y
curs.execute(x, y)
#for x in curs.commands:
# print x
all = curs.fetchall()
if not all:
print "empty!"
else:
for t in all:
print t
print "repeat test"
from time import time
for x in repeats:
print "repeating", x
now = time()
curs.execute(x)
print time()-now, "first time"
now = time()
curs.execute(x)
print time()-now, "second time"
now = time()
curs.execute(x)
print time()-now, "third time"
print "*** committing work"
connect.commit()
connect.close()
print; print
return connect
table_creates = [
"create table frequents (drinker varchar, bar varchar, perweek integer)",
"create table likes (drinker varchar, beer varchar, perday integer)",
"create table serves (bar varchar, beer varchar, quantity integer)",
"""Create view nondrinkers(d, b)
as select drinker, bar
from frequents
where drinker not in
(select drinker from likes)""",
]
fdata = """\
adam lolas 1
woody cheers 5
sam cheers 5
norm cheers 3
wilt joes 2
norm joes 1
lola lolas 6
norm lolas 2
woody lolas 1
pierre frankies 0"""
sdata = """\
cheers bud 500
cheers samaddams 255
joes bud 217
joes samaddams 13
joes mickies 2222
lolas mickies 1515
lolas pabst 333
winkos rollingrock 432
frankies snafu 5"""
ldata = """\
adam bud 2
wilt rollingrock 1
sam bud 2
norm rollingrock 3
norm bud 2
nan sierranevada 1
woody pabst 2
lola mickies 5"""
dpairs = [
("frequents", fdata),
("serves", sdata),
("likes", ldata),
]
def dataseq(s):
from string import split
l = split(s, "\n")
result = map(split, l)
from string import atoi
for l in result:
l[2] = atoi(l[2])
result = map(tuple, result)
return result
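# For example (a sketch):
#     dataseq("adam lolas 1\nwoody cheers 5")
#     == [('adam', 'lolas', 1), ('woody', 'cheers', 5)]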
indices = [
"""create index fd on frequents (drinker)""",
"""create index sbb on serves (beer, bar)""",
"""create index lb on likes (beer)""",
"""create index fb on frequents (bar)""",
]
repeats = [
"""-- drinkers bars and beers
-- where the drinker likes the beer
-- the bar serves the beer
-- and the drinker frequents the bar
select f.drinker, l.beer, s.bar
from frequents f, serves s, likes l
where f.drinker=l.drinker and s.bar=f.bar and s.beer=l.beer""",
"""select *
from frequents as f, serves as s
where f.bar = s.bar and
not exists(
select l.drinker, l.beer
from likes l
where l.drinker=f.drinker and s.beer=l.beer)""",
"""select * from frequents
where drinker = 'norm'""",
]
workqueries = [
"""-- everything from work
select * from work""",
"""select median(hours)
from work""",
"""select *
from work
where name='carla' -- just carla""",
"""select name, ' ain''t worth ', rate
from work -- the works table has more columns
where name='carla'""",
"""select name, -- name of worker
hours -- hours worked
from work""",
"""select name, hours*rate as pay
from work
order by name""",
"""select name, rate
from work
where rate>=20 and rate<=100""",
"""select name, rate
from work
where rate between 20 and 100""",
"""select name, rate
from work
where rate not between 20 and 100""",
"""select name, rate, hours, hours*rate as pay
from work""",
"""select name, rate, hours, hours*rate as pay
from work
where hours*rate>500 and (rate<100 or hours>5)""",
"""select name, rate, hours, hours*rate as pay
from work
where hours*rate>500 and rate<100 or hours>5""",
"""select avg(rate), min(hours), max(hours), sum(hours*rate) as expenses
from work""",
"""select * from accesses""",
"""select month, sum(hits) as totalhits
from accesses
where month<>1
group by month
order by 2""",
"""select month, sum(hits) as totalhits
from accesses
group by month
order by 2 desc""",
"""select month, sum(hits) as totalhits
from accesses
group by month
having sum(hits)<3000
order by 2 desc""",
"""select count(distinct month), count(distinct page)
from accesses""",
"""select month, hits, page
from accesses
order by month, hits desc""",
]
queries = [
"""select * from nondrinkers""",
"""select drinker as x from likes
union select beer as x from serves
union select drinker as x from frequents""",
"""select f.drinker, s.bar, l.beer
from frequents f, serves s, likes l
where f.drinker=l.drinker and s.beer=l.beer and s.bar=f.bar""",
"""select * from
likes where beer in ('bud', 'pabst')""",
"""select l.beer, l.drinker, count(distinct s.bar)
from likes l, serves s
where l.beer=s.beer
group by l.beer, l.drinker
order by 3 desc""",
"""select l.beer, l.drinker, count(distinct s.bar) as nbars
from likes l, serves s
where l.beer=s.beer
group by l.beer, l.drinker
union distinct
select beer, drinker, 0 as nbars
from likes
where beer not in (select beer from serves)
order by 3 desc""",
"""select avg(perweek) from frequents""",
"""select *
from frequents
where perweek <= (select avg(perweek) from frequents)""",
"""select *
from serves""",
"""select bar, avg(quantity)
from serves
group by bar""",
"""select *
from serves s1
where quantity <= (select avg(quantity)
from serves s2
where s1.bar=s2.bar)""",
"""select * from frequents
where perweek > (select avg(perweek) from frequents)""",
"""select * from frequents f1
where perweek > (
select avg(perweek) from frequents f2
where f1.drinker = f2.drinker)""",
"""select * from frequents
where perweek < any (select perweek from frequents)""",
"""select * from frequents
where perweek >= all (select perweek from frequents)""",
"""select * from frequents
where perweek <= all (select perweek from frequents)""",
"""select * from frequents f1
where perweek < any
(select perweek from frequents f2
where f1.drinker = f2.drinker)""",
"""select * from frequents f1
where perweek = all
(select perweek from frequents f2
where f1.drinker = f2.drinker)""",
"""select * from frequents f1
where perweek <> all
(select perweek from frequents f2
where f1.drinker <> f2.drinker)""",
"""select beer
from serves
where beer = any (select beer from likes)""",
"""select beer
from serves
where beer <> all (select beer from likes)""",
"""select beer
from serves
where beer in (select beer from likes)""",
"""select beer
from serves
where beer not in (select beer from likes)""",
#"""select f1.drinker, f2.drinker
# from frequents f1, frequents f2
# where f1.drinker<>f2.drinker""",
#"""select *
# from frequents f1
# where not exists
# (select f2.drinker
# from frequents f2
# where f1.drinker<>f2.drinker and f1.bar=f2.bar)""",
"""select *
from frequents
where perweek between 2 and
(select avg(perweek) from frequents)""",
"""select *
from frequents
where perweek not between 2 and 5""",
# "stop",
"""select f.drinker, l.beer, s.bar
from frequents f, serves s, likes l
where f.drinker=l.drinker and s.bar=f.bar and s.beer=l.beer""",
#"stop!",
"""select * from serves""",
"""select * from likes""",
"""select * from frequents
where drinker = 'norm'""",
"""select drinker from likes
union
select drinker from frequents""",
"""select drinker from likes
union distinct
select drinker from frequents""",
"""select drinker from likes
except
select drinker from frequents""",
"""select drinker from likes
intersect
select drinker from frequents""",
# "stop!",
"""select * from frequents
where drinker>'norm'""",
"""select * from frequents
where drinker<='norm'""",
"""select * from frequents
where drinker>'norm' or drinker<'b'""",
"""select * from frequents
where drinker<>'norm' and 'pierre'<>drinker""",
"""select * from frequents
where drinker<>'norm'""",
"""select (drinker+' ')*2+bar
from frequents
where drinker>bar""",
"""select *
from frequents as f, serves as s
where f.bar = s.bar""",
"""select *
from frequents as f, serves as s
where f.bar = s.bar and
not exists(
select l.drinker, l.beer
from likes l
where l.drinker=f.drinker and s.beer=l.beer)""",
"""select *
from likes l, frequents f
where f.bar='cheers' and l.drinker=f.drinker and l.beer='bud'""",
"""select *
from serves s
where not exists (
select *
from likes l, frequents f
where f.bar = s.bar and f.drinker=l.drinker and s.beer=l.beer)""",
"""select 'nonbeer drinker '+f.drinker
from frequents f
where not exists
(select l.drinker, l.beer from likes l where l.drinker=f.drinker)""",
"""select l.drinker+' likes '+l.beer+' but goes to no bar'
from likes l
where not exists (select f.drinker from frequents f where f.drinker=l.drinker)""",
"""select bar from frequents""",
"""select distinct bar from frequents""",
"""select sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves""",
"""select beer, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves
group by beer""",
"""select sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves
where beer<>'bud'
""",
"""select bar, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves
where beer<>'bud'
group by bar
having sum(quantity)>500 or count(*)>3
order by 2 desc
""",
"""select beer, sum(quantity), avg(quantity), count(*)
from serves
where beer<>'bud'
group by beer
having sum(quantity)>100
order by 4 desc, beer
""",
"""select l.drinker, l.beer, count(*), sum(l.perday*f.perweek)
from likes l, frequents f
where l.drinker=f.drinker
group by l.drinker, l.beer
order by 4 desc, l.drinker, l.beer
""",
"""select l.drinker, l.beer, f.bar, l.perday, f.perweek
from likes l, frequents f
where l.drinker=f.drinker
order by l.drinker, l.perday desc, f.perweek desc
""",
]
dynamic_queries = [
( "select bar from frequents where drinker=?", ("norm",) ),
( "select * from frequents where drinker=? or bar=?", ("norm", "cheers") )
]
updates = [
"""select * from frequents""",
"""select * from likes""",
"""select * from serves""",
"""select bar, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity)
from serves
where beer<>'bud'
group by bar
having sum(quantity)>500 or count(*)>3
order by 2 desc
""",
"""select count(*), d from nondrinkers group by d""",
"""insert into frequents (drinker, perweek, bar)
values ('billybob', 4, 'cheers')""",
"""select * from nondrinkers""",
"""create table templikes (dr varchar, be varchar)""",
"""select * from templikes""",
"""insert into templikes(dr, be)
select drinker, beer from likes""",
"""create index tdindex on templikes(dr)""",
"""create index tbindex on templikes(be)""",
"""select * from templikes""",
"""delete from templikes where be='rollingrock' """,
"""select * from templikes""",
"""update templikes set dr='norman' where dr='norm' """,
"""drop index tdindex""",
"""delete from templikes
where dr=(select min(dr) from templikes)""",
"""insert into templikes (dr, be)
select max(dr), min(be) from templikes""",
"""select * from templikes""",
"""select * from frequents""",
"""update frequents
set perweek=(select max(perweek)
from frequents
where drinker='norm')
where drinker='woody'""",
"""select * from frequents""",
"""create view lazy as
select drinker, sum(perweek) as wasted
from frequents
group by drinker
having sum(perweek)>4
order by drinker""",
"""select * from lazy""",
"""drop view lazy""",
"""drop table templikes""",
]
trace_updates = [
"""drop index tdindex""",
]
rollback_queries = [
"""select * from likes""",
"""select * from frequents""",
"""select * from nondrinkers""",
"""select * from alldrinkers""",
"""select * from dummy""",
]
rollback_updates = [
"""create table dummy (nothing varchar)""",
"""insert into frequents(drinker, bar, perweek)
values ('nobody', 'nobar', 0)""",
"""insert into likes(drinker, beer, perday)
values ('wally', 'nobar', 0)""",
"""drop view alldrinkers""",
]
keep_updates = [
"""insert into frequents(drinker, bar, perweek)
values ('peter', 'pans', 1)""",
"""create view alldrinkers as
select drinker from frequents
union
select drinker from likes""",
]
def rollbacktest(directory):
print "*" * 30
print "*** recovery test ***"
print; print; print
import sys
from gadfly import gadfly
print "*** connecting"
connect = gadfly("test", directory)
cursor = connect.cursor()
connect.autocheckpoint = 0
print "*** executing updates to commit"
for x in keep_updates:
print x
cursor.execute(x)
connect.verbose=1
print "*** COMMITTING OPERATIONS (connection set to verbose)"
connect.commit()
print "*** DUMP LOG"
connect.dumplog()
print; print "*** RUNNING OPS TO ROLL BACK"
preresults = []
for s in rollback_queries:
print; print; print s
try:
cursor.execute(s)
preresults.append(cursor.fetchall())
print cursor.pp()
except:
d = sys.exc_type
print "exception", d
preresults.append(d)
print; print "*** now updating with ops to rollback"
for s in rollback_updates:
print; print; print s
cursor.execute(s)
print; print; print "*** testing noncommitted results"
for dummy in (1,2):
postresults = []
for s in rollback_queries:
print s
try:
cursor.execute(s)
postresults.append(cursor.fetchall())
print cursor.pp()
except:
d = sys.exc_type
print "*** exception", d
postresults.append(d)
if preresults==postresults:
print "*** same results as before uncommitted updates"
else:
print "*** differing results from before uncommitted updates"
if dummy==1:
print; print "*** ROLLING BACK!"
connect.rollback()
print; print "*** EMULATING RECOVERY"
for s in rollback_updates:
print; print; print s
cursor.execute(s)
for dummy in (1,2):
postresults = []
for s in rollback_queries:
print s
try:
cursor.execute(s)
postresults.append(cursor.fetchall())
print cursor.pp()
except:
d = sys.exc_type
print "*** exception", d
postresults.append(d)
if preresults==postresults:
print "*** same results as before uncommitted updates"
else:
print "*** differing results from before uncommitted updates"
if dummy==1:
print "*** RESTART: DUMPLOG"
connect.dumplog()
print "*** RESTARTING (RECOVER FROM LOG, DISCARD UNCOMMITTED)"
connect.restart()
def retest(directory):
print "*" * 30
print "*** reconnect test"
from gadfly import gadfly
connect = gadfly("test", directory)
cursor = connect.cursor()
for s in updates:
print; print
print s
if s in trace_updates:
cursor.EVAL_DUMP = 1
cursor.execute(s)
cursor.EVAL_DUMP = 0
print cursor.pp()
#print; print "CONNECTION DATA BEFORE COMMIT"
#connect.DUMP_ALL()
connect.commit()
#print; print "CONNECTION DATA AFTER COMMIT"
#connect.DUMP_ALL()
connect.close()
return connect
if __name__=="__main__":
import sys
argv = sys.argv
if len(argv)<2:
print "USAGE: python <thismodule> <db_directory>"
print " please provide a directory for test database!"
else:
directory = argv[1]
test(directory)
rollbacktest(directory)
retest(directory)
# idl grammar
#
# Note, this grammar requires a special hack at the lexical
# level in order to parse the fragment
#
# ...
# case abc::def: jjj::www: whatever...
#
# (Yuck!)
# Some would argue this is a language design flaw, but whatever...
# It causes a shift/reduce problem without special handling for ::
# below coloncolon is a 'fake' keyword that parses as two colons.
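# For example, the fragment "case abc::def:" must tokenize as
#     case abc coloncolon def :
# which the myLexDictionary hack below arranges by recognizing "::"
# as the single keyword token "coloncolon".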
idlgramstring = """
specification ::
## 1
@R r1a :: specification >> definition speclist
@R r1b :: speclist >> specification
@R r1c :: speclist >>
## 2 punct ;
@R r2a :: definition >> type_dcl ;
@R r2b :: definition >> const_dcl ;
@R r2c :: definition >> except_dcl ;
@R r2d :: definition >> interface_nt ;
@R r2e :: definition >> module_nt ;
## 3 identifier=term, module=kw puncts {}
@R r3 :: module_nt >> module identifier { specification }
## 4
@R r4a :: interface_nt >> interface_dcl
@R r4b :: interface_nt >> forward_dcl
## 5
@R r5 :: interface_dcl >> interface_header { interface_body }
## 6 interface=kw
@R r6 :: forward_dcl >> interface identifier
## 7 puncts []
@R r7 :: interface_header >> interface identifier [ inheritance_spec ]
## 8
@R r8a :: interface_body >>
@R r8b :: interface_body >> export interface_body
## 9
@R r9a :: export >> type_dcl
@R r9b :: export >> const_dcl
@R r9c :: export >> except_dcl
@R r9d :: export >> attr_dcl
@R r9e :: export >> op_dcl
## 10 punct ,:
@R r10a :: inheritance_spec >> : scoped_name_list
@R r10b :: scoped_name_list >> scoped_name
@R r10c :: scoped_name_list >> scoped_name_list , scoped_name
## 11
@R r11a :: scoped_name >> identifier
@R r11b :: scoped_name >> colon_colon identifier
@R r11d :: scoped_name >> scoped_name coloncolon identifier
## 12 const=kw punct =
@R r12 :: const_dcl >> const const_type identifier = const_expr
## 13
@R r13a :: const_type >> integer_type
@R r13b :: const_type >> char_type
@R r13c :: const_type >> boolean_type
@R r13d :: const_type >> floating_type
@R r13e :: const_type >> string_type
@R r13f :: const_type >> scoped_name
## 14
@R r14 :: const_expr >> or_expr
##15 punct |
@R r15a :: or_expr >> xor_expr
@R r15b :: or_expr >> or_expr | xor_expr
##16 punct ^
@R r16a :: xor_expr >> and_expr
@R r16b :: xor_expr >> xor_expr ^ and_expr
##17 punct &
@R r17a :: and_expr >> shift_expr
@R r17b :: and_expr >> and_expr & shift_expr
##18 punct > <
@R r18a :: shift_expr >> add_expr
@R r18b :: shift_expr >> shift_expr > > add_expr
@R r18c :: shift_expr >> shift_expr < < add_expr
##19 punct +-
@R r19a :: add_expr >> mult_expr
@R r19b :: add_expr >> add_expr + mult_expr
@R r19c :: add_expr >> add_expr - mult_expr
##20 punct */%
@R r20a :: mult_expr >> unary_expr
@R r20b :: mult_expr >> mult_expr * unary_expr
@R r20c :: mult_expr >> mult_expr / unary_expr
@R r20d :: mult_expr >> mult_expr % unary_expr
##21
@R r21a :: unary_expr >> unary_operator primary_expr
@R r21b :: unary_expr >> primary_expr
##22
@R r22a :: unary_operator >> -
@R r22b :: unary_operator >> +
@R r22c :: unary_operator >> ~
##23 punct ()
@R r23a :: primary_expr >> scoped_name
@R r23b :: primary_expr >> literal
@R r23c :: primary_expr >> ( const_expr )
##24 terms = *_literal (?) except boolean
@R r24a :: literal >> integer_literal
@R r24b :: literal >> string_literal
@R r24c :: literal >> character_literal
@R r24d :: literal >> floating_pt_literal
@R r24e :: literal >> boolean_literal
##25 kw TRUE FALSE
@R r25a :: boolean_literal >> TRUE
@R r25b :: boolean_literal >> FALSE
##26
@R r26 :: positive_int_literal >> const_expr
##27 kw typedef
@R r27a :: type_dcl >> typedef type_declarator
@R r27b :: type_dcl >> struct_type
@R r27c :: type_dcl >> union_type
@R r27d :: type_dcl >> enum_type
##28
@R r28 :: type_declarator >> type_spec declarators
##29
@R r29a :: type_spec >> simple_type_spec
@R r29b :: type_spec >> constr_type_spec
##30
@R r30a :: simple_type_spec >> base_type_spec
@R r30b :: simple_type_spec >> template_type_spec
@R r30c :: simple_type_spec >> scoped_name
##31
@R r31a :: base_type_spec >> floating_pt_type
@R r31b :: base_type_spec >> integer_type
@R r31c :: base_type_spec >> char_type
@R r31d :: base_type_spec >> boolean_type
@R r31e :: base_type_spec >> octet_type
@R r31f :: base_type_spec >> any_type
## 32
@R r32a :: template_type_spec >> sequence_type
@R r32b :: template_type_spec >> string_type
##33
@R r33a :: constr_type_spec >> struct_type
@R r33b :: constr_type_spec >> union_type
@R r33c :: constr_type_spec >> enum_type
##34
@R r34a :: declarators >> declarator
@R r34b :: declarators >> declarators , declarator
##35
@R r35a :: declarator >> simple_declarator
@R r35b :: declarator >> complex_declarator
##36
@R r36 :: simple_declarator >> identifier
##37
@R r37 :: complex_declarator >> array_declarator
##38 kw float double
@R r38a :: floating_pt_type >> float
@R r38b :: floating_pt_type >> double
##39
@R r39a :: integer_type >> signed_int
@R r39b :: integer_type >> unsigned_int
##40
@R r40 :: signed_int >> signed_long_int
@R r40 :: signed_int >> signed_short_int
##41 kw long
@R r41 :: signed_long_int >> long
##42 kw short
@R r42 :: signed_short_int >> short
##43
@R r43 :: unsigned_int >> unsigned_long_int
@R r43 :: unsigned_int >> unsigned_short_int
##44 kw unsigned
@R r44 :: unsigned_long_int >> unsigned long
##45
@R r45 :: unsigned_short_int >> unsigned short
##46 kw char
@R r46 :: char_type >> char
##47 kw boolean
@R r47 :: boolean_type >> boolean
##48 kw octet
@R r48 :: octet_type >> octet
##49 kw any
@R r49 :: any_type >> any
##50 kw struct
@R r50 :: struct_type >> struct identifier { member_list }
##51
@R r51a :: member_list >> member
@R r51b :: member_list >> member_list member
##52
@R r52 :: member >> type_spec declarators ;
##53 kw union switch
@R r53 :: union_type >>
union identifier switch ( switch_type_spec ) { switch_body }
##54
@R r54a :: switch_type_spec >> integer_type
@R r54b :: switch_type_spec >> char_type
@R r54c :: switch_type_spec >> boolean_type
@R r54d :: switch_type_spec >> enum_type
@R r54e :: switch_type_spec >> scoped_name
##55
@R r55a :: switch_body >> case_nt
@R r55b :: switch_body >> switch_body case_nt
##56
@R r56a :: case_nt >> case_labels element_spec ;
@R r56b :: case_labels >> case_label
@R r56c :: case_labels >> case_labels case_label
##57 kw default case
@R r57a :: case_label >> case const_expr :
@R r57b :: case_label >> default :
##58
@R r58 :: element_spec >> type_spec declarator
##59 kw enum
@R r59a :: enum_type >> enum identifier { enumerators }
@R r59b :: enumerators >> enumerator
@R r59c :: enumerators >> enumerators , enumerator
##60
@R r60 :: enumerator >> identifier
##61 kw sequence
@R r61 :: sequence_type >> sequence < simple_type_spec , positive_int_const >
##62 kw string
@R r62a :: string_type >> string < positive_int_const >
@R r62b :: string_type >> string
##63
@R r63a :: array_declarator >> identifier fixed_array_sizes
@R r63b :: fixed_array_sizes >> fixed_array_size
@R r63c :: fixed_array_sizes >> fixed_array_sizes fixed_array_size
##64
@R r64 :: fixed_array_size >> [ positive_int_const ]
##65 kw attribute readonly
@R r65a :: attr_dcl >> maybe_readonly attribute param_type_spec simple_declarators
@R r65b :: maybe_readonly >> readonly
@R r65c :: maybe_readonly >>
@R r65d :: simple_declarators >> simple_declarator
@R r65e :: simple_declarators >> simple_declarators , simple_declarator
##66 kw exception
@R r66a :: except_dcl >> exception identifier { members }
@R r66b :: members >>
@R r66c :: members >> member_list
##67
@R r67a :: op_dcl >>
maybe_op_attribute op_type_spec identifier parameter_dcls
maybe_raises_expr maybe_context_expr
@R r67b :: maybe_op_attribute >>
@R r67c :: maybe_op_attribute >> op_attribute
@R r67d :: maybe_raises_expr >>
@R r67e :: maybe_raises_expr >> raises_expr
@R r67f :: maybe_context_expr >>
@R r67g :: maybe_context_expr >> context_expr
##68 kw oneway
@R r68a :: op_attribute >> oneway
##69 kw void
@R r69a :: op_type_spec >> param_type_spec
@R r69b :: op_type_spec >> void
##70
@R r70a :: parameter_dcls >> ( parameterlist )
@R r70b :: parameter_dcls >> ( )
@R r70c :: parameterlist >> param_dcl
@R r70d :: parameterlist >> parameterlist , param_dcl
##71
@R r71 :: param_dcl >> param_attribute param_type_spec simple_declarator
##72 kw in out inout
@R r72 :: param_attribute >> in
@R r72 :: param_attribute >> out
@R r72 :: param_attribute >> inout
##73 kw raises
@R r73 :: raises_expr >> raises ( scoped_name_list )
##74 kw context
@R r74 :: context_expr >> context ( string_literal_list )
@R r74b :: string_literal_list >> string_literal
@R r74c :: string_literal_list >> string_literal_list , string_literal
@R r75 :: param_type_spec >> base_type_spec
@R r75 :: param_type_spec >> string_type
@R r75 :: param_type_spec >> scoped_name
"""
nonterms = """
colon_colon
param_attribute
unsigned_long_int unsigned_short_int param_dcl
parameterlist string_literal_list
members maybe_op_attribute maybe_raises_expr maybe_context_expr
op_type_spec parameter_dcls op_attribute raises_expr context_expr
maybe_readonly param_type_spec simple_declarators simple_declarator
fixed_array_sizes fixed_array_size
element_spec enumerator enumerators
switch_type_spec switch_body case_nt case_labels case_label
member_list member
signed_int unsigned_int signed_long_int signed_short_int
simple_declarator complex_declarator array_declarator
declarator
sequence_type string_type
floating_pt_type integer_type char_type boolean_type
octet_type any_type
base_type_spec template_type_spec
simple_type_spec constr_type_spec
type_spec declarators
type_declarator struct_type union_type enum_type
literal boolean_literal positive_int_literal
mult_expr unary_expr unary_operator primary_expr
or_expr xor_expr and_expr shift_expr add_expr
integer_type char_type boolean_type floating_type string_type
const_type const_expr
scoped_name_list scoped_name
attr_dcl op_dcl
inheritance_spec export
interface_header interface_body
interface_dcl forward_dcl
type_dcl const_dcl except_dcl interface_nt module_nt
specification definition speclist
"""
keywords = """
exception oneway void in out inout raises context
interface module const TRUE FALSE typedef float double long
unsigned short char boolean octet any struct union switch
enum string attribute readonly default case sequence ::
"""
# NOTE: FOR NECESSARY HACKERY REASONS :: IS A KEYWORD!
punctuations = ";{}()[],:|^&<>+-*/%~="
# dummy regexen
identifierre = "identifier"
integer_literalre = "123"
positive_int_constre = "999"
string_literalre = "'string'"
character_literalre= "'c'"
floating_pt_literalre = "1.23"
# dummy interp fun for all terminals
def echo (str):
return str
def DeclareTerminals(Grammar):
Grammar.Addterm("identifier", identifierre, echo)
Grammar.Addterm("integer_literal", integer_literalre, echo)
Grammar.Addterm("string_literal", string_literalre, echo)
Grammar.Addterm("character_literal", character_literalre, echo)
Grammar.Addterm("floating_pt_literal", floating_pt_literalre, echo)
Grammar.Addterm("positive_int_const", positive_int_constre, echo)
## we need to override LexDictionary to recognize :: as a SINGLE punctuation.
## (not possible using standard kjParsing, requires a special override)
import kjParser
class myLexDictionary(kjParser.LexDictionary):
def __init__(self):
kjParser.LexDictionary.__init__(self)
map = ((kjParser.KEYFLAG, "coloncolon"), "coloncolon")
self.keywordmap["::"] = map
self.keywordmap["coloncolon"] = map
def Token(self, String, StartPosition):
if String[StartPosition:StartPosition+2] == "::":
tok = self.keywordmap["::"]
return (tok, 2)
# default: defer to the base class (kjParser, which this class extends)
return kjParser.LexDictionary.Token(self, String, StartPosition)
# default bind all rules
def GrammarBuild():
import kjParseBuild
idl = kjParseBuild.NullCGrammar()
idl.LexD = myLexDictionary()
#idl.SetCaseSensitivity(0) # grammar is not case sensitive for keywords
DeclareTerminals(idl)
idl.Keywords(keywords)
idl.punct(punctuations)
idl.Nonterms(nonterms)
#idl.comments([LISPCOMMENTREGEX])
idl.Declarerules(idlgramstring)
print "now compiling"
idl.Compile()
return idl
if __name__=="__main__": GrammarBuild()
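# Rough usage sketch (added note, not from the original source): after
# replacing the dummy regexen above with real ones, build the grammar once,
# bind each @R rule name to an interpretation function (via the grammar's
# Bind method, as listed in kjParseBuild's archive warnings), then parse:
#
# idl = GrammarBuild()
# idl.Bind("r1", interp_r1) # hypothetical interpretation function
# result = idl.DoParse1(idl_source) # idl_source: a string of CORBA IDL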
<HTML>
<HEAD>
<TITLE>
Gadfly/kwParsing directory
</TITLE>
</HEAD>
<BODY bgcolor="#3399ff">
<center>
<h1>Gadfly! An SQL Database in Python</h1>
<p>
<table BORDER bgcolor="#00ffff">
<tr>
<td>
<a href="gadfly.py">gadfly.py</a> main module<br>
<a href="gfserve.py">gfserve.py</a> main TCP/IP server mode<br>
<a href="gfclient.py">gfclient.py</a> main TCP/IP client mode<br>
<a href="gfsocket.py">gsocket.py</a> client/server support module<br>
<a href="gfstest.py">gstest.py</a> client/server test module<br>
<a href="gfinstall.py">gfinstall.py</a> installation script<br>
<a href="gftest.py">gftest.py</a> test suite<br>
<a href="sqlbind.py">sqlbind.py</a> grammar bindings<br>
<a href="sqlgen.py">sqlgen.py</a> grammar generation<br>
<a href="sqlgram.py">sqlgram.py</a> SQL grammar<br>
<a href="sqlgtest.py">sqlgtest.py</a> grammar tests<br>
<a href="sqlsem.py">sqlsem.py</a> main semantic objects<br>
<a href="sqlmod.py">sqlmod.py</a> update semantic objects<br>
<a href="gfdb0.py">gfdb0.py</a> data and file archiving<br>
<a href="relalg.py">relalg.py</a> relational algebra interpreter (toy)<br>
<a href="kjbuckets0.py">kjbuckets0.py</a> base data structures
(python version)<br>
</td>
<td>
These are the core files of the
<a href="gadfly.html">
Gadfly SQL database engine
</a>
(beta): a relational database query engine that supports
the Structured Query Language (SQL), implemented entirely
in Python (with optional builtin support from the
<a href="http://www.pythonpros.com/arw/kjbuckets">
kjbuckets builtin data structure accelerator</a>).
<hr>
Gadfly has been tested on Windows 95, Windows NT, Linux, and Unix
(Solaris),
and it should run anywhere that Python runs (BeBox,
maybe Palm Pilot/WinCE eventually...).
<hr>
Supports <a href="gfrecover.html">transactions and recovery</a>
<hr>
Supports <a href="gfSQL.html">a LARGE subset of SQL</a>
<hr>
Supports <a href="server.html">client/server access
via TCP/IP sockets.</a>
<hr>
Requires <a href="http://www.pythonpros.com/arw/kwParsing/kwP.tar.gz">
the kwParsing package</a>
(a copy of which is also included here).
</td>
</tr>
</table>
<h2>kwParser, Python lint, IDL, etc...</h2>
kwParser is a parser generator for Python. It transforms an
abstract specification of a language grammar (for example the
CORBA Interface Definition Language), together with "interpretation
functions" that define the semantics of the language, into a
compiler, translator, or interpreter. In the case of CORBA IDL,
a Python program using kwParser could generate stubs and support
code (in Python or some other language) to talk to a CORBA interface.
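<p>
To give the flavor (a sketch only, using the entry points visible in the
modules listed below; see <a href="DLispShort.py">DLispShort.py</a> for a
real, complete specification), a grammar is declared roughly like this:
</p>
<pre>
import kjParseBuild
G = kjParseBuild.NullCGrammar()
G.Addterm("id", "[a-z]+", lambda s: s) # terminal: regex + interpretation
G.Nonterms("E T")
G.Keywords("begin end")
G.punct("+")
G.Declarerules("""
E ::
@R r1 :: E >> begin T end
@R r2 :: T >> T + id
@R r3 :: T >> id
""")
G.Compile()
# ...then bind each rule name to an interpretation function via G.Bind(...)
</pre>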
<p>
The release given here has had some micro-optimizations (26 June 1997)
which with luck don't break anything. In particular, regexen are
used more intelligently and the generation phase will use
<a href="../kjbuckets">kjbuckets</a>
if it's available.
</p>
This is moderately heavy computer science. Not for the timid.
<p>
<TABLE BORDER bgcolor="#00ffff">
<TR>
<TD><a href="COPYRIGHT">COPYRIGHT</a>
</TD>
<td>Do what you like, just don't sue me (roughly translated).
</td>
</TR>
<TR>
<TD><a href="DLispShort.py">DLispShort.py</a>
</TD>
<td>A very simple example language specification.
</td>
</TR>
<TR>
<TD><a href="DumbLispGen.py">DumbLispGen.py</a>
</TD>
<td>Another example specification.
</td>
</TR>
<TR>
<TD><a href="arefize.py">arefize.py</a>
</TD>
<td>(not related: simple program used to generate this page)
</td>
</TR>
<TR>
<TD><a href="idl.py">idl.py</a>
</TD>
<td>A very complex example: CORBA IDL parser generator.
This is a good example of a complex grammar. The
interpretation functions and terminal regexes are
all stubbed.
</td>
</TR>
<TR>
<TD><a href="pygram.py">pygram.py</a>
</TD>
<td>A very complex example: the Python grammar.
This module uses a hand-written lexer to handle
Python's beautiful peculiarities. Used by kjpylint.py.<br>
Look at the top of the module for editable parameters.
</td>
</TR>
<TR>
<TD><a href="kjpylint.py">kjpylint.py</a>
</TD>
<td>An attempt to use the pygram parser to do simple
checking on Python source files. Reports references
not set, assignments not used, etcetera. Not all warnings
indicate real problems, of course. Used like this:
<pre>
% python kjpylint.py /home/app/arw/Python-1.5a1/Lib/SocketServer.py
setup
loading
now parsing
(verify_request) 'request' defined before 225 not used
(verify_request) 'self' defined before 225 not used
(verify_request) 'client_address' defined before 225 not used
(handle_error) 'request' defined before 245 not used
(handle_error) 'self' defined before 245 not used
(collect_children) 'status' defined before 293 not used
(setup) 'self' defined before 368 not used
(__del__) 'self' defined before 371 not used
(handle) 'self' defined before 374 not used
(finish) 'self' defined before 377 not used
(<module global>) '__version__' defined before 104 not used
269: (qref) 'max_packet_size' not defined in module?
</pre>
Here only the last line indicates a possible real bug
in SocketServer.py
<hr>
<em>Bugs/Features: <br>
Barfs on inconsistent indentation (ie space-tab is not the same as tab).
<br>
Grumpy about one line for loops and lambdas.<br>
For very weird cases may not parse strings correctly.<br>
See top of module for more info.<br>
Right now kjpylint might like to see an extra newline
at the end of the file. I'll look into this, sorry.</em>
<br>
Latest: Thu Jul 17 13:50:03 EDT 1997
</td>
</TR>
<TR>
<TD><a href="kjParseBuild.py">kjParseBuild.py</a>
</TD>
<td>The Parser generator module, used only to generate
the data structures required for parsing.
</td>
</TR>
<TR>
<TD><a href="kjParser.py">kjParser.py</a>
</TD>
<td>The Parser module, used both during the generation
phase and also after generation when the generated
parser is used.
</td>
</TR>
<TR>
<TD><a href="kwParsing.html">kwParsing.html</a>
</TD>
<td>HTML documentation for the package.
</td>
</TR>
<TR>
<TD><a href="kjSet.py">kjSet.py</a>
</TD>
<td>Support module for parser generation.
(uses <a href="../kjbuckets">kjbuckets</a>
builtin if available, or uses a straight
Python implementation if not.)
</td>
</TR>
<TR>
<TD><a href="kwP.tar.gz">kwP.tar.gz</a>
</TD>
<td>The package (tarred, gzipped)
</td>
</TR>
<CAPTION ALIGN="top">kwParsing parser generator related files.
</CAPTION>
</TABLE>
<TABLE BGCOLOR="#FFFFFF">
<TR>
<TD><a href="../index.html">
<em>humble servant</em></a></TD>
</TR>
</TABLE><br>
Not speaking for <a href="http://www.pythonpros.com">the "pros".</a>
</center>
</BODY>
</HTML>
#
# python code for building a parser from a grammar
# Copyright Aaron Robert Watters, 1994
#
# BUGS:
# A bad grammar that has no derivations for
# the root nonterminal may cause a name error
# on the variable "GoodStartingPlace"
# this needs to be modified so the RULEGRAM is loaded from a
# compiled representation if available.
import string
import kjSet
import kjParser
import regex
# import some constants
from kjParser import \
TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, TRANSFLAG, KEYFLAG, \
NONTERMFLAG, EOFFLAG, ENDOFFILETOKEN
PMODULE = kjParser.THISMODULE
# errors raised here
TokenError = "TokenError" # may happen on autogen with bad grammar
NotSLRError = "NotSLRError" # may happen for nonSLR grammar
# set this flag for regression testing at each load
RUNTESTS = 0
# set this flag to abort automatic generation on Errors
ABORTONERROR = 0
# token used to mark null productions
NULLTOKEN = (None,None)
# a derived FSM class, with closure computation methods defined
# (compilable FSMachine)
#
class CFSMachine(kjParser.FSMachine):
def __init__(self, nonterm):
kjParser.FSMachine.__init__(self, nonterm)
# return the epsilon closure of the FSM as a new FSM
#
# DoNullMap, if set, will map unexpected tokens to
# the "empty" state (usually creating a really big fsm)
#
def Eclosure(self, Epsilon, DoNullMaps=0):
Closure = CFSMachine( self.root_nonTerminal )
# compute the Epsilon Graph between states
EGraph = kjSet.NewDG([])
for State in range(0,self.maxState+1):
# every state is E-connected to self
kjSet.AddArc( EGraph, State, State )
# add possible transition on epsilon (ONLY ONE SUPPORTED!)
key = (State, Epsilon)
if self.StateTokenMap.has_key(key):
keymap = self.StateTokenMap[key]
if keymap[0][0] != MOVETOFLAG:
raise TypeError, "unexpected map type in StateTokenMap"
for (Flag,ToState) in keymap:
kjSet.AddArc( EGraph, State, ToState )
#endfor
# transitively close EGraph
kjSet.TransClose( EGraph )
# Translate EGraph into a dictionary of lists
EMap = {}
for State in range(0,self.maxState+1):
EMap[State] = kjSet.Neighbors( EGraph, State )
# make each e-closure of each self.state a state of the closure FSM.
# here closure states assumed transient -- reset elsewhere.
# first do the initial state
Closure.States[ Closure.initial_state ] = \
[TRANSFLAG, kjSet.NewSet(EMap[self.initial_state]) ]
# do all other states (save initial and successful final states)
#for State in range(0,self.maxState+1):
# if State != self.initial_state \
# and State != self.successful_final_state:
# Closure.NewSetState(TRANSFLAG, kjSet.NewSet(EMap[State]) )
##endfor
# compute set of all known tokens EXCEPT EPSILON
Tokens = kjSet.NewSet( [] )
for (State, Token) in self.StateTokenMap.keys():
if Token != Epsilon:
kjSet.addMember(Token, Tokens)
# transform it into a list
Tokens = kjSet.get_elts(Tokens)
# for each state of the closure FSM (past final) add transitions
# and add new states as needed until all states are processed
# (uses convention that states are allocated sequentially)
ThisClosureState = 1
while ThisClosureState <= Closure.maxState:
MemberStates = kjSet.get_elts(Closure.States[ThisClosureState][1])
# for each possible Token, compute the union UTrans of all
# e-closures for all transitions for all member states,
# on the Token, make UTrans a new state (if needed),
# and transition ThisClosureState to UTrans on Token
for Token in Tokens:
UTrans = kjSet.NewSet( [] )
for MState in MemberStates:
# if MState has a transition on Token, include
# EMap for the destination state
key = (MState, Token)
if self.StateTokenMap.has_key(key):
DStateTup = self.StateTokenMap[key]
if DStateTup[0][0] != MOVETOFLAG:
raise TypeError, "unknown map type"
for (DFlag, DState) in DStateTup:
for EDState in EMap[DState]:
kjSet.addMember(EDState, UTrans)
#endif
#endfor MState
# register UTrans as a new state if needed
UTState = Closure.NewSetState(TRANSFLAG, UTrans)
# record transition from
# ThisClosureState to UTState on Token
if DoNullMaps:
Closure.SetMap( ThisClosureState, Token, UTState)
else:
if not kjSet.Empty(UTrans):
Closure.SetMap( ThisClosureState, Token, UTState)
#endfor Token
ThisClosureState = ThisClosureState +1
#endwhile
return Closure
#enddef Eclosure
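# Informal illustration (added note): if state 1 has epsilon arcs to
# states 2 and 3, the closure FSM gets a single state marked {1,2,3};
# for each token T, the destinations of all member states on T are
# unioned and e-closed, and the result becomes (or reuses) another
# set-marked state via NewSetState below.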
# add a set-marked state to self if not present
# uses self.States[s][1] as the set marking the state s
#
# only used by Eclosure above
#
def NewSetState(self, kind, InSet):
# return existing state if one is present that matches the set
LastState= self.maxState
# skip state 0 (successful final state)???
for State in range(1,LastState+1):
MarkSet = self.States[State][1]
if kjSet.Same(InSet,MarkSet):
return State # nonlocal
#endfor
# if not exited then allocate a new state
LastState = LastState + 1
self.States[LastState] = [ kind , InSet ]
self.maxState = LastState
return LastState
#enddef NewSetState
#endclass CFSMachine
# Ruleset class, used to compute NFA and then DFA for
# parsing based on a list of rules.
#
class ruleset:
def __init__(self, StartNonterm, Rulelist):
# initialize the ruleset
self.StartNonterm = StartNonterm
self.Rules = Rulelist
# method to compute prefixes and First sets for nonterminals
def CompFirst(self):
# uses the special null production token NULLTOKEN
# snarfed directly from Aho+Ullman (terminals glossed)
First = kjSet.NewDG( [] )
# repeat the while loop until no change is made to First
done = 0
while not done:
done = 1 # assume we're done until a change is made to First
# iterate through all rules looking for a new arc to add
# indicating Terminal --> possible first token derivation
#
for R in self.Rules:
GoalNonterm = R.Nonterm
Bodylength = len(R.Body)
# look through the body of the rule up to the token with
# no epsilon production (yet seen)
Bodyindex = 0
Processindex = 1
while Processindex:
# unless otherwise indicated below, don't go to next token
Processindex = 0
# if index is past end of body then record
# an epsilon production for this nonterminal
if Bodyindex >= Bodylength:
if not kjSet.HasArc(First, GoalNonterm, NULLTOKEN ):
kjSet.AddArc( First, GoalNonterm, NULLTOKEN )
done = 0 # change made to First
else:
# otherwise try to add firsts of this token
# to firsts of the Head of the rule.
Token = R.Body[Bodyindex]
(type, name) = Token
if type in (KEYFLAG,TERMFLAG):
# try to add this terminal to First for GoalNonterm
if not kjSet.HasArc(First, GoalNonterm, Token):
kjSet.AddArc( First, GoalNonterm, Token)
done = 0
elif type == NONTERMFLAG:
# try to add each First entry for nonterminal
# to First entry for GoalNonterm
for FToken in kjSet.Neighbors( First, Token ):
if not kjSet.HasArc(First, GoalNonterm, FToken):
kjSet.AddArc( First, GoalNonterm, FToken)
done = 0
# does this nonterminal have a known e production?
if kjSet.HasArc( First, Token, NULLTOKEN ):
# if so, process next token in rule
Processindex = 1
else:
raise TokenError, "unknown token type in rule body"
#endif
Bodyindex = Bodyindex + 1
#endwhile Processindex
#endfor R in self.Rules
#endwhile not done
self.First = First
#enddef CompFirst
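# Worked example (added note), using the textbook grammar from the
# RUNTESTS section at the end of this module:
# E >> T Ep; Ep >> + T Ep | <empty>; T >> F Tp;
# Tp >> * F Tp | <empty>; F >> ( E ) | id
# First(F) = First(T) = First(E) = { (, id }
# First(Ep) = { +, NULLTOKEN }; First(Tp) = { *, NULLTOKEN }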
# computing the Follow set for the ruleset
# the good news: I think it's correct.
# the bad news: It's slower than it needs to be for epsilon cases.
def CompFollow(self):
Follow = kjSet.NewDG( [] )
# put end marker on follow of start nonterminal
kjSet.AddArc(Follow, self.StartNonterm, kjParser.ENDOFFILETOKEN)
# now compute other follows using the rules;
# repeat the loop until no change to Follow.
done = 0
while not done:
done = 1 # assume done unless Follow changes
for R in self.Rules:
#print R
# work backwards in the rule body to
# avoid retesting for epsilon nonterminals
Bodylength = len(R.Body)
EpsilonTail = 1 # the tail of rule may expand to null
BodyIndex = Bodylength - 1
Last = 1 # loop starts at the last
from types import TupleType # only needed by the commented-out check below
while BodyIndex >= 0:
Token = R.Body[BodyIndex]
(Ttype,Tname) = Token
if Ttype in (KEYFLAG,TERMFLAG):
# keywords etc cancel epsilon tail, otherwise ignore
EpsilonTail = 0
elif Ttype == NONTERMFLAG:
# if the tail expands to epsilon, map
# follow for the goal nonterminal to this token
# and also follow for the tail nonterms
if EpsilonTail:
# add follow for goal
for FToken in kjSet.Neighbors(Follow,R.Nonterm):
if not kjSet.HasArc(Follow,Token,FToken):
kjSet.AddArc(Follow,Token,FToken)
#if type(FToken[0])==TupleType:
# raise ValueError, "bad FToken"+`FToken`
#print "new", Token, FToken
done = 0 # follow changed, loop again
# add follow for tail members
#for Index2 in range(BodyIndex+1, Bodylength):
# TailToken = R.Body[Index2]
# for FToken in kjSet.Neighbors(Follow,TailToken):
# if not kjSet.HasArc(Follow,Token,FToken):
# kjSet.AddArc(Follow,Token,FToken)
# done = 0
#endif EpsilonTail
# if we are not at the end use First set for next token
if not Last:
NextToken = R.Body[BodyIndex+1]
(NTtype, NTname) = NextToken
if NTtype in (KEYFLAG,TERMFLAG):
if not kjSet.HasArc(Follow,Token,NextToken):
kjSet.AddArc(Follow,Token,NextToken)
#print "next", Token, NextToken
done = 0
elif NTtype == NONTERMFLAG:
for FToken in kjSet.Neighbors(self.First, NextToken):
if FToken != NULLTOKEN:
if not kjSet.HasArc(Follow,Token,FToken):
kjSet.AddArc(Follow,Token,FToken)
#print "neighbor", Token, FToken
done = 0
else:
# next token expands to epsilon:
# add its follow, unless already done above
#if not EpsilonTail:
for FToken in kjSet.Neighbors(Follow,NextToken):
if not kjSet.HasArc(Follow,Token,FToken):
kjSet.AddArc(Follow,Token,FToken)
#print "epsilon", Token, FToken
done = 0
else:
raise TokenError, "unknown token type in rule body"
#endif not Last
# finally, check whether next iteration has epsilon tail
if not kjSet.HasArc(self.First, Token, NULLTOKEN):
EpsilonTail = 0
else:
raise TokenError, "unknown token type in rule body"
BodyIndex = BodyIndex - 1
Last = 0 # no longer at the last token of the rule
#endwhile
#endfor
#endwhile
self.Follow = Follow
#enddef CompFollow
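# Worked example (added note), continuing the grammar above with root E:
# Follow(E) = { ), EOF }; Follow(Ep) = Follow(E)
# Follow(T) = Follow(Tp) = { +, ), EOF }
# Follow(F) = { *, +, ), EOF }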
def DumpFirstFollow(self):
First = self.First
Follow = self.Follow
print "First:"
for key in First.keys():
name = key[1]
print name," :: ",
for (flag2,name2) in First[key].keys():
print name2,", ",
print
print "Follow:"
for key in Follow.keys():
name = key[1]
print name," :: ",
for (flag2,name2) in Follow[key].keys():
print name2,", ",
print
# computing the "first" of the tail of a rule followed by an
# optional terminal
# doesn't include NULLTOKEN
# requires self.First to be computed
#
def FirstOfTail(self, Rule, TailIndex, Token=None):
Result = kjSet.NewSet( [] )
# go through all tokens in rule tail so long as there is a
# null derivation for the remainder
Nullprefix = 1
BodyLength = len(Rule.Body)
ThisIndex = TailIndex
while Nullprefix and ThisIndex < BodyLength:
RToken = Rule.Body[ThisIndex]
(RTtype, RTname) = RToken
if RTtype == NONTERMFLAG:
for FToken in kjSet.Neighbors(self.First, RToken):
if FToken != NULLTOKEN:
kjSet.addMember(FToken, Result)
#endfor
# check whether this symbol might have a null production
if not kjSet.HasArc(self.First, RToken, NULLTOKEN):
Nullprefix = 0
elif RTtype in [KEYFLAG, TERMFLAG]:
kjSet.addMember(RToken, Result)
Nullprefix = 0
else:
raise TokenError, "unknown token type in rule body"
ThisIndex = ThisIndex + 1
#endwhile
# add the optional token if given and Nullprefix still set
if Nullprefix and Token != None:
kjSet.addMember(Token, Result)
return Result
#enddef FirstOfTail
# compute an SLR NFA for the ruleset with states for each SLR "item"
# and transitions, eg:
# X --> .AB
# on A maps to X --> A.B
# on epsilon maps to A --> .ZC
# and A --> .WK
# an item is a pair (rulenumber, bodyposition)
# where body position 0 is interpreted to point before the
# beginning of the body.
#
# SLR = "simple LR" in Aho+Ullman terminology
#
def CompSLRNFA(self):
NFA = CFSMachine(self.StartNonterm)
Nrules = len(self.Rules)
itemStateMap = {}
for Ruleindex in range(0,Nrules):
Rule = self.Rules[Ruleindex]
# make an item for each "dot" position in the body
for DotPos in range(0, len(Rule.Body) + 1):
item = (Ruleindex, DotPos)
itemState = NFA.NewState(TRANSFLAG, [item])
itemStateMap[item] = itemState
#endfor DotPos
#endfor Ruleindex
# now that the states are initialized
# compute transitions except for the last item of a rule
# (which has none)
for Ruleindex in range(0,Nrules):
Rule = self.Rules[Ruleindex]
for DotPos in range(0, len(Rule.Body)):
item = (Ruleindex, DotPos)
CurrentToken = Rule.Body[DotPos]
ThisState = itemStateMap[item]
NextState = itemStateMap[ (Ruleindex, DotPos + 1) ]
NFA.SetMap( ThisState, CurrentToken, NextState )
# if the current token is a nonterminal
# add epsilon transitions to first item for any
# rule that derives this nonterminal
(CTtype, CTname) = CurrentToken
if CTtype == NONTERMFLAG:
for Rule2index in range(0,Nrules):
Rule2 = self.Rules[Rule2index]
Head = Rule2.Nonterm
if Head == CurrentToken:
NextState = itemStateMap[( Rule2index, 0 )]
NFA.SetMap( ThisState, NULLTOKEN, NextState )
#endfor Rule2index
#endif CTtype == NONTERMFLAG
#endfor DotPos
#endfor Ruleindex
# must handle the initial state properly here!
# Make a dummy state with e-transitions to all first items
# for rules that derive the initial nonterminal
ThisState = NFA.initial_state
GoodStartingPlace = None
for Ruleindex in range(0,Nrules):
Rule = self.Rules[Ruleindex]
Head = Rule.Nonterm
if Head == self.StartNonterm:
GoodStartingPlace= (Ruleindex, 0)
NextState = itemStateMap[ GoodStartingPlace ]
NFA.SetMap( ThisState, NULLTOKEN, NextState )
# fix the NFA.States entry
if GoodStartingPlace == None:
raise NotSLRError, "No derivation for root nonterminal."
NFA.States[ NFA.initial_state ] = \
[ 'transient', GoodStartingPlace ]
self.SLRNFA = NFA
#enddef CompSLRNFA
# dump an item
def ItemDump(self, item):
(ruleindex, position) = item
Rule = self.Rules[ruleindex]
print Rule.Nonterm[1],' >> ',
for bindex in range(0, len(Rule.Body)):
if position == bindex:
print " (*) ",
print Rule.Body[bindex][1],
if position == len(Rule.Body):
print " (*) "
else:
print
# utility function -- returns true if an item is a final item
def SLRItemIsFinal(self, item):
(ruleindex, position) = item
Rule = self.Rules[ruleindex]
if position == len(Rule.Body):
return 1
else:
return 0
# dump the NFA
def DumpSLRNFA(self):
NFA = self.SLRNFA
print "root: ", NFA.root_nonTerminal
for key in NFA.StateTokenMap.keys():
map = NFA.StateTokenMap[key]
(fromstate, token) = key
fromitem = NFA.States[ fromstate ][1]
self.ItemDump(fromitem)
print " on ", token[1], " maps "
for Tostate in map:
Toitem = NFA.States[Tostate][1]
print " ",
self.ItemDump(Toitem)
# compute DFA for ruleset by computing the E-closure of the
# NFA
def CompDFA(self):
self.DFA = self.SLRNFA.Eclosure(NULLTOKEN)
def DumpDFAsets(self):
DFA = self.DFA
print "root: ", DFA.root_nonTerminal
for State in range(1, len(DFA.States) ):
self.DumpItemSet(State)
def DumpItemSet(self,State):
DFA = self.DFA
NFA = self.SLRNFA
print
print "STATE ", State, " *******"
fromNFAindices = kjSet.get_elts(DFA.States[State][1])
for NFAindex in fromNFAindices:
item = NFA.States[NFAindex][1]
print " ", NFAindex, ": ",
self.ItemDump(item)
# this function completes the computation of an SLR DFA
# by adding reduction states for each DFA state S containing
# item H --> B.
# which reduces rule H --> B
# for each token T in Follow of H.
# if S already has a transition for T then there is a conflict!
#
# assumes DFA and SLRNFA and Follow have been computed.
#
def SLRFixDFA(self):
DFA = self.DFA
NFA = self.SLRNFA
# look through the states (except 0=success) of the DFA
# initially don't add any new states, just record
# actions to be done
# uses convention that 0 is successful final state
# ToDo is a dictionary which maps
# (State, Token) to a item to reduce
ToDo = {}
Error = None
for State in range(1, len(DFA.States) ):
# look for a final item for a rule in this state
fromNFAindices = kjSet.get_elts(DFA.States[State][1])
for NFAindex in fromNFAindices:
item = NFA.States[NFAindex][1]
# if the item is final remember to do the reductions...
if self.SLRItemIsFinal(item):
(ruleindex, position) = item
Rule = self.Rules[ruleindex]
Head = Rule.Nonterm
Following = kjSet.Neighbors( self.Follow, Head )
for Token in Following:
key = (State, Token)
if not ToDo.has_key(key):
ToDo[ key ] = item
else:
# it might be okay if the items are identical?
item2 = ToDo[key]
if item != item2:
print "reduce/reduce conflict on ",key
self.ItemDump(item)
self.ItemDump(item2)
Error = " apparent reduce/reduce conflict"
#endif
#endfor
#endif
#endfor NFAindex
#endfor State
# for each (State,Token) pair which indicates a reduction
# record the reduction UNLESS the map is already set for the pair
for key in ToDo.keys():
(State,Token) = key
item = ToDo[key]
(rulenum, dotpos) = item
ExistingMap = DFA.map( State, Token )
if ExistingMap[0] == NOMATCHFLAG:
DFA.SetReduction( State, Token, rulenum )
else:
print "apparent shift/reduce conflict"
print "reduction: ", key, ": "
self.ItemDump(item)
print "existing map ", ExistingMap
Error = " apparent shift/reduce conflict"
#endfor
if Error and ABORTONERROR:
raise NotSLRError, Error
#enddef SLRFixDFA()
# do complete SLR DFA creation starting after initialization
def DoSLRGeneration(self):
self.CompFirst()
self.CompFollow()
self.CompSLRNFA()
self.CompDFA()
self.SLRFixDFA()
#endclass ruleset
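# Typical use of this class (added note; mirrors the RUNTESTS section
# at the end of this module):
# rs = ruleset(RootNonterm, RuleList)
# rs.DoSLRGeneration() # First, Follow, NFA, DFA, conflict checks
# dfa = rs.DFA # ready for use with a kjParser.ParserObj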
################ the following are interpretation functions
################ used by RULEGRAM meta grammar
# some constants used here
COMMENTFORM = "##.*\n"
RSKEY = "@R"
COLKEY = "::"
LTKEY = ">>"
IDNAME = "ident"
# an identifier in the meta grammar is any nonwhite string
# except the keywords @R :: >> or comment flag ##
IDFORM = "[^" + string.whitespace + "]+"
# for identifiers simply return the string
def IdentFun(string):
return string
# RootReduction should receive list of form
# [ nontermtoken, keyword COLKEY, RuleList ]
def RootReduction(list, ObjectGram):
if len(list) != 3 or list[1] != COLKEY:
raise FlowError, "unexpected metagrammar root reduction"
return (list[0], list[2])
# NullRuleList should receive list of form
# []
def NullRuleList(list, ObjectGram):
if list != []:
raise FlowError, "unexpected null RuleList form"
return []
# FullRuleList should receive list of form
# [ Rule, RuleList ]
def FullRuleList(list, ObjectGram):
if type(list) != type([]) or len(list)!=2:
raise FlowError, "unexpected full RuleList form"
NewRule = list[0]
OldRules = list[1]
return [NewRule] + OldRules
# InterpRule should receive list of form
# [keyword RSKEY,
# RuleNameStr,
# keyword COLKEY,
# Nontermtoken,
# keyword LTKEY,
# Bodylist]
#
def InterpRule(list, ObjectGram):
# check keywords:
if len(list) != 6 or \
list[0] != RSKEY or \
list[2] != COLKEY or \
list[4] != LTKEY:
raise FlowError, "unexpected meta rule reduction form"
ruleName = list[1]
ruleNonterm = list[3]
ruleBody = list[5]
# upcase the representation of keywords if needed
if not ObjectGram.LexD.isCaseSensitive():
for i in range(0,len(ruleBody)):
(flag, name) = ruleBody[i]
if flag == KEYFLAG:
ruleBody[i] = (KEYFLAG, string.upper(name))
elif not flag in (TERMFLAG, NONTERMFLAG):
raise FlowError, "unexpected rule body member"
rule = kjParser.ParseRule( ruleNonterm, ruleBody )
rule.Name = ruleName
return rule
# InterpRuleName should receive
# [ string ]
def InterpRuleName(list, ObjectGram):
#print list
# add error checking?
return list[0]
# InterpNonTerm should receive
# [ string ]
def InterpNonTerm(list, ObjectGram):
#print list
if type(list)!=type([]) or len(list)!=1:
raise FlowError, "unexpected rulename form"
Name = list[0]
# determine whether this is a valid nonterminal
if not ObjectGram.NonTermDict.has_key(Name):
#print Name
raise TokenError, "LHS of Rule must be nonterminal: "+Name
return ObjectGram.NonTermDict[Name]
# NullBody should receive []
def NullBody(list, ObjectGram):
#print list
if list != []:
raise FlowError, "unexpected null Body form"
return []
# FullBody should receive
# [ string, Bodylist]
# must determine whether the string represents
# a keyword, a nonterminal, or a terminal of the object
# grammar.
# returns (KEYFLAG, string) (TERMFLAG, string) or
# (NONTERMFLAG, string) respectively
#
def FullBody(list,ObjectGram):
#print list
if type(list)!=type([]) or len(list)!=2:
raise FlowError, "unexpected body form"
Name = list[0]
# Does the Name represent a nonterm, keyword, or term
# of the object grammar (in that order)?
if ObjectGram.NonTermDict.has_key(Name):
kind = NONTERMFLAG
elif ObjectGram.LexD.keywordmap.has_key(Name):
kind = KEYFLAG
elif ObjectGram.TermDict.has_key(Name):
kind = TERMFLAG
else:
raise TokenError, "Rule body contains unregistered string: "+Name
restOfBody = list[1]
return [(kind, Name)] + restOfBody
# function to generate a grammar for parsing grammar rules
#
def ruleGrammar():
LexD = kjParser.LexDictionary()
# use SQL/Ansi style comments
LexD.comment( COMMENTFORM )
# declare keywords
RStart = LexD.keyword( RSKEY )
TwoColons = LexD.keyword( COLKEY )
LeadsTo = LexD.keyword( LTKEY )
# declare terminals
ident = LexD.terminal(IDNAME, IDFORM, IdentFun )
# declare nonterminals
Root = kjParser.nonterminal("Root")
Rulelist = kjParser.nonterminal("RuleList")
Rule = kjParser.nonterminal("Rule")
RuleName = kjParser.nonterminal("RuleName")
NonTerm = kjParser.nonterminal("NonTerm")
Body = kjParser.nonterminal("Body")
# declare rules
# Root >> NonTerm :: Rulelist
InitRule = kjParser.ParseRule( Root, \
[NonTerm, TwoColons, Rulelist], RootReduction )
# Rulelist >>
RLNull = kjParser.ParseRule( Rulelist, [], NullRuleList)
# Rulelist >> Rule Rulelist
RLFull = kjParser.ParseRule( Rulelist, [Rule,Rulelist], FullRuleList)
# Rule >> "@R :: NonTerm >> Body
RuleR = kjParser.ParseRule( Rule, \
[RStart, RuleName, TwoColons, NonTerm, LeadsTo, Body],\
InterpRule)
# Rulename >> ident
RuleNameR = kjParser.ParseRule( RuleName, [ident], InterpRuleName)
# NonTerm >> ident
NonTermR = kjParser.ParseRule( NonTerm, [ident], InterpNonTerm)
# Body >>
BodyNull = kjParser.ParseRule( Body, [], NullBody)
# Body >> ident Body
BodyFull = kjParser.ParseRule( Body, [ident,Body], FullBody)
# declare Rules list and Associated Name dictionary
Rules = [RLNull, RLFull, RuleR, RuleNameR, NonTermR,\
BodyNull, BodyFull, InitRule]
RuleDict = \
{ "RLNull":0, "RLFull":1, "RuleR":2, "RuleNameR":3, \
"NonTermR":4, "BodyNull":5, "BodyFull":6 , "InitRule":7 }
# make the RuleSet and compute the associate DFA
RuleSet = ruleset( Root, Rules )
RuleSet.DoSLRGeneration()
# construct the Grammar object
Result = kjParser.Grammar( LexD, RuleSet.DFA, Rules, RuleDict )
return Result
#enddef ruleGrammar()
# this is the rule grammar object for
# parsing
RULEGRAM = ruleGrammar()
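# The rule strings RULEGRAM accepts look like this (added note; compare
# the Rulestr example in the RUNTESTS section below):
# rootnonterm ::
# @R rulename :: nonterm >> token token ...
# ## comments run from ## to end of line
# where every body token must already be registered as a nonterminal,
# keyword, punctuation, or terminal of the object grammar.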
# a derived grammar class (object oriented programming is cool!)
# this is a compilable grammar for automatic parser generation.
#
class CGrammar(kjParser.Grammar):
# initialization is handled by the base class
# insert a white separated list of keywords into the LexD
# THIS SHOULD CHECK FOR KEYWORD/NONTERMINAL/PUNCT NAME
# COLLISIONS (BUT DOESN'T YET).
def Keywords(self, Stringofkeys):
keywordlist = string.split(Stringofkeys)
for keyword in keywordlist:
self.LexD.keyword( keyword )
# insert a string of punctuations into the LexD
def punct(self, Stringofpuncts):
for p in Stringofpuncts:
self.LexD.punctuation(p)
# register a list of regular expression strings
# to represent comments in LexD
def comments(self, listOfCommentStrings):
for str in listOfCommentStrings:
self.LexD.comment(str)
# register a white separated list of nonterminal strings
def Nonterms(self, StringofNonterms):
nonTermlist = string.split(StringofNonterms)
for NonTerm in nonTermlist:
self.NonTermDict[NonTerm] = kjParser.nonterminal(NonTerm)
# initialize or add more rules to the RuleString
def Declarerules(self, StringWithRules):
self.RuleString = self.RuleString + "\n" + StringWithRules
# The compilation function assumes
# NonTermDict
# RuleString
# LexD
# TermDict
# have all been set up properly
# (at least if the default MetaGrammar is used).
# On successful completion it will set up
# DFA
# RuleL
# RuleNameToIndex
def Compile(self, MetaGrammar=RULEGRAM):
# the following should return a list of rules
# with punctuations of self.LexD interpreted as trivial keywords
# keywords of seld.LexD interpreted as keywords
# and nonterminals registered in NonTermDict interpreted as
# nonterms.
# ParseResult should be of form ( (rootNT, RuleL), self )
ParseResult = MetaGrammar.DoParse1( self.RuleString, self )
(RootNonterm, Rulelist) = ParseResult
# make a ruleset and compute its DFA
RuleS = ruleset( RootNonterm, Rulelist )
RuleS.DoSLRGeneration()
# make the rulename to index map to allow future bindings
for i in range(0,len(Rulelist)):
Rule = Rulelist[i]
self.RuleNameToIndex[ Rule.Name ] = i
# fill in the blanks
self.DFA = RuleS.DFA
self.RuleL = Rulelist
# FOR DEBUG AND TESTING
self.Ruleset = RuleS
# DON'T clean up the grammar (misc structures are used)
# in future bindings
#enddef Compile
# Write a reconstructable representation for this grammar
# to a file
#EXCEPT:
# - rule associations to reduction functions
# will be lost (must be reset elsewhere)
# - terminals in the lexical dictionary
# will not be initialized
#
# IND is used for indentation, should be whitespace (add check!)
#
# FName, if given, will cause the reconstruction to be placed
# inside a function `FName`+"()" returning the grammar object
#
# NOTE: this function violates information hiding principles;
# in particular it "knows" the guts of the FSM and LexD classes
#
def Reconstruct(self, VarName, Tofile, FName=None, indent=""):
Reconstruction = codeReconstruct(VarName, Tofile, self, FName, indent)
GrammarDumpSequence(Reconstruction)
#enddef Reconstruct
# marshalling of a grammar to a file
def MarshalDump(self, Tofile):
Reconstruction = marshalReconstruct(self, Tofile)
GrammarDumpSequence(Reconstruction)
#endclass CGrammar
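# Archiving sketch (added note; file names are hypothetical):
# f = open("mygram.py", "w")
# G.Reconstruct("GRAMMAR", f, FName="GrammarBuild") # emits Python source
# f.close()
# G.MarshalDump(open("mygram.mar", "wb")) # emits one marshalled list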
# general procedure for different types of archiving for grammars
def GrammarDumpSequence(ReconstructObj):
# assume an initialized Reconstruct Object with appropriate grammar etc.
# put the lexical part
ReconstructObj.PutLex()
# put the rules
ReconstructObj.PutRules()
# put transitions
ReconstructObj.PutTransitions()
# finish up
ReconstructObj.Cleanup()
# function to create a "null CGrammar"
def NullCGrammar():
return CGrammar(None,None,None,{})
# utility classes -- Grammar reconstruction objects
# encapsulate the process of grammar archiving.
#
class Reconstruct:
# this "virtual class" is only for common behaviors of subclasses.
def MakeTokenArchives(self):
# make a list of all tokens and
# initialize token --> int dictionary
keys = self.Gram.DFA.StateTokenMap.keys()
tokenToInt = {}
tokenSet = kjSet.NewSet([])
for k in keys:
kjSet.addMember(k[1], tokenSet)
tokens = kjSet.get_elts(tokenSet)
for i in range(0,len(tokens)):
tokenToInt[ tokens[i] ] = i
self.keys = keys
self.tokens = tokens # global sub
self.tokInt = tokenToInt # global sub
# grammar reconstruction to a file
class codeReconstruct(Reconstruct):
def __init__(self, VarName, Tofile, Grammar, FName=None, indent =""):
# do global subs for each of these
self.Var = VarName
self.File = Tofile
self.FName = FName
self.Gram = Grammar
# put the reconstruction in a function if FName is given
if FName != None:
Tofile.write("\n\n")
Tofile.write(indent+"def "+FName+"():\n")
IND = indent+" "
else:
IND = indent
self.I = IND # global sub!
Tofile.write("\n\n")
Tofile.write(IND+"# ******************************BEGIN RECONSTRUCTION\n")
Tofile.write(IND+"# Python declaration of Grammar variable "+VarName+".\n")
Tofile.write(IND+"# automatically generated by module "+PMODULE+".\n")
Tofile.write(IND+"# Altering this sequence by hand will probably\n")
Tofile.write(IND+"# leave it unusable.\n")
Tofile.write(IND+"#\n")
Tofile.write(IND+"import "+PMODULE+"\n\n")
Tofile.write(IND+"# variable declaration:\n")
Tofile.write(IND+VarName+"= "+PMODULE+".NullGrammar()\n\n")
# make self.keys list of dfa keys,
# self.tokens list of grammar tokens,
# self.tokInt inverted dictionary for self.tokens
self.MakeTokenArchives()
Tofile.write("\n\n"+IND+"# case sensitivity behavior for keywords.\n")
if self.Gram.LexD.isCaseSensitive():
Tofile.write(IND+VarName+".SetCaseSensitivity(1)\n")
else:
Tofile.write(IND+VarName+".SetCaseSensitivity(0)\n")
#enddef __init__
def PutLex(self):
IND = self.I
Tofile = self.File
VarName = self.Var
LexD = self.Gram.LexD
tokens = self.tokens
Tofile.write("\n\n"+IND+"# declaration of lexical dictionary.\n")
Tofile.write(IND+"# EXCEPT FOR TERMINALS\n")
Tofile.write(IND+VarName+".LexD.punctuationlist = ")
Tofile.write(`LexD.punctuationlist`+"\n")
Tofile.write(IND+"# now comment patterns\n")
for comment in LexD.commentstrings:
Tofile.write(IND+VarName+".LexD.comment("+`comment`+")\n")
Tofile.write(IND+"# now define tokens\n")
for i in range(0,len(tokens)):
tok = tokens[i]
(kind, name) = tok
if kind == TERMFLAG:
# put warning at end!
# terminal not installed in lexical dictionary here!
Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ")
Tofile.write(PMODULE+".termrep("+`name`+")\n")
elif kind == KEYFLAG:
Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ")
Tofile.write(VarName+".LexD.keyword("+`name`+")\n")
elif kind == NONTERMFLAG:
Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ")
Tofile.write(PMODULE+".nonterminal("+`name`+")\n")
else:
raise FlowError, "unknown token type"
#enddef PutLex
def PutRules(self):
IND = self.I
VarName = self.Var
Rules = self.Gram.RuleL
Tofile = self.File
Root = self.Gram.DFA.root_nonTerminal
Tofile.write("\n\n"+IND+"# declaration of rule list with names.\n")
Tofile.write(IND+"# EXCEPT FOR INTERP FUNCTIONS\n")
nrules = len(Rules)
Tofile.write(IND+VarName+".RuleL = [None] * "+`nrules`+"\n")
for i in range(0,nrules):
# put warning at end:
# rule reduction function not initialized here!
rule = Rules[i]
name = rule.Name
Tofile.write(IND+"rule = "+`rule`+"\n")
Tofile.write(IND+"name = "+`name`+"\n")
Tofile.write(IND+"rule.Name = name\n")
Tofile.write(IND+VarName+".RuleL["+`i`+"] = rule\n")
Tofile.write(IND+VarName+".RuleNameToIndex[name] = "+`i`+"\n")
Tofile.write("\n\n"+IND+"# DFA root nonterminal.\n")
Tofile.write(IND+VarName+".DFA.root_nonTerminal =")
Tofile.write(`Root`+"\n")
#enddef PutRules
def PutTransitions(self):
IND = self.I
Tofile = self.File
VarName = self.Var
maxState = self.Gram.DFA.maxState
tokenToInt = self.tokInt
StateTokenMap = self.Gram.DFA.StateTokenMap
keys = self.keys
Tofile.write("\n\n"+IND+"# DFA state declarations.\n")
for state in range(1, maxState+1):
Tofile.write(IND+VarName+".DFA.States["+`state`+"] = ")
Tofile.write('['+`TRANSFLAG`+']\n')
Tofile.write(IND+VarName+".DFA.maxState = "+`maxState`+"\n")
Tofile.write("\n\n"+IND+"# DFA transition declarations.\n")
for key in keys:
(fromState, TokenRep) = key
TokenIndex = tokenToInt[TokenRep]
TokenArg = VarName+".IndexToToken["+`TokenIndex`+"]"
TMap = StateTokenMap[key]
TMaptype = TMap[0][0]
if TMaptype == REDUCEFLAG:
# reduction
rulenum = TMap[0][1]
Args = "("+`fromState`+","+TokenArg+","+`rulenum`+")"
Tofile.write(IND+VarName+".DFA.SetReduction"+Args+"\n")
elif TMaptype == MOVETOFLAG:
# MoveTo
Args = "("+`fromState`+","+TokenArg+","+`TMap[0][1]`+")"
Tofile.write(IND+VarName+".DFA.SetMap"+Args+"\n")
else:
raise FlowError, "unexpected else (2)"
#enddef
def Cleanup(self):
Tofile = self.File
RuleL = self.Gram.RuleL
tokens = self.tokens
VarName = self.Var
IND = self.I
FName = self.FName
Tofile.write("\n\n"+IND+"# Clean up the grammar.\n")
Tofile.write(IND+VarName+".CleanUp()\n")
# if the Fname was given return the grammar as function result
if FName != None:
Tofile.write("\n\n"+IND+"# return the grammar.\n")
Tofile.write(IND+"return "+VarName+"\n")
Tofile.write("\n\n"+IND+"# WARNINGS ****************************** \n")
Tofile.write(IND+"# You must bind the following rule names \n")
Tofile.write(IND+"# to reduction interpretation functions \n")
for R in RuleL:
Tofile.write(IND+"# "+VarName+".Bind("+`R.Name`+", ??function??)\n")
Tofile.write(IND+"#(last rule)\n")
Tofile.write("\n\n"+IND+"# WARNINGS ****************************** \n")
Tofile.write(IND+"# You must bind the following terminals \n")
Tofile.write(IND+"# to regular expressions and interpretation functions \n")
warningPrinted = 0
for tok in tokens:
(kind, name) = tok
if kind == TERMFLAG and tok != ENDOFFILETOKEN:
Tofile.write(IND+"# "+VarName+\
".Addterm("+`name`+", ??regularExp??, ??function??)\n")
warningPrinted = 1
if not warningPrinted:
Tofile.write(IND+"# ***NONE** \n")
Tofile.write(IND+"#(last terminal)\n")
Tofile.write(IND+"# ******************************END RECONSTRUCTION\n")
#enddef
#endclass
# reconstruction using marshalling to a file
# encodes internal structures for grammar using marshal-able
# objects. Final marshalling to the file is done at CleanUp()
# storing one big list.
#
class marshalReconstruct(Reconstruct):
def __init__(self, Grammar, Tofile):
self.Gram = Grammar
self.File = Tofile
# should archive self.tokens structure
self.MakeTokenArchives()
# archive this
self.CaseSensitivity = Grammar.LexD.isCaseSensitive()
def PutLex(self):
LexD = self.Gram.LexD
# archive these
self.punct = LexD.punctuationlist
self.comments = LexD.commentstrings
def PutRules(self):
# archive this
self.Root = self.Gram.DFA.root_nonTerminal
# make a list of tuples that can be used with
# rule = apply(ParseRule, tuple[1])
# rule.Name = tuple[0]
Rules = self.Gram.RuleL
nrules = len(Rules)
RuleTuples = [None] * nrules
for i in range(nrules):
rule = Rules[i]
RuleTuples[i] = (rule.Name, rule.components())
#archive this
self.RuleTups = RuleTuples
def PutTransitions(self):
keys = self.keys
tokenToInt = self.tokInt
StateTokenMap = self.Gram.DFA.StateTokenMap
# archive this
self.MaxStates = self.Gram.DFA.maxState
# create two lists,
# one for reductions with contents (fromState, tokennumber, rulenum)
# one for movetos with contents (fromstate, tokennumber, tostate)
# (note: token number not token itself to allow sharing)
# to allow arbitrary growing, first use dicts:
reductDict = {}
nreducts = 0
moveToDict = {}
nmoveTos = 0
for key in self.keys:
(fromState, TokenRep) = key
TokenIndex = tokenToInt[TokenRep]
TMap = StateTokenMap[key]
TMaptype = TMap[0][0]
if TMaptype == REDUCEFLAG:
rulenum = TMap[0][1]
reductDict[nreducts] = (fromState, TokenIndex, rulenum)
nreducts = nreducts + 1
elif TMaptype == MOVETOFLAG:
ToState = TMap[0][1]
moveToDict[nmoveTos] = (fromState, TokenIndex, ToState)
nmoveTos = nmoveTos + 1
else:
raise FlowError, "unexpected else"
#endfor
# translate dicts to lists
reducts = [None] * nreducts
for i in range(nreducts):
reducts[i] = reductDict[i]
moveTos = [None] * nmoveTos
for i in range(nmoveTos):
moveTos[i] = moveToDict[i]
# archive these
self.reducts = reducts
self.moveTos = moveTos
# this is the function that does the marshalling
def Cleanup(self):
import marshal
# make the big list to marshal
BigList = [None] * 9
BigList[0] = self.tokens
BigList[1] = self.punct
BigList[2] = self.comments
BigList[3] = self.RuleTups
BigList[4] = self.MaxStates
BigList[5] = self.reducts
BigList[6] = self.moveTos
BigList[7] = self.Root
BigList[8] = self.CaseSensitivity
# dump the big list to the file
marshal.dump( BigList, self.File )
#end class
#######################testing stuff
if RUNTESTS:
def echo(x): return x
# simple grammar stolen from a text
LD0 = kjParser.LexDictionary()
id = LD0.terminal("id","id",echo)
plus = LD0.punctuation("+")
star = LD0.punctuation("*")
oppar = LD0.punctuation("(")
clpar = LD0.punctuation(")")
equals = LD0.punctuation("=")
E = kjParser.nonterminal("E")
T = kjParser.nonterminal("T")
Tp = kjParser.nonterminal("Tp")
Ep = kjParser.nonterminal("Ep")
F = kjParser.nonterminal("F")
rule1 = kjParser.ParseRule( E, [ T, Ep ] )
rule2 = kjParser.ParseRule( Ep, [ plus, T, Ep ] )
rule3 = kjParser.ParseRule( Ep, [ ] )
rule4 = kjParser.ParseRule( T, [ F, Tp ] )
rule5 = kjParser.ParseRule( Tp, [ star, F, Tp ] )
rule6 = kjParser.ParseRule( Tp, [ ] )
rule7 = kjParser.ParseRule( F, [ oppar, E, clpar ] )
rule8 = kjParser.ParseRule( F, [ id ] )
rl0 = [ rule1, rule2, rule3, rule4, rule5, rule6, rule7,rule8]
rs0 = ruleset(E, rl0)
rs0.CompFirst()
Firstpairs = kjSet.GetPairs(rs0.First)
rs0.CompFollow()
Followpairs = kjSet.GetPairs(rs0.Follow)
rs0.CompSLRNFA()
NFA0 = rs0.SLRNFA
rs0.CompDFA()
rs0.SLRFixDFA()
DFA0 = rs0.DFA
class dummy: pass
ttt0 = dummy()
def TESTDFA( STRING , ttt, DFA, Rulelist, DOREDUCTIONS = 1):
ttt.STRING = STRING
#ttt.List = kjParser.LexList(LD0, ttt.STRING)
ttt.Stream = kjParser.LexStringWalker( ttt.STRING, LD0 )
ttt.Stack = {-1:0}# Walkers.SimpleStack()
ttt.ParseObj = kjParser.ParserObj( Rulelist, \
ttt.Stream, DFA, ttt.Stack,DOREDUCTIONS)
ttt.RESULT = ttt.ParseObj.GO()
#ttt.Stack.Dump(10)
return ttt.RESULT
def TESTDFA0( STRING , DOREDUCTIONS = 1):
return TESTDFA( STRING, ttt0, DFA0, rl0, DOREDUCTIONS )
TESTDFA0( " id + id * id ")
# an even simpler grammar
S = kjParser.nonterminal("S")
M = kjParser.nonterminal("M")
A = kjParser.nonterminal("A")
rr1 = kjParser.ParseRule( S, [M] )
#rr2 = kjParser.ParseRule( A, [A, plus, M])
#rr3 = kjParser.ParseRule( A, [M], echo)
#rr4 = kjParser.ParseRule( M, [M, star, M])
rr5 = kjParser.ParseRule( M, [oppar, M, clpar])
rr6 = kjParser.ParseRule( M, [id])
rl1 = [rr1,rr5,rr6]
rs1 = ruleset(S, rl1)
rs1.CompFirst()
rs1.CompFollow()
rs1.CompSLRNFA()
rs1.CompDFA()
rs1.SLRFixDFA()
DFA1 = rs1.DFA
ttt1=dummy()
def TESTDFA1( STRING , DOREDUCTIONS = 1):
return TESTDFA( STRING, ttt1, DFA1, rl1, DOREDUCTIONS )
X = kjParser.nonterminal("X")
Y = kjParser.nonterminal("Y")
RX = kjParser.ParseRule( X, [ oppar, Y, clpar ] )
RY = kjParser.ParseRule( Y, [] )
rl2 = [RX,RY]
rs2 = ruleset(X, rl2)
rs2.CompFirst()
rs2.CompFollow()
rs2.CompSLRNFA()
rs2.CompDFA()
rs2.SLRFixDFA()
DFA2 = rs2.DFA
ttt2 = dummy()
def TESTDFA2( STRING, DOREDUCTIONS = 1):
return TESTDFA( STRING, ttt2, DFA2, rl2, DOREDUCTIONS )
# the following grammar should fail to be slr
# (Aho,Ullman p. 213)
S = kjParser.nonterminal("S")
L = kjParser.nonterminal("L")
R = kjParser.nonterminal("R")
RS1 = kjParser.ParseRule( S, [L, equals, R] )
RS2 = kjParser.ParseRule( S, [R], echo )
RL1 = kjParser.ParseRule( L, [star, R])
RL2 = kjParser.ParseRule( L, [id])
RR1 = kjParser.ParseRule( R, [L] )
rs3 = ruleset(S, [RS1,RS2,RL1,RL2,RR1])
rs3.CompFirst()
rs3.CompFollow()
rs3.CompSLRNFA()
rs3.CompDFA()
#rs3.SLRFixDFA() # should fail and does.
# testing RULEGRAM
ObjG = NullCGrammar()
ObjG.Addterm("id","id",echo)
ObjG.Nonterms("T E Ep F Tp")
ObjG.Keywords("begin end")
ObjG.punct("+*()")
ObjG.comments(["--.*\n"])
# PROBLEM WITH COMMENTS???
Rulestr = """
## what a silly grammar!
T ::
@R One :: T >> begin E end
@R Three :: E >>
@R Two :: E >> E + T
@R Four :: E >> ( T )
"""
RL = RULEGRAM.DoParse1( Rulestr, ObjG )
#
# python for parser interpretation
# Copyright Aaron Robert Watters, 1994
#
# BUGS:
# Lexical error handling is not nice
# Parse error handling is not nice
#
# Lex analysis may be slow for big grammars
# Setting case sensitivity for keywords MUST happen BEFORE
# declaration of keywords.
import kjSet
import string
import regex
import regsub
# set this flag for regression testing at each load
RUNTESTS = 0
# set this flag to enable warning for default reductions
WARNONDEFAULTS = 0
# some local constants
NOMATCHFLAG = -2 # FLAG FOR NO MATCH IN FSM
MOVETOFLAG = -3 # FLAG FOR "SHIFT" IN SN FSM
REDUCEFLAG = -4 # FLAG FOR REDUCTION IN FSM
TRANSFLAG = -5 # FLAG FOR TRANSIENT STATE IN FSM
KEYFLAG = -6 # FLAG FOR KEYWORD
NONTERMFLAG = -7 # FLAG FOR NONTERMINAL
TERMFLAG = -8 # FLAG FOR TERMINAL
EOFFLAG = "*" # FLAG for End of file
# set this string to the Module name (filename)
# used for dumping reconstructable objects
THISMODULE = "kjParser"
# regular expression for matching whitespace
WHITERE = "["+string.whitespace+"]+"
WHITEREGEX = regex.compile(WHITERE)
# local errors
LexTokenError = "LexTokenError" # may happen on bad string
UnkTermError = "UnkTermError" # ditto
BadPunctError= "BadPunctError" # if try to make whitespace a punct
ParseInitError = "ParseInitError" # shouldn't happen?
#EOFError # may happen on bad string
FlowError = "FlowError" # shouldn't happen!!! (bug)
#SyntaxError # may happen on bad string
#TypeError
ReductError = "ReductError" # shouldn't happen?
NondetError = "NondetError" # shouldn't happen?
# the end of file is interpreted in the lexical stream as
# a terminal...
# this should be appended to the lexical stream:
ENDOFFILETOKEN = (TERMFLAG, EOFFLAG)
# in FSM use the following terminal to indicate eof
ENDOFFILETERM = (ENDOFFILETOKEN, EOFFLAG)
# utility function for error diagnostics
def DumpStringWindow(Str, Pos, Offset=15):
L = []
L.append("near ::")
start = Pos-Offset
end = Pos+Offset
if start<0: start = 0
if end>len(Str): end = len(Str)
L.append(`Str[start:Pos]`+"*"+`Str[Pos:end]`)
from string import join
return join(L, "\n")
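# Example (added note):
# DumpStringWindow("select * frm tbl", 9)
# returns roughly:
# near ::
# 'select * '*'frm tbl'
# (the "*" marks the position where the error was detected)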
# lexical dictionary class
# this data structure is used by lexical parser below.
#
# basic operations:
# LD.punctuation(string)
# registers a string as a punctuation
# EG: LD.punctuation(":")
# Punctuations are treated as a special kind of keyword
# that is recognized even when not surrounded by whitespace.
# IE, "xend" will not be recognized as "x end", but "x;" will be
# recognized as "x ;" if "end" is a regular keyword but
# ";" is a punctuation. Only single character punctuations
# are supported (now), ie, ":=" must be recognized as
# ":" "=" above the lexical level.
#
# LD.comment(reg_expression_string)
# registers a comment pattern
# EG LD.comment("--.*\n")
# asks to recognize ansi/sql comments like "-- correct?\n"
#
# LD[keyword_string] = canonicalstring # assignment!
# specifies a keyword string that should map to the canonicalstring
# when translated to the lexical stream.
# EG: LD["begin"] = "BEGIN"; LD["BEGIN"] = "BEGIN"
# will recognize upcase or downcase begins, not mixed case.
# (automatic upcasing is allowed below at parser level).
#
# LD[compiled_reg_expression] = (TerminalFlag, Function) # assignment!
# specifies a regular expression that should be associated
# with the lexical terminal marker TerminalFlag
# EG: LD[regex.compile("[0-9]+")] = ("integer",string.atoi)
# the Function should be a function on one string argument
# that interprets the matching string as a value. if None is
# given, just the string itself will be used as the
# interpretation. (a better choice above would be a function
# which "tries" atoi first and uses atol on overflow).
# NOTE: ambiguity among regular expressions will be decided
# arbitrarily (fix?).
#
# LD[string] # retrieval!
# returns ((KEYFLAG, Keywordstring), Keywordstring)
# if the (entire) string matches a keyword or a
# punctuation Keywordstring.
# otherwise returns ((TERMFLAG, Terminalname), value)
# if the (entire) string matches the regular expression for
# a terminal flaged by Terminalname; value is the interpreted
# value. TerminalFlag better be something other than
# KEYFLAG!
# otherwise raises an error!
# comments not filtered here!
#
# the following additional functions are used for autodocumentation
# in declaring rules, etcetera.
# begin = LD.keyword("begin")
# sets variable "begin" to (KEYFLAG, "BEGIN") if
# "begin" maps to keyword "BEGIN" in LD
# integer = LD.terminal("integer")
# sets variable integer to ("integer", Function)
# if "integer" is a registered terminal Function is
# its associated interpretation function.
#
class LexDictionary:
def __init__(self):
# commentpatterns is simply a list of compiled regular expressions
# that represent comments
self.commentpatterns = []
# commentstrings is used for debugging/dumping/reconstruction etc.
self.commentstrings = []
# punctuationlist is a string of punctuations
self.punctuationlist = ""
# keywordmap is a dictionary mapping recognized keyword strings
# and punctuations to their constant representations.
self.keywordmap = KeywordDict()
# regexprlist is a list of triples (regex,Flag,function) mapping
# regular expressions to their flag and interpreter function.
self.regexprlist = []
def Dump(self):
print "comments = ", self.commentstrings
print "punctuations = ", self.punctuationlist
print "keywordmap ="
self.keywordmap.Dump()
print "regexprlist =", self.regexprlist
def __getitem__(self,key):
# try to match string to a keyword
try:
return self.keywordmap[key]
except KeyError:
# try to match a regular expression
found = 0 # so far not found
length = len(key)
for triple in self.regexprlist:
(regexpr, Flag, Function) = triple
index = regexpr.match(key)
if index == length:
found = 1
# use the function to interpret the string, if given
if Function != None:
value = Function(key)
else:
value = key
# NONLOCAL RETURN
return (Flag, value)
#endfor
raise LexTokenError, "no match for string: " + `key`
#enddef __getitem__
# LD.keyword("this") will make a new keyword "this" if not found
#
def keyword(self,str):
# upcase the string, if needed
if self.keywordmap.caseInsensitive:
str = string.upper(str)
if not self.keywordmap.has_key(str):
# redundancy to avoid excess construction during parsing
token = (KEYFLAG,str)
self.keywordmap[str] = (token,str)
else:
(token, str2) = self.keywordmap[str]
return token
# LD.terminal("this") will just look for "this"
# LD.terminal("this", RE, F) will register a new terminal
# RE must be a compiled regular expression or string reg ex
# F must be an interpretation function
#
def terminal(self, string, RegExpr=None, Function=None):
if RegExpr != None and Function != None:
if type(RegExpr) == type(""):
RegExpr = regex.compile(RegExpr)
self[ RegExpr ] = ( string, Function)
for triple in self.regexprlist:
(regexpr,token,Function) = triple
if token[1] == string:
# nonlocal exit
return token
#endfor
# error if no exit by now
raise UnkTermError, "no such terminal"
def __setitem__(self,key,value):
if type(key) == type(''):
# if it's a string it must be a keyword
if self.keywordmap.caseInsensitive:
value = string.upper(value)
key = string.upper(key)
self.keywordmap[key] = ( (KEYFLAG, value), value)
else:
# otherwise it better be a compiled regular expression (not
#verified)
(Name, Function) = value
Flag = (TERMFLAG, Name)
regexpr = key
self.regexprlist = self.regexprlist + \
[ (regexpr, Flag, Function) ]
# register a regular expression as a comment
def comment(self, string):
# regexpr better be an uncompiled string regular expression! (not verified)
regexpr = regex.compile(string)
self.commentpatterns = self.commentpatterns + [ regexpr ]
self.commentstrings = self.commentstrings + [ string ]
# register a string as a punctuation
def punctuation(self,Instring):
if type(Instring) != type("") or len(Instring)!=1:
raise BadPunctError, "punctuation must be string of length 1"
if Instring in string.whitespace:
raise BadPunctError, "punctuation may not be whitespace"
self.punctuationlist = self.punctuationlist + Instring
return self.keyword(Instring)
# testing and altering case sensitivity behavior
def isCaseSensitive(self):
return not self.keywordmap.caseInsensitive
# setting case sensitivity MUST happen before keyword
# declarations!
def SetCaseSensitivity(self, Boolean):
self.keywordmap.caseInsensitive = not Boolean
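# Example (added note): case sensitivity must be configured first:
# LD = LexDictionary()
# LD.SetCaseSensitivity(0) # keywords become case insensitive
# begin = LD.keyword("begin") # stored and matched as "BEGIN"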
# function to do same as __getitem__ above but looking _inside_ a string
# instead of at the whole string
# returns (token,skip)
# where token is one of
# ((KEYFLAG,name),name) or ((TERMFLAG,termname),value)
# and skip is the length of the substring of String that matches the token
def Token(self, String, StartPosition):
finished = 0 # dummy, exit should be nonlocal
totalOffset = 0
while not finished:
# flag EOF if past end of string?
if len(String) <= StartPosition:
return (ENDOFFILETERM, 0)
# skip whitespace
whitespacefound = 0
skip = WHITEREGEX.match(String, StartPosition)
if skip > 0:
StartPosition = StartPosition + skip
totalOffset = totalOffset + skip
whitespacefound = 1
# try to find comment, keyword, term in that order:
# looking for comment
commentfound = 0
for commentexpr in self.commentpatterns:
offset = commentexpr.match(String,StartPosition)
if offset != -1:
if offset<1:
info = DumpStringWindow(String,StartPosition)
raise LexTokenError, "zero length comment "+info
commentfound = 1
StartPosition = StartPosition + offset
totalOffset = totalOffset + offset
# looking for a keyword
keypair = self.keywordmap.hasPrefix(String,StartPosition,
self.punctuationlist)
if keypair != 0:
return ( keypair[0], keypair[1] + totalOffset)
# looking for terminal
for (regexpr, Flag, Function) in self.regexprlist:
offset = regexpr.match(String,StartPosition)
if offset != -1:
matchstring = String[StartPosition : offset+StartPosition]
if Function != None:
value = Function(matchstring)
else:
value = matchstring
return ((Flag, value) , offset + totalOffset)
if not (commentfound or whitespacefound):
info = DumpStringWindow(String,StartPosition)
raise LexTokenError, "Lexical parse failure "+info
#endwhile
#enddef
#endclass LexDictionary
# alternate, experimental implementation
class lexdictionary:
def __init__(self):
self.skip = ""
self.commentstrings = []
self.punctuationlist = ""
self.keywordmap = KeywordDict()
self.termlist = [] # list of (term, regex, flag, interpret_fn)
self.uncompiled = 1 # only compile after full initialization.
self.laststring= self.lastindex= self.lastresult = None
def Dump(self, *k):
raise "sorry", "not implemented"
__getitem__ = Dump
def keyword(self, str):
kwm = self.keywordmap
if kwm.caseInsensitive:
str = string.upper(str)
try:
(token, str2) = kwm[str]
except:
token = (KEYFLAG, str)
self.keywordmap[str] = (token,str)
return token
def terminal(self, str, regexstr=None, Function=None):
if regexstr is not None:
flag = (TERMFLAG, str)
self.termlist.append( (str, regexstr, flag, Function) )
return flag
else:
for (s, rgx, fl, fn) in self.termlist:
if fl[1]==str:
return fl
else:
raise UnkTermError, "no such terminal"
__setitem__ = Dump
def comment(self, str):
self.commentstrings.append(str)
def punctuation(self, Instring):
if type(Instring) != type("") or len(Instring)!=1:
raise BadPunctError, "punctuation must be string of length 1"
if Instring in string.whitespace:
raise BadPunctError, "punctuation may not be whitespace"
self.punctuationlist = self.punctuationlist + Instring
return self.keyword(Instring)
def SetCaseSensitivity(self, Boolean):
self.keywordmap.caseInsensitive = not Boolean
def Token(self, String, StartPosition):
# shortcut for reductions.
if self.laststring is String and self.lastindex == StartPosition:
#print "lastresult", self.lastresult
return self.lastresult
self.lastindex = StartPosition
self.laststring = String
#print `String[StartPosition: StartPosition+60]`
if self.uncompiled:
self.compile()
self.uncompiled = None
finished = 0
totalOffset = 0
skipprog = self.skipprog
keypairfn = self.keywordmap.hasPrefix
punctlist = self.punctuationlist
termregex = self.termregex
while not finished:
#print String[StartPosition:]
if len(String) <= StartPosition:
result = self.lastresult = (ENDOFFILETERM, 0)
return result
# skip ws and comments
skip = skipprog.match(String, StartPosition)
            if skip==0:
                # a whitespace/comment pattern matched zero characters
                info = DumpStringWindow(String, StartPosition)
                raise LexTokenError, \
                      "zero length whitespace or comment "+info
            if skip>0:
                #print "skipping", `String[StartPosition: StartPosition+skip]`
                StartPosition = StartPosition + skip
                totalOffset = totalOffset + skip
                continue
# look for keyword
keypair = keypairfn(String, StartPosition, punctlist)
if keypair!=0:
#print "keyword", keypair
result = self.lastresult = (keypair[0], keypair[1]+totalOffset)
return result
# look for terminal
offset = termregex.match(String, StartPosition)
if (offset>0):
g = termregex.group
for (term, regex, flag, fn) in self.termlist:
test = g(term)
if test:
#print "terminal", test
if fn is not None:
value = fn(test)
else:
value = test
result = self.lastresult = (
(flag, value), offset + totalOffset)
return result
# error if we get here
info = DumpStringWindow(String, StartPosition)
raise LexTokenError, "Lexical token not found "+info
def isCaseSensitive(self):
return not self.keywordmap.caseInsensitive
def compile(self):
from string import joinfields, whitespace
import regex
skipregexen = self.commentstrings + [WHITERE]
skipregex = "\(" + joinfields(skipregexen, "\)\|\(") + "\)"
#print skipregex; import sys; sys.exit(1)
self.skipprog = regex.compile(skipregex)
termregexen = []
termnames = []
for (term, rgex, flag, fn) in self.termlist:
fragment = "\(<%s>%s\)" % (term, rgex)
termregexen.append(fragment)
termnames.append(term)
termregex = joinfields(termregexen, "\|")
self.termregex = regex.symcomp(termregex)
self.termnames = termnames
LexDictionary = lexdictionary ##### test!
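# Illustrative sketch (not part of the parser proper): building and
# querying a lexical dictionary.  Assumes the pre-"re" regex module and
# the module's WHITERE/KEYFLAG constants; the names used are invented.
def _demo_lexdictionary():
    LexD = lexdictionary()
    LexD.SetCaseSensitivity(0)       # must precede keyword declarations
    SELECT = LexD.keyword("select")
    COMMA = LexD.punctuation(",")
    INT = LexD.terminal("int", "[0-9]+", None)
    LexD.comment("--.*")
    # Token(String, Start) -> (token, skip): skip counts all characters
    # consumed, including any leading whitespace and comments.
    print LexD.Token("select 12", 0) # roughly (((KEYFLAG,'SELECT'),'SELECT'), 6)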
# a utility class: dictionary of prefixes
# should be generalized to allow upcasing of keyword matches
class KeywordDict:
def __init__(self, caseInsensitive = 0):
self.FirstcharDict = {}
self.KeyDict = {}
self.caseInsensitive = caseInsensitive
def Dump(self):
if self.caseInsensitive:
print " case insensitive"
else:
print " case sensitive"
keys = self.KeyDict.keys()
print " keyDict has ", len(keys), " elts"
for key in keys:
print " ", key," maps to ",self.KeyDict[key]
firstchars = self.FirstcharDict.keys()
print " firstcharDict has ", len(firstchars), " elts"
for char in firstchars:
print " ", char," maps to ",self.FirstcharDict[char]
# set item assumes value has correct case already, if case sensitive
def __setitem__(self, key, value):
if len(key)<1:
raise LexTokenError, "Keyword of length 0"
if self.caseInsensitive:
KEY = string.upper(key)
else:
KEY = key
firstchar = KEY[0:1]
if self.FirstcharDict.has_key(firstchar):
self.FirstcharDict[firstchar] = \
self.FirstcharDict[firstchar] + [(KEY, value)]
else:
self.FirstcharDict[firstchar] = [(KEY, value)]
self.KeyDict[KEY] = value
# if String has a registered keyword at start position
# return its canonical representation and offset, else 0
# keywords that are not punctuations should be
# recognized only if followed
# by a punctuation or whitespace char
#
def hasPrefix(self,String,StartPosition,punctuationlist):
First = String[StartPosition:StartPosition+1]
fcd = self.FirstcharDict
caseins = self.caseInsensitive
if caseins:
First = string.upper(First)
if fcd.has_key(First):
Keylist = fcd[First]
else:
return 0
for (key,value) in Keylist:
offset = len(key)
EndPosition = StartPosition+offset
match = String[StartPosition : EndPosition]
if caseins:
match = string.upper(match)
if key == match:
if len(key)==1 and key in punctuationlist:
# punctuations are recognized regardless of nextchar
return (value,offset)
else:
# nonpuncts must have punct or whitespace following
#(uses punct as single char convention)
if EndPosition == len(String):
return (value, offset)
else:
nextchar = String[EndPosition]
if nextchar in string.whitespace\
or nextchar in punctuationlist:
return (value, offset)
return 0 # if no exit inside for loop, fail
def __getitem__(self,key):
if self.caseInsensitive:
key = string.upper(key)
return self.KeyDict[key]
def has_key(self,key):
if self.caseInsensitive:
key = string.upper(key)
return self.KeyDict.has_key(key)
#endclass KeywordDict:
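# Small sketch of the prefix test: a registered keyword matches at a
# position only when followed by whitespace, a punctuation character,
# or end of string; length-1 punctuations match unconditionally.
def _demo_keyworddict():
    kd = KeywordDict(caseInsensitive=1)
    kd["begin"] = "BEGIN-TOKEN"
    print kd.hasPrefix("begin end", 0, "")   # ('BEGIN-TOKEN', 5)
    print kd.hasPrefix("beginner", 0, "")    # 0: no separator follows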
# LexStringWalker walks through a string looking for
# substrings recognized by a lexical dictionary
#
# ERROR REPORTING NEEDS IMPROVEMENT
class LexStringWalker:
def __init__(self, String, LexDict):
self.Position = 0
self.NextPosition = 0
self.String = String
self.LexDict = LexDict
self.PastEOF = 0
self.Done = 0
def DUMP(self):
return DumpStringWindow(self.String,self.Position)
#reset not defined
def more(self):
return not self.PastEOF
def getmember(self):
(Token,skip) = self.LexDict.Token(self.String, self.Position)
self.NextPosition = self.Position + skip
if Token == ENDOFFILETERM:
self.PastEOF = 1
return Token
def next(self):
if self.Done:
data = self.DUMP()
raise LexTokenError, "no next past end of file "+data
elif self.PastEOF:
self.Done=1
elif self.NextPosition > self.Position:
self.Position = self.NextPosition
else:
dummy = self.getmember()
if self.NextPosition <= self.Position:
data = self.DUMP()
raise LexTokenError, "Lexical walker not advancing "+data
self.Position = self.NextPosition
#endclass LexStringWalker
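# Sketch of the walker protocol: getmember() peeks at the current
# token, next() advances, and more() goes false only after
# ENDOFFILETERM has been seen (so the EOF token is delivered once too).
def _demo_walk(String, LexD):
    walker = LexStringWalker(String, LexD)
    while walker.more():
        print walker.getmember()
        walker.next()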
# the parse class:
# Based loosely on Aho+Ullman, Principles of Compiler Design, Ch.6,
# except that they don't describe how to handle boundary
# conditions, so I made that part up myself.
#
# Note: This could be implemented using just functions; it's implemented
# as a class to facilitate diagnostics and debugging in case of
# failures of various sorts.
#
# a parse accepts
# a rule list
#
# a lexically analysed stream with methods
# stream.getmember() returns the current token on the stream
# stream.next() moves on to next token
# stream.more() returns false if current token is the last token
#
# and a FSM (finite state machine) with methods
# FSM.root_nonTerminal
# the nonterminal at which to start parsing
# FSM.initial_state
# the initial state to start at
# FSM.successful_final_state
# the final state to go to upon successful parse
# FSM.map(Current_State,Current_Token)
# returns either
# (TERMFLAG, 0)
# if Current_State is terminal (final or reduction).
# (NOMATCHFLAG, 0)
# if Current_State is nonterminal, but the Current_Token
# and Next_Token do not lead to a valid state in the FSM
# (MOVETOFLAG, Next_State)
# if Current_State is nonterminal and Current_Token,
# Next_token map to Next_State from Current_State.
# (REDUCEFLAG, Rulenum)
# if Current_State indicates a reduction at Current_Token
# for rule Rule number Rule
#
# and a Stack with methods (replaced with dictionary)
# (init: {-1:0} )
# Stack.Top() returns top of stack (no pop)
# ( Stack[Stack[-1]] )
# Stack.Push(Object)
# ( Stack[-1]=Stack[-1]+1; Stack[Stack[-1]]=Object )
# Stack.MakeEmpty()
# ( Stack[-1]=0 )
# Stack.IsEmpty()
# ( Stack[-1] == 0 )
# Stack.Pop()
# ( Stack[-1] = Stack[-1]-1 )
# stack contents created by Parser will be of form (State,Value)
# where Value was inserted at FSM state State.
# Value of form either (KEYFLAG, Name)
# (NontermName, reductionvalue)
# or (TerminalName, value)
#
# and an optional parameter Evaluate which if 0 indicates that
# rules should be evaluated, otherwise indicates that rules
# should just be reduced and the reduction structure should
# be used as the result of the rule
#
# rule objects must support methods
# Rule.reduce(Stack)
# pops off the elements corresponding to the body of the Rule
# from the stack and returns (NewStack,Red) where NewStack is
# the stack minus the body and Red is the result of evaluating the
# reduction function on this instance of the rule.
# Rule.Nonterm
# the nonterminal at the head of the rule
class ParserObj:
# Evaluate determines whether rules should be evaluated
# after reductions. Context is an argument passed to the
# list reduction function
#
def __init__(self, Rulelist, Stream, FSM, Stack, \
Evaluate=1, \
Context=None):
self.Rules = Rulelist
self.LexStream = Stream
self.FSM = FSM
self.Stack = Stack
self.Context = Context
# start with empty stack, initial_state, no nonterminal
#self.Stack[-1] = 0# self.Stack.MakeEmpty()
self.Stack[:] = []
self.State = FSM.initial_state
self.currentNonterm = None
self.Evaluate = Evaluate
# DoOneReduction accepts tokens from the stream and pushes
# them onto the stack until a reduction state is reached.
#
# Resolve the reduction
#
def DoOneReduction(self):
current=self.State
FSM=self.FSM
Stack = self.Stack
Context = self.Context
Stream = self.LexStream
# the internal FSM.StateTokenMap dictionary is used directly here.
STMap = FSM.StateTokenMap
#if FSM.final_state(current):
# raise ParseInitError, 'trying to reduce starting at final state'
tokenVal = Stream.getmember()
#print "tokenVal", tokenVal
token = tokenVal[0]
# push the token and traverse FSM until terminal state is reached
#(flag, nextThing) = FSM.map(current, token)
key = (current, token)
try:
(flag, nextThing) = STMap[key][0]
except KeyError:
flag = NOMATCHFLAG
while flag == MOVETOFLAG:
nextState = nextThing
#print current, " shift ", token,
# no sanity check, possible infinite loop
# push current token and next state
ThingToPush = (nextState, tokenVal)
#print "pushing ", ThingToPush
#Stack[-1]=Stack[-1]+1; Stack[Stack[-1]]=ThingToPush
Stack.append(ThingToPush)
#Stack.Push( ThingToPush )
# move to next token, next state
Stream.next()
# error if end of stream
if not Stream.more(): # optimized Stream.PastEOF (?)
data = Stream.DUMP()
raise EOFError, 'end of stream during parse '+data
current = nextState
tokenVal = Stream.getmember()
token = tokenVal[0]
#MAP = FSM.map(current,token)
key = (current, token)
try:
(flag, nextThing) = STMap[key][0]
except KeyError:
flag = NOMATCHFLAG
# at end of while loop we should be at a reduction state
if flag == REDUCEFLAG:
rulenum = nextThing
#print current, " reduce ", token, self.Rules[rulenum]
# normal case
# perform reduction
rule = self.Rules[rulenum]
Nonterm = rule.Nonterm
self.currentNonterm = Nonterm
(Stack, reduct) = rule.reduce( Stack , Context )
# self.Stack = Stack #not needed, unless stack rep changes
GotoState = self.GotoState(rule)
# push the Gotostate and result of rule reduction on stack
ThingToPush = (GotoState, (Nonterm, reduct) )
# push the result of the reduction and exit normally
#print "pushing ", ThingToPush
#Stack[-1]=Stack[-1]+1; Stack[Stack[-1]]=ThingToPush
Stack.append(ThingToPush)
#Stack.Push(ThingToPush)
self.State=GotoState
return 1 # normal successful completion
# some error cases
elif flag == NOMATCHFLAG:
self.ParseError(current,tokenVal, "nomatch1")
#elif FSM.final_state(current):
# raise BadFinalError, 'unexpected final state reached in reduction'
else:
data = Stream.DUMP()
s = """
flag = %s
map = %s """ % (flag, FSM.map(current,token))
data = data + s
raise FlowError, 'unexpected else '+data
#enddef DoOneReduction
# compute the state to goto after a reduction is performed
# on a rule.
# Algorithm: determine the state at beginning of reduction
# and the next state indicated by the head nonterminal of the rule.
# special case: empty stack and root nonterminal --> success.
#
def GotoState(self, rule):
FSM = self.FSM
Stack = self.Stack
Head = rule.Nonterm
if len(Stack)==0: #Stack[-1]==0: #Stack.IsEmpty():
BeforeState = FSM.initial_state
else:
BeforeState = Stack[-1][0] #Stack[Stack[-1]][0] #Stack.Top()[0]
# is this right? if the stack is empty and the Head
# is the root nonterm, then goto is final state
if len(Stack)==0 and Head == FSM.root_nonTerminal:#Stack.isEmpty()
Result = FSM.successful_final_state
else:
# consider eliminating the call to .map here? (efficiency)
(flag, Result) = FSM.map(BeforeState, Head)
if flag != MOVETOFLAG:
#FSM.DUMP()
self.ParseError(BeforeState, Head, "notmoveto")
return Result
def ParseError( self, State, Token, *rest):
# make this parse error nicer (add diagnostic methods?)
L = [""]
L.append("*******************************")
L.append("current state = "+`State`)
L.append("expects: ")
expects = ""
for (flag,name) in self.FSM.Expects(State):
if flag in (TERMFLAG, KEYFLAG):
expects = expects + `name`+ ", "
L.append(expects)
L.append(`rest`)
L.append("current token = " + `Token`)
#print "Stack =",
#self.StackDump(5)
#print
from string import join
data = self.LexStream.DUMP() + join(L, "\n")
raise SyntaxError, 'unexpected token sequence.' + data
    def StackDump(self, N):
        # print the top N entries of the (list based) stack
        Stack = self.Stack
        Topkey = len(Stack)
        if Topkey>N:
            Start = Topkey - N
        else:
            Start = 0
        for i in range(Start,Topkey):
            print " :: ", Stack[i],
# execute parsing until done:
def GO(self):
while self.State != self.FSM.successful_final_state:
#self.FSM.final_state(self.State):
self.DoOneReduction()
# should I check that stack has only one elt here?
# return result of last reduction
return self.Stack[-1][1] #self.Stack.Top()[1]
#endclass ParserObj
# function for declaring a variable to represent a nonterminal:
# eg Program = nonterminal("program")
# included for convenient autodocumentation
#
def nonterminal(string):
return (NONTERMFLAG, string)
# declaring a terminal WITHOUT INSTALLING IT IN A LexDict
def termrep(string):
return (TERMFLAG, string)
# the rule class
# a rule is defined by a goal nonterminal marker of form
# (NONTERMFLAG, Name)
# and a list defining the body which must contain elts of form
# (KEYFLAG, Name) or (NONTERMFLAG, Name) or (TERMFLAG, Name)
# and a reduction function which takes a list of the same size
# as the BodyList (consisting of the results of the evaluations of
# the previous reductions)
# and returns an interpretation for the body
# the following function is used as a default reduction function
# for rules
def DefaultReductFun( RuleResultsList, Context ):
if WARNONDEFAULTS:
print "warn: default reduction."
print " ", RuleResultsList
return RuleResultsList
class ParseRule:
def __init__(self, goalNonTerm, BodyList, \
ReductFunction = DefaultReductFun):
#print BodyList
# check some of the arguments (very limited!)
if len(goalNonTerm) != 2 or goalNonTerm[0] != NONTERMFLAG:
raise TypeError, "goal of rule must be nonterminal"
for m in BodyList:
#print m
if len(m) != 2:
raise TypeError, "invalid body form for rule"
self.Nonterm = goalNonTerm
self.Body = BodyList
self.ReductFun = ReductFunction
# for dumping/reconstruction: LOSES THE INTERPRETATION FUNCTION!
def __repr__(self):
return THISMODULE + ".ParseRule" + `self.components()`
# marshal-able components of a rule
def components(self):
return (self.Nonterm, self.Body)
    # rule.reduce(Stack) pops off the stack elements corresponding
# to the body of the rule and prepares the appropriate reduction
# object for evaluation (or not) at higher levels
#
def reduce(self, Stack, Context=None):
#print "reducing", Stack
Blength = len(self.Body)
#print Blength, len(self.Body)
# pop off previous results from stack corresponding to body
BodyResults = [None] * Blength
#BodyNames = [None] * Blength # for debug
#print "popping: "
for i in range(1,Blength+1):
Bindex = Blength - i # stack contents pop off in reverse order
# get and destructure the rule body entry
RuleEntry = self.Body[Bindex]
( REkind , REname ) = RuleEntry
# get and destructure the stack entry
PoppedValue = Stack[-i] #Stack.Top()
#print PoppedValue,
#del Stack[-1]# = Stack[-1]-1 #Stack.Pop()
SETokVal = PoppedValue[1]
SEvalue = SETokVal[1]
SEname = SETokVal[0][1]
# the names from rule and stack must match (?)
if SEname != REname:
print SEname, REname
print self
raise ReductError, " token names don't match"
# store the values for the reduction
BodyResults[Bindex] = SEvalue
#BodyNames[Bindex] = SEname # debug
#endfor
del Stack[len(Stack)-Blength:]
#print "reduced", Stack
#print
# evaluate the reduction, in context
reduct = self.ReductFun(BodyResults, Context)
if WARNONDEFAULTS and self.ReductFun is DefaultReductFun:
# should check whether name is defined before this...
print " default used on ", self.Name
#Reduction( self.ReductFun, BodyResults, BodyNames )
return (Stack, reduct)
#enddef ParseRule.reduce
#endclass ParseRule
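# Illustrative rule (names invented): goal and body members come from
# nonterminal()/termrep() above, and the reduction function receives
# one result per body element plus the parse Context.
def _demo_rule():
    Exp = nonterminal("Exp")
    PLUS = termrep("plus")
    def addfn(body, context):
        # body = [value-of-Exp, value-of-plus, value-of-Exp]
        return body[0] + body[2]
    return ParseRule(Exp, [Exp, PLUS, Exp], addfn)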
# for debugging: look through a rule list
# and print names of rules that have default binding
#
def PrintDefaultBindings(rulelist):
for r in rulelist:
if r.ReductFun is DefaultReductFun:
print r.Name
# the FSM class
#
class FSMachine:
def __init__(self, rootNonTerm):
# start and success state conventions
startState=1
successState=0
self.root_nonTerminal = rootNonTerm
self.initial_state = startState
self.successful_final_state = successState
# the list of states of the FSM, implemented as a dictionary
# entries are identified by their index
# content is
# a list whose first elt is either TRANSFLAG, or TERMFLAG
# other list elts may be added by other layers (parse generator)
# indicating the kind of the state.
self.States = {}
# allocate start and success states
self.States[startState]=[TRANSFLAG]
self.States[successState]=[TERMFLAG]
# the most recently allocated state
self.maxState= startState
# the map of current token+state number to next state
#with entries of form (tokenname,state):nextstate_sequence
#
self.StateTokenMap = {}
#enddef FSM()
# ForbiddenMark is for filtering out maps to an error state
def DUMP(self, DumpMapData=1, DumpStateData=1, ForbiddenMark={}):
print "root nonterminal is ", self.root_nonTerminal
print "start at ", self.initial_state
print "end at ", self.successful_final_state
print "number of states: ", self.maxState
if DumpStateData:
print
for State in range(0,self.maxState+1):
Data = self.States[State]
print State, ": ", Data
if DumpMapData:
print
for key in self.StateTokenMap.keys():
map = self.StateTokenMap[key]
if map[0][0] == MOVETOFLAG:
ToStateData = self.States[map[0][1]]
if len(ToStateData) < 2:
Mark = None
else:
Mark = ToStateData[1]
if Mark != ForbiddenMark:
print key, " --> ", map, " = ", ToStateData
else:
print key, " --> reduction to rule number ", map[0][1]
# what tokens does a state expect?
def Expects(self, State):
keys = self.StateTokenMap.keys()
Tokens = kjSet.NewSet( [] )
for (state1,token) in keys:
if State == state1:
kjSet.addMember(token,Tokens)
return kjSet.get_elts(Tokens)
# "allocate" a new state of specified kind
    # kind must be one of TRANSFLAG, TERMFLAG, REDUCEFLAG
# returns the number of the new state
def NewState(self, kind, AdditionalInfo = []):
if not kind in (TRANSFLAG,TERMFLAG,REDUCEFLAG):
raise TypeError, "unknown state kind"
available = self.maxState+1
self.States[available] = [kind] + AdditionalInfo
self.maxState = available
return available
# Install a reduction transition in the FSM:
# a reduction is represented by mapping to a rule index
# no nondeterminism is allowed.
def SetReduction(self, fromState, TokenRep, Rulenum):
key = (fromState, TokenRep)
if not self.StateTokenMap.has_key(key):
self.StateTokenMap[ key ] = ((REDUCEFLAG, Rulenum),)
else:
raise ReductError, "attempt to set ambiguous reduction"
# Install a "shift" or "goto transition in the FSM:
# supports nondeterminism by storing a sequence of possible transitions
#
def SetMap(self, fromState, TokenRep, toState):
key = (fromState, TokenRep)
if self.StateTokenMap.has_key(key):
Old = self.StateTokenMap[key]
if Old[0][0] != MOVETOFLAG:
# if the old value was not an integer, not a "normal state":
# complain:
raise NondetError, \
"attempt to make inappropriate transition ambiguous"
self.StateTokenMap[ key ] = Old + ((MOVETOFLAG,toState),)
else:
self.StateTokenMap[ key ] = ((MOVETOFLAG,toState),)
# Find the action indicated by fsm on
# (current_state, current_token) input.
#
# note: in the event of nondeterministic choice this chooses
# the first possibility listed.
# ParseObj.DoOneReduction() currently uses the internal structure
# of StateTokenMap directly, rather than using this function.
#
def map(self, current_state, current_token):
StateEntry = self.States[current_state][0]
if StateEntry == TERMFLAG:
return (TERMFLAG, 0)
elif StateEntry == TRANSFLAG:
# try to find a transition for this token and state
key = (current_state, current_token)
try:
TMap = self.StateTokenMap[key]
#print "TMap ", TMap
#print "key ", key
#print
return TMap[0]
except KeyError:
return (NOMATCHFLAG, 0)
else:
raise FlowError, "unexpected else (2)"
#enddef map
#endclass FSMachine
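# Tiny illustration of the FSM interface (states and tokens invented):
# one shift and one reduction installed by hand and read back via map().
def _demo_fsm():
    X = termrep("x")
    fsm = FSMachine(nonterminal("goal"))
    s2 = fsm.NewState(TRANSFLAG)
    fsm.SetMap(fsm.initial_state, X, s2)
    fsm.SetReduction(s2, X, 0)            # on "x" in s2, reduce rule 0
    print fsm.map(fsm.initial_state, X)   # (MOVETOFLAG, s2)
    print fsm.map(s2, X)                  # (REDUCEFLAG, 0)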
# the grammar class:
# a grammar consists of
# - a LexDict lexical dictionary;
# - a deterministic FSMachine;
# - a Rulelist
# and optionally a dictionary that maps Rulenames
# to Rulelist indices (used for dumping and externally)
#
class Grammar:
def __init__(self, LexD, DFA, RuleL, RuleNameDict = None):
# for auto initialization set LexD,DFA,RuleL to None
if LexD == None and DFA == None and RuleL == None:
self.LexD = LexDictionary()
# use a dummy root nonterminal -- must fix elsewhere!
self.DFA = FSMachine("ERROR")
self.RuleL = []
else:
self.LexD = LexD
self.DFA = DFA
self.RuleL = RuleL
if RuleNameDict != None:
self.AddNameDict(RuleNameDict)
self.CleanUp()
#enddef __init__
# look for default bindings
def PrintDefaults(self):
print "Default bindings on:"
PrintDefaultBindings(self.RuleL)
# setting case sensitivity: must happen before keyword installation
# in LexD.
def SetCaseSensitivity( self, Boolean ):
self.LexD.SetCaseSensitivity( Boolean )
# this may be silly, but to save some space in construction
# a token dictionary may be used that facilitates sharing of
# token representations. This method either initializes
# the dictionary or disposes of it if it exists
def CleanUp(self):
self.IndexToToken = {}
# this dictionary is used by automatically
# generated grammars to determine whether
# a string represents a nonterminal
self.NonTermDict = {}
# similarly for terminals
self.TermDict = {}
# this string may be used to keep a printable
# representation of the rules of the grammar
        # (usually in automatic grammar generation)
self.RuleString = ""
# to associate a token to an integer use
# self.IndexToToken[int] = tokenrep
# this method associates rules to names using a
# RuleNameDict dictionary which maps names to rule indices.
# after invocation
# self.RuleNameToIndex[ name ] gives the index
# in self.RuleL for the rule associated with name, and
# self.RuleL[index].Name gives the name associated
# with the rule self.RuleL[index]
#
def AddNameDict(self, RuleNameDict):
self.RuleNameToIndex = RuleNameDict
# add a Name attribute to the rules of the rule list
for ruleName in RuleNameDict.keys():
index = RuleNameDict[ ruleName ]
self.RuleL[ index ].Name = ruleName
# parse a string using the grammar, return result and context
def DoParse( self, String, Context = None, DoReductions = 1 ):
# construct the ParserObj
Stream = LexStringWalker( String, self.LexD )
Stack = [] # {-1:0} #Walkers.SimpleStack()
ParseOb = ParserObj( self.RuleL, Stream, self.DFA, Stack, \
DoReductions, Context )
# do the parse
ParseResult = ParseOb.GO()
# return final result of reduction and the context
return (ParseResult[1], Context)
#enddef DoParse
# parse a string using the grammar, but only return
# the result of the last reduction, without the context
def DoParse1( self, String, Context=None, DoReductions=1 ):
return self.DoParse(String, Context, DoReductions)[0]
# if the Name dictionary has been initialized
# this method will (re)bind a reduction function to
# a rule associated with Rulename
#
def Bind( self, Rulename, NewFunction ):
ruleindex = self.RuleNameToIndex[ Rulename ]
rule = self.RuleL[ ruleindex ]
rule.ReductFun = NewFunction
#enddef Bind
# bind a terminal to a regular expression and interp function
# in the lexical dictionary (convenience)
def Addterm( self, termname, regexpstr, funct ):
self.TermDict[ termname ] =\
self.LexD.terminal( termname, regexpstr, funct )
#endclass Grammar
# function to create a "null grammar"
def NullGrammar():
return Grammar(None,None,None,{})
# unmarshalling a marshalled grammar created by
# buildmodule.CGrammar.MarshalDump(Tofile)
# tightly coupled with buildmodule code...
# file should be open and "pointing to" the marshalled rep.
#
# warning: doesn't bind semantics!
#
def UnMarshalGram(file):
Grammar = NullGrammar()
UnMarshal = UnMarshaller(file, Grammar)
UnMarshal.MakeLex()
UnMarshal.MakeRules()
UnMarshal.MakeTransitions()
UnMarshal.Cleanup()
return UnMarshal.Gram
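# Usage sketch (file handling left to the caller): the file must be
# open and positioned at the marshalled grammar, and since semantics
# are not marshalled, reduction functions should be rebound via
# Grammar.Bind before parsing for real.
def _demo_unmarshal(file, inputstring):
    gram = UnMarshalGram(file)
    # gram.Bind("rulename", somefunction)  # rebind semantics by name
    return gram.DoParse1(inputstring)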
# unmarshalling object for unmarshalling grammar from a file
#
class UnMarshaller:
def __init__(self, file, Grammar):
import marshal
self.Gram = Grammar
BigList = marshal.load(file)
if type(BigList) != type([]):
raise FlowError, "bad type for unmarshalled list"
if len(BigList) != 9:
raise FlowError, "unmarshalled list of wrong size"
self.tokens = BigList[0]
self.punct = BigList[1]
self.comments = BigList[2]
self.RuleTups = BigList[3]
self.MaxStates = BigList[4]
self.reducts = BigList[5]
self.moveTos = BigList[6]
self.Root = BigList[7]
self.CaseSensitivity = BigList[8]
Grammar.SetCaseSensitivity( self.CaseSensitivity )
def MakeLex(self):
Grammar=self.Gram
LexD = Grammar.LexD
# punctuations
LexD.punctuationlist = self.punct
# comments
for commentregex in self.comments:
LexD.comment(commentregex)
#LexD.commentstring = self.comments
# keywords, terminals, nonterms
# rewrite the tokens list for sharing and extra safety
LexTokens = {}
tokens = self.tokens
for tokenindex in range(len(tokens)):
(kind,name) = tokens[tokenindex]
if kind == KEYFLAG:
tokens[tokenindex] = LexD.keyword(name)
elif not kind in [TERMFLAG, NONTERMFLAG]:
raise FlowError, "unknown token type"
        # (reassignment redundant: the token list was rewritten in place)
self.tokens = tokens
def MakeRules(self):
Grammar = self.Gram
Grammar.DFA.root_nonTerminal = self.Root
NameIndex = Grammar.RuleNameToIndex
RuleTuples = self.RuleTups
nRules = len(RuleTuples)
RuleList = [None] * nRules
for index in range(nRules):
(Name, Components) = RuleTuples[index]
rule = apply(ParseRule, Components)
rule.Name = Name
RuleList[index] = rule
NameIndex[Name] = index
Grammar.RuleL = RuleList
def MakeTransitions(self):
Grammar = self.Gram
DFA = Grammar.DFA
StateTokenMap = DFA.StateTokenMap
tokens = self.tokens
# record the state number
DFA.maxState = self.MaxStates
# this is historical, unfortunately... CLEAN IT UP SOMEDAY!
# THE DFA.States DICT IS NOT NEEDED (?) (here)
for state in range(1, self.MaxStates+1):
DFA.States[state] = [TRANSFLAG]
# record the reductions
for (fromState, TokenIndex, rulenum) in self.reducts:
DFA.SetReduction(fromState, tokens[TokenIndex], rulenum)
# record the transitions
for (fromState, TokenIndex, ToState) in self.moveTos:
DFA.SetMap(fromState, tokens[TokenIndex], ToState)
def Cleanup(self):
Grammar = self.Gram
Grammar.CleanUp()
################# FOLLOWING CODE IS FOR REGRESSION TESTING ONLY
################# DELETE IT IF YOU WANT/NEED
#### All tests for this module deleted, since
#### ParseBuild module tests are sufficient.
#
# sets implemented using mappings
# Copyright Aaron Robert Watters, 1994
#
# these only work for "immutable" elements.
# probably not terribly efficient, but easy to implement
# and not as slow as conceivably possible.
def NewSet(Sequence):
Result = {}
for Elt in Sequence:
Result[Elt] = 1
return Result
def Empty(Set):
if Set == {}:
return 1
else:
return 0
def get_elts(Set):
return Set.keys()
def member(Elt,Set):
return Set.has_key(Elt)
# in place mutators:
# returns 0 if no change, otherwise 1
def addMember(Elt,Set):
change = 0
if not Set.has_key(Elt):
Set[Elt] = 1
change = 1
return change
def Augment(Set, OtherSet):
change = 0
for Elt in OtherSet.keys():
if not Set.has_key(Elt):
Set[Elt] = 1
change = 1
return change
def Mask(Set, OtherSet):
change = 0
for Elt in OtherSet.keys():
if Set.has_key(Elt):
del Set[Elt]
change = 1
return change
# side effect free functions
def Intersection(Set1, Set2):
Result = {}
for Elt in Set1.keys():
if Set2.has_key(Elt):
Result[Elt] = 1
return Result
def Difference(Set1, Set2):
Result = {}
for Elt in Set1.keys():
if not Set2.has_key(Elt):
Result[Elt] = 1
return Result
def Union(Set1,Set2):
Result = {}
Augment(Result,Set1)
Augment(Result,Set2)
return Result
def Subset(Set1,Set2):
Result = 1
for Elt in Set1.keys():
if not Set2.has_key(Elt):
Result = 0
return Result # nonlocal
return Result
def Same(Set1,Set2):
if Subset(Set1,Set2) and Subset(Set2,Set1):
return 1
else:
return 0
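# Quick illustration: sets are plain dictionaries mapping elt -> 1.
def _demo_sets():
    A = NewSet([1, 2, 3])
    B = NewSet([2, 3, 4])
    print get_elts(Intersection(A, B))             # [2, 3], some order
    print Subset(NewSet([2]), A)                   # 1
    print Same(Union(A, B), NewSet([1, 2, 3, 4]))  # 1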
# directed graphs as Dictionaries of Sets
# also only works for immutable nodes
def NewDG(pairlist):
Result = {}
for (source,dest) in pairlist:
AddArc(Result, source, dest)
return Result
def GetPairs(Graph):
result = []
Sources = Graph.keys()
for S in Sources:
Dests = get_elts( Graph[S] )
ThesePairs = [None] * len(Dests)
for i in range(0,len(Dests)):
D = Dests[i]
ThesePairs[i] = (S, D)
result = result + ThesePairs
return result
def AddArc(Graph, Source, Dest):
change = 0
if Graph.has_key(Source):
Adjacent = Graph[Source]
if not member(Dest,Adjacent):
addMember(Dest,Adjacent)
change = 1
else:
Graph[Source] = NewSet( [ Dest ] )
change = 1
return change
def Neighbors(Graph,Source):
if Graph.has_key(Source):
return get_elts(Graph[Source])
else:
return []
def HasArc(Graph, Source, Dest):
result = 0
if Graph.has_key(Source) and member(Dest, Graph[Source]):
result = 1
return result
def Sources(Graph):
return Graph.keys()
# when G1, G2 and G3 are different graphs this results in
# G1 = G1 U ( G2 o G3 )
# If G1 is identical to one of G2,G3 the result is somewhat
# nondeterministic (depends on dictionary implementation).
# However, it is guaranteed that after AddComposition(G,G,G)
#    G1 U (G1 o G1) <= G <= TC(G1)
# where G1 is G's original value and TC(G1) is its transitive closure
# hence this function can be used for brute force transitive closure
#
def AddComposition(G1, G2, G3):
change = 0
for G2Source in Sources(G2):
for Middle in Neighbors(G2,G2Source):
for G3Dest in Neighbors(G3, Middle):
if not HasArc(G1, G2Source, G3Dest):
change = 1
AddArc(G1, G2Source, G3Dest)
return change
# in place transitive closure of a graph
def TransClose(Graph):
change = AddComposition(Graph, Graph, Graph)
somechange = change
while change:
change = AddComposition(Graph, Graph, Graph)
if not somechange:
somechange = change
return somechange
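# Illustration: a two-arc chain gains the composed arc (1, 3) under
# brute force transitive closure.
def _demo_transclose():
    G = NewDG([(1, 2), (2, 3)])
    TransClose(G)           # in place
    print HasArc(G, 1, 3)   # 1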
########### SQueue stuff
#
# A GrabBag should be used to hold objects temporarily for future
# use.  You can put things in and take them out; taking an element
# out deletes it from the bag.  That's all!
# make a new baggy with nothing in it
# BG[0] is insert cursor BG[1] is delete cursor, others are elts
#
OLD = 1
NEW = 0
START = 2
def NewBG():
B = [None]*8 #default size
B[OLD] = START
B[NEW] = START
return B
def BGempty(B):
# other ops must maintain this: old == new iff empty
return B[OLD] == B[NEW]
# may return new, larger structure
# must be used with assignment... B = BGadd(e,B)
def BGadd(elt, B):
cursor = B[NEW]
oldlen = len(B)
# look for an available position
while B[cursor] != None:
cursor = cursor+1
if cursor >= oldlen: cursor = START
if cursor == B[NEW]: #back to beginning
break
# resize if wrapped
if B[cursor] != None:
B = B + [None] * oldlen
cursor = oldlen
B[OLD] = START
if B[cursor] != None:
raise IndexError, "can't insert?"
# add the elt
B[cursor] = (elt,)
B[NEW] = cursor
# B nonempty so OLD and NEW should differ.
if B[OLD] == cursor:
        B[NEW] = cursor + 1
        # wrap the insert cursor if it ran off the end
        if B[NEW] >= len(B): B[NEW] = START
return B
def BGgetdel(B):
# find something to delete:
cursor = B[OLD]
blen = len(B)
while B[cursor]==None:
cursor = cursor+1
if cursor>=blen: cursor = START
if cursor == B[OLD]: break # wrapped
if B[cursor] == None:
raise IndexError, "delete from empty grabbag(?)"
# test to see if bag is empty (position cursor2 at nonempty slot)
cursor2 = cursor+1
if cursor2>=blen: cursor2 = START
while B[cursor2]==None:
cursor2 = cursor2+1
if cursor2>=blen: cursor2 = START
    # since B[cursor] is not yet deleted, the while loop will terminate
# get and delete the elt
(result,) = B[cursor]
B[cursor] = None
# cursor == cursor2 iff bag is empty
B[OLD] = cursor2
if B[NEW] == cursor2: B[NEW] = cursor
return result
def BGtest(n):
B = NewBG()
rn = range(n)
rn2 = range(n-2)
for i in rn:
for j in rn:
B = BGadd( (i,j), B)
B = BGadd( (j,i), B)
x = BGgetdel(B)
for j in rn2:
y = BGgetdel(B)
print (i, x, y)
return B
# kjbuckets in pure python
### needs more thorough testing!
import sys # for debug
def kjtabletest(x):
#print "kjtabletest"
try:
return x.is_kjtable
except:
return 0
unhashable = "unhashable key error"
class kjGraph:
is_kjtable = 1
def __init__(self, *args):
#print "kjGraph.__init__", args
key_to_list = self.key_to_list = {}
self.dirty = 0
self.hashed = None
#print args
if args:
if len(args)>1:
raise ValueError, "only 1 or 0 argument supported"
from types import IntType, ListType, TupleType
arg = args[0]
targ = type(arg)
test = key_to_list.has_key
if type(arg) is IntType:
return # ignore int initializer (presize not implemented)
elif type(arg) is ListType or type(arg) is TupleType:
for (x,y) in arg:
if test(x):
key_to_list[x].append(y)
else:
key_to_list[x] = [y]
return
aclass = arg.__class__
if aclass is kjGraph:
aktl = arg.key_to_list
for k in aktl.keys():
key_to_list[k] = aktl[k][:]
return
if aclass is kjDict or aclass is kjSet:
adict = arg.dict
for k in adict.keys():
key_to_list[k] = [ adict[k] ]
return
raise ValueError, "arg for kjGraph must be tuple, list, or kjTable"
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, self.items())
def _setitems(self, thing):
#print "kjGraph._setitem", thing
#print "setitems", thing
if self.hashed is not None:
raise ValueError, "table has been hashed, it is immutable"
try:
for (k,v) in thing:
#print k,v, "going"
#inlined __setitem__
try:
klist = self.key_to_list[k]
#print "klist gotten"
except KeyError:
try:
klist = self.key_to_list[k] = []
except TypeError:
raise unhashable
if v not in klist:
klist.append(v)
except (TypeError, KeyError):
#print sys.exc_type, sys.exc_value
if kjtabletest(thing):
self._setitems(thing._pairs())
self.dirty = thing.dirty
else: raise ValueError, "cannot setitems with %s" % type(thing)
except unhashable:
raise TypeError, "unhashable type"
def __setitem__(self, item, value):
ktl = self.key_to_list
if ktl.has_key(item):
l = ktl[item]
if value not in l:
l.append(value)
else:
ktl[item] = [value]
def __getitem__(self, item):
return self.key_to_list[item][0]
def __delitem__(self, item):
self.dirty = 1
del self.key_to_list[item]
def choose_key(self):
return self.key_to_list.keys()[0]
def _pairs(self, justtot=0):
myitems = self.key_to_list.items()
tot = 0
for (k, v) in myitems:
tot = tot + len(v)
if justtot: return tot
else:
result = [None]*tot
i = 0
for (k,v) in myitems:
for x in v:
result[i] = (k,x)
i = i+1
return result
def __len__(self):
v = self.key_to_list.values()
lv = map(len, v)
from operator import add
return reduce(add, lv, 0)
def items(self):
return self._pairs()
def values(self):
v = self.key_to_list.values()
from operator import add
tot = reduce(add, map(len, v), 0)
result = [None] * tot
count = 0
for l in v:
next = count + len(l)
result[count:next] = l
count = next
return result
def keys(self):
return self.key_to_list.keys()
def member(self, k, v):
ktl = self.key_to_list
if ktl.has_key(k):
return v in ktl[k]
return 0
_member = member # because member redefined for kjSet
def add(self, k, v):
ktl = self.key_to_list
if ktl.has_key(k):
l = ktl[k]
if v not in l:
l.append(v)
else:
ktl[k] = [v]
def delete_arc(self, k, v):
self.dirty = 1
if self.hashed is not None:
raise ValueError, "table has been hashed, it is immutable"
try:
l = self.key_to_list[k]
i = l.index(v)
del l[i]
if not l:
del self.key_to_list[k]
except:
raise KeyError, "not in table"# % (k,v)
def has_key(self, k):
return self.key_to_list.has_key(k)
def subset(self, other):
oc = other.__class__
if oc is kjGraph:
oktl = other.key_to_list
sktl = self.key_to_list
otest = oktl.has_key
for k in sktl.keys():
if otest(k):
l = sktl[k]
ol = oktl[k]
for x in l:
if x not in ol:
return 0
else:
return 0
return 1
elif oc is kjSet or oc is kjDict:
sktl = self.key_to_list
odict = other.dict
otest = odict.has_key
for k in sktl.keys():
if otest(k):
l = sktl[k]
ov = odict[k]
for x in l:
if ov!=x: return 0
else:
return 0
return 1
def neighbors(self, k):
try:
return self.key_to_list[k][:]
except:
return []
def reachable(self, k):
try:
horizon = self.key_to_list[k]
except:
return kjSet()
else:
if not horizon: return []
d = {}
for x in horizon: d[x] = 1
done = 0
while horizon:
newhorizon = []
for n in horizon:
for n2 in self.neighbors(n):
if not d.has_key(n2):
newhorizon.append(n2)
d[n2] = 1
horizon = newhorizon
return kjSet(d.keys())
# ????
def ident(self):
result = kjDict(self)
result.dirty = self.dirty or result.dirty
return result
def tclosure(self):
# quick and dirty
try:
raise self
except (kjSet, kjDict):
raise ValueError, "tclosure only defined on graphs"
except kjGraph:
pass
except:
raise ValueError, "tclosure only defined on graphs"
result = kjGraph(self)
result.dirty = self.dirty
addit = result.add
while 1:
#print result
more = result*result
if more.subset(result):
return result
for (x,y) in more.items():
addit(x,y)
def Clean(self):
if self.dirty: return None
return self
def Wash(self):
self.dirty = 0
def Soil(self):
self.dirty = 1
def remap(self, X):
# really only should be defined for kjdict, but whatever
return kjDict(X*self).Clean()
def dump(self, seq):
result = map(None, seq)
for i in range(len(result)):
result[i] = self[result[i]]
if len(seq) == 1:
return result[0]
return tuple(result)
def __hash__(self): # should test better
"""in conformance with kjbuckets, permit unhashable keys"""
if self.hashed is not None:
return self.hashed
items = self._pairs()
for i in xrange(len(items)):
(a,b) = items[i]
try:
b = hash(b)
except:
b = 1877777
items[i] = hash(a)^~b
items.sort()
result = self.hashed = hash(tuple(items))
return result
def __cmp__(self, other):
#print "kjGraph.__cmp__"
ls = len(self)
lo = len(other)
test = cmp(ls, lo)
if test:
return test
si = self._pairs()
oi = other._pairs()
si.sort()
oi.sort()
return cmp(si, oi)
def __nonzero__(self):
if self.key_to_list: return 1
return 0
def __add__(self, other):
result = kjGraph(self)
rktl = result.key_to_list
rtest = rktl.has_key
result.dirty = self.dirty or other.dirty
oc = other.__class__
if oc is kjGraph:
oktl = other.key_to_list
for k in oktl.keys():
l = oktl[k]
if rtest(k):
rl = rktl[k]
for x in l:
if x not in rl:
rl.append(x)
else:
rktl[k] = l[:]
elif oc is kjSet or oc is kjDict:
odict = other.dict
for k in odict.keys():
ov = odict[k]
if rtest(k):
rl = rktl[k]
if ov not in rl:
rl.append(ov)
else:
rktl[k] = [ov]
else:
raise ValueError, "kjGraph adds only with kjTable"
return result
__or__ = __add__
def __sub__(self, other):
result = kjGraph()
rktl = result.key_to_list
sktl = self.key_to_list
oc = other.__class__
if oc is kjGraph:
oktl = other.key_to_list
otest = oktl.has_key
for k in sktl.keys():
l = sktl[k][:]
if otest(k):
ol = oktl[k]
for x in ol:
if x in l:
l.remove(x)
if l:
rktl[k] = l
else:
rktl[k] = l
elif oc is kjSet or oc is kjDict:
odict = other.dict
otest = odict.has_key
for k in sktl.keys():
l = sktl[k][:]
if otest(k):
ov = odict[k]
if ov in l:
l.remove(ov)
if l:
rktl[k] = l
else:
raise ValueError, "kjGraph diffs only with kjTable"
return result
def __mul__(self, other):
result = kjGraph()
rktl = result.key_to_list
sktl = self.key_to_list
oc = other.__class__
if oc is kjGraph:
oktl = other.key_to_list
otest = other.has_key
for sk in sktl.keys():
sklist = []
for sv in sktl[sk]:
if otest(sv):
sklist[0:0] = oktl[sv]
if sklist:
rktl[sk] = sklist
elif oc is kjSet or oc is kjDict:
odict = other.dict
otest = odict.has_key
for sk in sktl.keys():
sklist=[]
for sv in sktl[sk]:
if otest(sv):
sklist.append(odict[sv])
if sklist:
rktl[sk] = sklist
else:
raise ValueError, "kjGraph composes only with kjTable"
return result
def __invert__(self):
result = self.__class__()
pairs = self._pairs()
for i in xrange(len(pairs)):
(k,v) = pairs[i]
pairs[i] = (v,k)
result._setitems(pairs)
result.dirty = self.dirty or result.dirty
return result
def __and__(self, other):
sktl = self.key_to_list
oc = other.__class__
if oc is kjGraph:
result = kjGraph()
rktl = result.key_to_list
oktl = other.key_to_list
otest = oktl.has_key
for k in self.keys():
if otest(k):
l = sktl[k]
ol = oktl[k]
rl = []
for x in l:
if x in ol:
rl.append(x)
if rl:
rktl[k] = rl
elif oc is kjSet or oc is kjDict:
result = oc() # less general!
rdict = result.dict
odict = other.dict
stest = sktl.has_key
for k in odict.keys():
if stest(k):
v = odict[k]
l = sktl[k]
if v in l:
rdict[k] = v
else:
raise ValueError, "kjGraph intersects only with kjTable"
result.dirty = self.dirty or other.dirty
return result
def __coerce__(self, other):
return (self, other) # ?is this sufficient?
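# Sketch of kjGraph as a many-valued map: composition (*) follows arcs
# through a middle node, and tclosure() computes transitive closure.
def _demo_kjGraph():
    G = kjGraph([(1, 2), (1, 3), (2, 3)])
    print G.neighbors(1)             # [2, 3]
    print (G * G).items()            # [(1, 3)]
    print G.tclosure().member(1, 3)  # 1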
class kjDict(kjGraph):
def __init__(self, *args):
#print "kjDict.__init__", args
self.hashed = None
dict = self.dict = {}
self.dirty = 0
if not args: return
if len(args)==1:
from types import TupleType, ListType, IntType
arg0 = args[0]
targ0 = type(arg0)
if targ0 is IntType: return
if targ0 is ListType or targ0 is TupleType:
otest = dict.has_key
for (a,b) in arg0:
if otest(a):
if dict[a]!=b:
self.dirty = 1
dict[a] = b
return
argc = arg0.__class__
if argc is kjGraph:
ktl = arg0.key_to_list
for k in ktl.keys():
l = ktl[k]
if len(l)>1: self.dirty=1
for v in l:
dict[k] = v
return
if argc is kjSet or argc is kjDict:
adict = arg0.dict
for (k,v) in adict.items():
dict[k]=v
return
raise ValueError, "kjDict initializes only from list, tuple, kjTable, or int"
def _setitems(self, thing):
#print "kjDict._setitem", thing
if self.hashed is not None:
raise KeyError, "table hashed, cannot modify"
dict = self.dict
try:
for (k,v) in thing:
if dict.has_key(k) and dict[k]!=v:
self.dirty = 1
dict[k] = v
except:
self._setitems(thing._pairs()) # maybe too tricky!
def dump(self, dumper):
ld = len(dumper)
if ld==1:
return self.dict[dumper[0]]
else:
sdict = self.dict
result = [None] * ld
for i in xrange(ld):
result[i] = sdict[ dumper[i] ]
return tuple(result)
def __setitem__(self, item, value):
if self.hashed is not None:
raise ValueError, "table has been hashed, it is immutable"
d = self.dict
if d.has_key(item):
if d[item]!=value:
self.dirty = 1
self.dict[item]=value
def __getitem__(self, item):
return self.dict[item]
def __delitem__(self, item):
if self.hashed is not None:
raise ValueError, "table has been hashed, it is immutable"
self.dirty = 1
del self.dict[item]
def choose_key(self):
return self.dict.keys()[0]
def __len__(self):
return len(self.dict)
def _pairs(self, justtot=0):
if justtot: return len(self.dict)
return self.dict.items()
def values(self):
return self.dict.values()
def keys(self):
return self.dict.keys()
def items(self):
return self.dict.items()
def remap(self, X):
if X.__class__ is kjGraph:
if self.dirty or X.dirty: return None
result = kjDict()
resultd = result.dict
selfd = self.dict
inself = selfd.has_key
inresult = resultd.has_key
ktl = X.key_to_list
for k in ktl.keys():
for v in ktl[k]:
if inself(v):
map = selfd[v]
if inresult(k):
if resultd[k]!=map:
return None
else:
resultd[k]=map
return result
else:
return (kjDict(X*self)).Clean()
def __cmp__(s,o):
from types import InstanceType
if type(o) is not InstanceType:
return -1
oc = o.__class__
if oc is kjDict or oc is kjSet:
return cmp(s.dict, o.dict)
return kjGraph.__cmp__(s, o)
def __hash__(s):
h = s.hashed
if h is not None: return h
return kjGraph.__hash__(s)
def __add__(s,o):
oc = o.__class__
if oc is kjDict or oc is kjSet:
result = kjDict()
result.dirty = s.dirty or o.dirty
rdict = result.dict
rtest = result.has_key
sdict = s.dict
for k in sdict.keys():
rdict[k] = sdict[k]
odict = o.dict
for k in odict.keys():
if rtest(k):
if rdict[k]!=odict[k]:
result.dirty=1
else:
rdict[k] = odict[k]
return result
if oc is kjGraph:
return kjGraph.__add__(o,s)
else:
raise ValueError, "kjDict unions only with kjTable"
__or__ = __add__
def __and__(s,o):
oc = o.__class__
if oc is kjDict or oc is kjSet:
result = oc()
result.dirty = s.dirty or o.dirty
rdict = result.dict
odict = o.dict
sdict = s.dict
stest = sdict.has_key
for k in odict.keys():
v = odict[k]
if stest(k) and sdict[k]==v:
rdict[k] = v
return result
        elif oc is kjGraph:
            return kjGraph.__and__(o,s)
        else:
            raise ValueError, "kjDict intersects only with kjTable"
def __sub__(s,o):
oc = o.__class__
result = kjDict()
result.dirty = s.dirty or o.dirty
sdict = s.dict
rdict = result.dict
if oc is kjDict:
odict = o.dict
otest = odict.has_key
for k in sdict.keys():
v = sdict[k]
if otest(k):
if odict[k]!=v:
rdict[k] = v
else:
rdict[k] = v
return result
if oc is kjGraph:
oktl = o.key_to_list
otest = oktl.has_key
for k in sdict.keys():
v = sdict[k]
if otest(k):
if v not in oktl[k]:
rdict[k] = v
else:
rdict[k] = v
return result
raise ValueError, "kjDict only diffs with kjGraph, kjDict"
def __mul__(s,o):
oc = o.__class__
sdict = s.dict
if oc is kjDict or oc is kjSet:
result = kjDict()
result.dirty = s.dirty or o.dirty
rdict = result.dict
odict = o.dict
otest = odict.has_key
for k in sdict.keys():
kv = sdict[k]
if otest(kv):
rdict[k] = odict[kv]
return result
elif oc is kjGraph:
return kjGraph(s) * o
else:
raise ValueError, "kjDict only composes with kjTable"
def member(self, k, v):
d = self.dict
try:
return d[k] == v
except:
return 0
_member = member
def delete_arc(self, k, v):
if self.dict[k] == v:
del self.dict[k]
else:
raise KeyError, "pair not in table"
def has_key(self, k):
return self.dict.has_key(k)
def neighbors(self, k):
try:
return [ self.dict[k] ]
except: return []
def reachable(self, k):
result = {}
d = self.dict
try:
while 1:
next = d[k]
if result.has_key(next): break
result[next] = 1
k = next
except KeyError:
pass
return kjSet(result.keys())
def __invert__(self):
result = kjDict()
dr = result.dict
drtest = dr.has_key
ds = self.dict
for (a,b) in ds.items():
if drtest(b):
result.dirty=1
dr[b]=a
result.dirty = self.dirty or result.dirty
return result
def __nonzero__(self):
if self.dict: return 1
return 0
def subset(s, o):
oc = o.__class__
sdict = s.dict
if oc is kjDict or oc is kjSet:
odict = o.dict
otest = odict.has_key
for k in sdict.keys():
v = sdict[k]
if otest(k):
if odict[k]!=v:
return 0
else:
return 0
elif oc is kjGraph:
oktl = o.key_to_list
otest = oktl.has_key
for k in sdict.keys():
v = sdict[k]
if otest(k):
if v not in oktl[k]:
return 0
else:
return 0
else:
raise ValueError, "kjDict subset test only for kjTable"
return 1
def add(s, k, v):
if s.hashed is not None:
raise ValueError, "table has been hashed, immutable"
sdict = s.dict
if sdict.has_key(k):
if sdict[k]!=v:
                s.dirty = 1
sdict[k] = v
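# Sketch of kjDict: a single-valued table whose dirty flag records any
# overwrite, so Clean() returns None once information has been lost.
def _demo_kjDict():
    D = kjDict([("a", 1), ("b", 2)])
    print D["a"], D.neighbors("b")   # 1 [2]
    print (~D).items()               # [(1, 'a'), (2, 'b')], some order
    D["a"] = 99                      # overwrite: marks the table dirty
    print D.Clean()                  # None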
class kjSet(kjDict):
def __init__(self, *args):
#print "kjSet.__init__", args
# usual cases first
dict = self.dict = {}
self.hashed = None
self.dirty = 0
largs = len(args)
if largs<1: return
if largs>1:
raise ValueError, "at most one argument supported"
from types import IntType, TupleType, ListType
arg0 = args[0]
targ0 = type(arg0)
if targ0 is IntType: return
if targ0 is TupleType or targ0 is ListType:
for x in arg0:
dict[x] = x
return
argc = arg0.__class__
if argc is kjDict or argc is kjSet:
stuff = arg0.dict.keys()
elif argc is kjGraph:
stuff = arg0.key_to_list.keys()
else:
raise ValueError, "kjSet from kjTable, int, list, tuple only"
for x in stuff:
dict[x] = x
def __add__(s,o):
oc = o.__class__
if oc is kjSet:
result = kjSet()
result.dirty = s.dirty or o.dirty
rdict = result.dict
for x in s.dict.keys():
rdict[x]=x
for x in o.dict.keys():
rdict[x]=x
return result
elif oc is kjDict:
return kjDict.__add__(o,s)
elif oc is kjGraph:
return kjGraph.__add__(o,s)
__or__ = __add__
def __sub__(s,o):
if o.__class__ is kjSet:
result = kjSet()
result.dirty = s.dirty or o.dirty
rdict = result.dict
otest = o.dict.has_key
for x in s.dict.keys():
if not otest(x):
rdict[x] = x
return result
else:
return kjDict.__sub__(s,o)
def __and__(s,o):
oc = o.__class__
if oc is kjSet or oc is kjDict:
result = kjSet()
result.dirty = s.dirty or o.dirty
rdict = result.dict
odict = o.dict
otest = odict.has_key
for x in s.dict.keys():
if otest(x) and odict[x]==x:
rdict[x] = x
return result
elif oc is kjGraph:
return kjGraph.__and__(o,s)
raise ValueError, "kjSet only intersects with kjTable"
# illegal methods
values = keys = remap = None
def __repr__(self):
return "kjSet(%s)" % self.items()
def _setelts(self, items):
#print "kjSet.setelts", items
try:
items = items._pairs()
except:
items = list(items)
for i in xrange(len(items)):
items[i] = (items[i], items[i])
self._setitems(items)
else:
items = list(items)
for i in xrange(len(items)):
items[i] = (items[i][0], items[i][0])
self._setitems(items)
# hack!
#D = self.dict
#for x in D.keys():
# D[x] = x
def _pairs(self, justtot=0):
if justtot: return kjDict._pairs(self, justtot=1)
pairs = kjDict.keys(self)
for i in xrange(len(pairs)):
pairs[i] = (pairs[i], pairs[i])
return pairs
member = kjDict.has_key
items = kjDict.keys
#def neighbors(self, x):
# raise ValueError, "operation on kjSet undefined"
#reachable = neighbors
def __getitem__(self, item):
test = self.dict.has_key(item)
if test: return 1
raise KeyError, "item not in set"
def __setitem__(self, item, ignore):
d = self.dict
if self.hashed:
raise ValueError, "table hashed, immutable"
d[item] = item
def add(self, elt):
if self.hashed:
raise ValueError, "table hashed, immutable"
self.dict[elt] = elt
def __mul__(s,o):
oc = o.__class__
if oc is kjSet:
return s.__and__(o)
else:
return kjDict.__mul__(s, o)
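# Sketch of kjSet: elements are stored as self-pairs, so the kjDict
# machinery yields intersection, union and difference directly.
def _demo_kjSet():
    S = kjSet([1, 2, 3])
    T = kjSet([2, 3, 4])
    print (S & T).items()            # [2, 3], some order
    print (S - T).items()            # [1]
    print S.member(2), S.member(5)   # 1 0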
def more_general(t1, t2):
try:
raise t1
except kjSet:
try:
raise t2
except (kjGraph, kjDict, kjSet):
return t2.__class__
except kjDict:
try:
raise t2
except kjSet:
return t1.__class__
except (kjDict, kjGraph):
return t2.__class__
except kjGraph:
return t1.__class__
except:
raise ValueError, "cannot coerce, not kjtable"
def less_general(t1,t2):
try:
raise t1
except kjSet:
return t1.__class__
except kjDict:
try:
raise t2
except kjSet:
return t2.__class__
except (kjDict, kjGraph):
return t1.__class__
except kjGraph:
return t2.__class__
except:
raise ValueError, "cannot coerce, not kjtable"
def kjUndump(t1, t2):
result = kjDict()
rdict = result.dict
lt1 = len(t1)
if lt1 == 1:
rdict[t1[0]] = t2
else:
# tightly bound to implementation
for i in xrange(lt1):
rdict[t1[i]] = t2[i]
return result
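# Illustration: kjUndump pairs a tuple of names with a value (or a
# parallel tuple of values) to build a kjDict.
def _demo_undump():
    print kjUndump(("x",), 3).items()        # [('x', 3)]
    print kjUndump(("x", "y"), (3, 4))["y"]  # 4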
def test():
global S, D, G
G = kjGraph()
r3 = range(3)
r = map(None, r3, r3)
for i in range(3):
G[i] = i+1
D = kjDict(G)
D[9]=0
G[0]=10
S = kjSet(G)
S[-1] = 5
print "%s.remap(%s) = %s" % (D, G, D.remap(G))
print "init test"
for X in (S, D, G, r, tuple(r), 1):
print "ARG", X
for C in (kjGraph, kjSet, kjDict):
print "AS", C
T = C(X)
T2 = C()
print X, T, T2
ALL = (S, D, G)
for X in ALL:
print "X=", X
print "key", X.choose_key()
print "len", len(X)
print "items", X.items()
print X, "Clean before", X.Clean()
del X[2]
print X, "Clean after", X.Clean()
if not X.subset(X):
raise "trivial subset fails", X
if not X==X:
raise "trivial cmp fails", X
if not X:
raise "nonzero fails", X
if X is S:
if not S.member(0):
raise "huh 1?"
if S.member(123):
raise "huh 2?", S
S.add(999)
del S[1]
if not S.has_key(999):
raise "huh 3?", S
else:
print "values", X.values()
print "keys", X.keys()
print X, "inverted", ~X
if not X.member(0,1):
raise "member test fails (0,1)", X
print "adding to", X
X.add(999,888)
print "added", X
X.delete_arc(999,888)
print "deleted", X
if X.member(999,888):
raise "member test fails (999,888)", X
if X.has_key(999):
raise "has_key fails 999", X
if not X.has_key(0):
raise "has_key fails 0", X
for Y in ALL:
print "Y", Y
if (X!=S and Y!=S):
print "diff", X, Y
print "%s-%s=%s" % (X,Y,X-Y)
elif X==S:
D = kjSet(Y)
print "diff", X, D
print "%s-%s=%s" % (X,D,X-D)
print "%s+%s=%s" % (X,Y,X+Y)
print "%s&%s=%s" % (X,Y,X&Y)
print "%s*%s=%s" % (X,Y,X*Y)
x,y = cmp(X,Y), cmp(Y,X)
if x!=-y: raise "bad cmp!", (X, Y)
print "cmp(X,Y), -cmp(Y,X)", x,-y
print "X.subset(Y)", X.subset(Y)
#!/usr/local/bin/python
"""python lint using kwParsing
The goal of this module/filter is to help find
programming errors in python source files.
As a filter use thusly:
% python kjpylint.py source_file.py
As an internal tool use like this:
import kjpylint
(pyg, context) = kjpylint.setup()
kjpylint.lint(data, pyg, context)
where data is the text of a python program.
You can build your own context structure by
subclassing GlobalContext, and redefining
GlobalContext.complain(string) for example.
You could do a lot more than that too...
Also, to lint all *.py files recursively contained
in a directory hierarchy use
kjpylint.lintdir("/usr/local/lib/python") # for example
FEATURES:
Lint expects
1) a newline or two at the end of the data;
2) consistent indenting (and inconsistency may be invisible)
[eg " \t" and "\t" are not the same indent
to Lint, but Python sees them the same.]
If (1) or (2) are not satisfied Lint will raise
an exception.
Buglets: lambdas and for loops on one line generate
extraneous warnings.
Notes:
======
The lint process works, in outline, like this.
Scan over a python program
x = 1
def f(a):
a = x
d.x, y = b
z = w
and build annotations like
[ set("x", 1),
[
get("x", 4)
set("a", 4)
get("b", 5)
get("d", 5)
set("y", 5)
pop_local()
]
get("w", 7)
set("z", 7) ]
from this stream conclude
warning on line 5: b used before set
warning on line 5: d used before set
warning on line 5: y set, never used
etc. using simple one pass approximate flow
analysis.
"""
pyg = context = None
#import pygram
from pygram import newlineresult
# reduction rules:
# only need to consider
# expressions, assignments, def, class, global, import, from, for
#
# expressions return a list of unqualified names, not known set
# qualified names are automatically put in context as refs
#
# assignments set left names, ref right names
#
# def sets new name for function and args,
# refs other names
#
# class adds new name for class
# refs other names
#
# global forces global interpretation for name
#
# import adds FIRST names
# from sets names
# for sets names
#
# related rules
# ASSIGNMENT REQUIRES SPECIAL TREATMENT
#@R assn1 :: assn >> testlist = testlist
def assn1(list, context):
[t1, e, t2] = list
return assn(t1, t2)
#@R assnn :: assn >> testlist = assn
def assnn(list, context):
[t1, e, a1] = list
return assn(t1, a1)
# @R assn1c :: assn >> testlist , = testlist
def assn1c(list, context):
[t1, c, e, t2] = list
return assn(t1, t2)
# @R assn1c2 :: assn >> testlist , = testlist ,
def assn1c2(list, context):
del list[-1]
return assn1c(list, context)
# @R assnnc :: assn >> testlist , = assn
def assnnc(list, context):
return assn1c(list, context)
def assn(left, right):
result = right
for x in left:
(ln, ri, op, name) = x
if op == "ref":
result.append( (ln, ri, "set", name) )
else:
result.append(x)
return result
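# Illustration (annotation tuples invented): assn() keeps the right
# hand side events and re-marks each left hand "ref" as a "set".
def _demo_assn():
    left = [(5, 0, "ref", "x")]
    right = [(5, 0, "ref", "b")]
    print assn(left, right)  # [(5, 0, 'ref', 'b'), (5, 0, 'set', 'x')]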
#@R except2 :: except_clause >> except test , test
def except2(list, context):
[e, t1, c, t2] = list
result = t1
for (ln, ri, op, name) in t2:
result.append( (ln, ri, "set", name) )
return result
#@R smassn :: small_stmt >> assn
# ignored
#@R rfrom :: import_stmt >> from dotted_name import name_list
#@R rfromc :: import_stmt >> from dotted_name import name_list ,
def rfrom(list, context):
#print rfrom, list
[f, d, i, n] = list
# ignore d
return n
def rfromc(list, context):
    return rfrom(list[:-1], context)
def mark(kind, thing, context):
L = context.LexD
lineno = L.lineno
# are we reducing on a newline?
if L.lastresult==newlineresult:
lineno = lineno-1
return (lineno, -L.realindex, kind, thing)
#@R dn1 :: dotted_name >> NAME
def dn1(list, context):
#print "dn1", list
#L = context.LexD
return [ mark("set", list[0], context) ]
#return [ (L.lineno, -L.realindex, "set", list[0]) ]
# handles import case, make name set local
#@R nlistn :: name_list >> name_list , NAME
def nlistn(list, context):
#print "nlistn", list
[nl, c, n] = list
#L = context.LexD
#nl.append( (L.lineno, -L.realindex, "set", n) )
nl.append( mark("set", n, context) )
return nl
#@R nlist1 :: name_list >> NAME
def nlist1(list, context):
#print "nlist1", list
#L = context.LexD
#return [ (L.lineno, -L.realindex, "set", list[0]) ]
return [ mark("set", list[0], context) ]
# ignore lhs in calls with keywords.
#@R namearg :: argument >> test = test
def namearg(list, context):
[t1, e, t2] = list
return t2
# handles from case, make names set local
#@R global1 :: global_stmt >> global NAME
def global1(list, context):
#print "global1", list
#L = context.LexD
#return [ (L.lineno, -L.realindex, "global", list[1]) ]
return [ mark("global", list[1], context) ]
#@R globaln :: global_stmt >> global_stmt , NAME
# handles global, make names global (not set or reffed)
def globaln(list, context):
#print "globaln", list
[g, c, n] = list
#L = context.LexD
#g.append( (L.lineno, -L.realindex, "global", n) )
g.append( mark("global", n, context) )
return g
#@R for1 :: for_stmt >>
#for exprlist in testlist :
# suite
def for1(list, context):
#print "for1", list
[f, e, i, t, c, s] = list
refs = t + s
return assn(e, refs)
#@R for2 :: for_stmt >>
#for exprlist in testlist :
# suite
#else :
# suite
def for2(list,context):
#print "for2", list
[f, e, i, t, c1, s1, el, c2, s2] = list
refs = t + s1 + s2
return assn(e, refs)
###
#@R class1 :: classdef >> class NAME : suite
def class1(list, context):
[c, n, cl, s] = list
return Class(n, [], s, context)
#@R class2 :: classdef >> class NAME ( testlist ) : suite
def class2(list, context):
[c, n, opn, t, cls, cl, s] = list
return Class(n, t, s, context)
def Class(name, testlist, suite, context):
globals = analyse_scope(name, suite, context, unused_ok=1)
context.defer_globals(globals)
result = testlist
L = context.LexD
# try to correct lineno
lineno = L.lineno
realindex = L.realindex
for (ln, ri, op, n) in testlist+suite:
lineno = min(lineno, ln)
result.append((lineno, -realindex, "set", name))
#result.append( mark("set", name, context) )
# suppress complaints about unreffed classes
result.append((lineno+1, -realindex, "qref", name))
#result.append( mark("qref", name, context) )
return result
# varargslist requires special treatment.
# return (innerscope, outerscope) pair of lists
# @R params1 :: parameters >> ( varargslist )
def params1(l, c):
return l[1]
params1c = params1
#@R params2 :: varargslist >>
def params2(l, c):
return ([], [])
#@R params3 :: varargslist >> arg
def params3(l, c):
return l[0]
#@R params4 :: varargslist >> varargslist , arg
def params4(l, c):
#print "params4", l
[v, c, a] = l
v[0][0:0] = a[0]
v[1][0:0] = a[1]
return v
#@R argd :: arg >> NAME = test
def argd(l, c):
[n, e, t] = l
#L = c.LexD
#return ([(L.lineno, -L.realindex, "set", n)], t)
return ([ mark("set", n, c) ], t)
#@R arg2 :: arg >> fpdef
def arg2(l, c):
return l[0]
#@R arg3 :: arg >> * NAME
def arg3(l, c):
del l[0]
return fpdef1(l, c)
#@R arg4 :: arg >> ** NAME
def arg4(l, c):
#del l[0]
return arg3(l, c)
#@R fpdef1 :: fpdef >> NAME
def fpdef1(l, c):
[n] = l
#LexD = c.LexD
return ([ mark("set", n, c) ], [])
#@R fpdef2 :: fpdef >> ( fplist )
def fpdef2(l, c):
return l[1]
## @R fpdef2c :: fpdef >> ( fplist , )
#fpdef2c = fpdef2
##31
#@R fplist1 :: fplist >> fpdef
def fplist1(l, c):
#print l
return l[0]
#@R fplistn :: fplist >> fplist , fpdef
fplistn = params4
#@R rdef :: funcdef >> def NAME parameters : suite
def rdef(list, context):
#print "rdef", list
[ddef, name, parameters, c, suite] = list
(l, g) = parameters
globals = analyse_scope(name, l + suite, context)
# for embedded function defs global internal refs must be deferred.
context.defer_globals(globals)
result = g
L = context.LexD
# try to steal a lineno from other declarations:
lineno = L.lineno
index = L.realindex
for (ln, ri, op, n) in l+g+suite:
lineno = min(lineno, ln)
if name is not None:
result.append((lineno, -index, "set", name))
# Note: this is to prevent complaints about unreffed functions
result.append((lineno+1, -index, "qref", name))
return result
#@R testlambda1 :: test >> lambda varargslist : test
def testlambda1(list, context):
[l, v, c, t] = list
return rdef(["def", None, v, ":", t], context)
def analyse_scope(sname, var_accesses, context, unused_ok=0):
var_accesses.sort()
result = []
globals = {}
locals = {}
# scan for globals
for x in var_accesses:
(ln, ri, op, name) = x
if op == "global":
globals[name] = ln
#result.append(x) (ignore global sets in local context)
# scan for locals
for (ln, ri, op, name) in var_accesses:
if op == "set" and not locals.has_key(name):
if globals.has_key(name):
context.complain(
"Warning: set of global %s in local context %s" % (`name`, `sname`))
result.append( (ln, ri, op, name) )
pass # ignore global set in local context
else:
locals[name] = [ln, 0] # line assigned, #refs
# scan for use before assign, etc.
for x in var_accesses:
(ln, ri, op, name) = x
if locals.has_key(name):
if op in ["ref", "qref"]:
set = locals[name]
set[1] = set[1] + 1
assnln = set[0]
if (ln <= assnln):
context.complain(
"(%s) local %s ref at %s before assign at %s" % (
sname, `name`, ln, `assnln`))
elif op not in ("global", "set"):
# ignore global sets in local context.
result.append(x)
# scan for no use
if not unused_ok:
for (name, set) in locals.items():
[where, count] = set
if count<1:
context.complain(
"(%s) %s defined before %s not used" % (sname, `name`, where))
return result
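# For example, analysing the scope of the (made-up) function
#    def f():        # line 1
#        print x     # line 2
#        x = 1       # line 3
# should produce a complaint roughly like
#    (f) local 'x' ref at 2 before assign at 3
# because x is referenced before it is assigned in f's local scope.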
### note, need to make special case for qualified names
#@R powera :: power >> atom trailerlist
def powera(list, context):
#print "powera", list
[a, (t, full)] = list
if a and full:
# atom is a qualified name
(ln, ri, op, n) = a[0]
result = [ (ln, ri, "qref", n) ]
else:
result = a
result = result + t
#print "returning", result
return result
#@R trailerlist0 :: trailerlist >>
def trailerlist0(list, context):
return ([], 0) # empty trailerlist
#@R trailerlistn :: trailerlist >> trailer trailerlist
def trailerlistn(list, context):
#print "trailerlistn", list
result = list[0] + list[1][0]
for i in xrange(len(result)):
(a, b, op, d) = result[i]
result[i] = (a, b, "qref", d)
return (result, 1)
# make name+parameters set local reduce suite...
def default_reduction(list, context):
# append all lists
from types import ListType
#print "defred", list
#return
result = []
for x in list:
if type(x)==ListType:
if result == []:
if len(x)>0 and type(x[0])==ListType:
raise "oops", x
result = x
else:
for y in x:
result.append(y)
return result
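# For example, default_reduction([["a"], ":", ["b", "c"]], context)
# returns ["a", "b", "c"]: sublists are concatenated and non-list
# entries (keywords and punctuations) are dropped.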
def aname(list, context):
#print "aname", list, context
L = context.LexD
# note -L.realindex makes rhs of assignment seem before lhs in sort.
return [ (L.lineno, -L.realindex, "ref", list[0]) ]
# the highest level reduction!
# all1 :: all >> file_input DEDENT
def all1(list, context):
stuff = list[0]
context.when_done(stuff)
# first test
def BindRules(pyg):
for name in pyg.RuleNameToIndex.keys():
pyg.Bind(name, default_reduction)
pyg.Bind("all1", all1)
pyg.Bind("testlambda1", testlambda1)
pyg.Bind("except2", except2)
pyg.Bind("namearg", namearg)
pyg.Bind("rfrom", rfrom)
pyg.Bind("rfromc", rfromc)
pyg.Bind("class1", class1)
pyg.Bind("class2", class2)
pyg.Bind("aname", aname)
pyg.Bind("assn1", assn1)
pyg.Bind("assnn", assnn)
pyg.Bind("assn1c", assn1c)
pyg.Bind("assn1c2", assn1c2)
pyg.Bind("assnnc", assnnc)
pyg.Bind("dn1", dn1)
pyg.Bind("nlistn", nlistn)
pyg.Bind("nlist1", nlist1)
pyg.Bind("global1", global1)
pyg.Bind("globaln", globaln)
pyg.Bind("for1", for1)
pyg.Bind("for2", for2)
pyg.Bind("powera", powera)
pyg.Bind("trailerlist0", trailerlist0)
pyg.Bind("trailerlistn", trailerlistn)
pyg.Bind("params1", params1)
pyg.Bind("params1c", params1c)
pyg.Bind("params2", params2)
pyg.Bind("params3", params3)
pyg.Bind("params4", params4)
pyg.Bind("argd", argd)
pyg.Bind("arg2", arg2)
pyg.Bind("arg3", arg3)
pyg.Bind("arg4", arg4)
pyg.Bind("fpdef1", fpdef1)
pyg.Bind("fpdef2", fpdef2)
# pyg.Bind("fpdef2c", fpdef2c)
pyg.Bind("fplist1" , fplist1 )
pyg.Bind("fplistn" , fplistn)
pyg.Bind("rdef" , rdef)
# pyg.Bind( , )
class globalContext:
def __init__(self, lexd):
self.deferred = []
self.LexD = lexd
def complain(self, str):
print str
def defer_globals(self, globals):
self.deferred[0:0] = globals
def when_done(self, list):
stuff = list + self.deferred + self.patch_globals()
globals = analyse_scope("<module global>", stuff, self)
seen = {}
for (ln, ri, op, name) in globals:
if not seen.has_key(name) and op!="set":
seen[name] = name
self.complain(
"%s: (%s) %s not defined in module?" % (ln, op, `name`))
self.deferred = [] # reset state.
def patch_globals(self):
# patch in global names
import __builtin__
names = dir(__builtin__)
list = names[:]
list2 = names[:]
for i in xrange(len(list)):
list[i] = (-2, -900, "set", names[i])
list2[i] = (-1, -900, "qref", names[i])
return list + list2
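# patch_globals generates pairs like (-2, -900, "set", "len") and
# (-1, -900, "qref", "len") for each builtin name, so builtins
# appear assigned (and referenced) before any line of the module.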
teststring = """
class x(y,z):
'''
a doc string
blah
'''
def test(this, that):
w = that+this+x, n
x = 1
return w
"""
def go():
import sys
try:
file = sys.argv[1]
except IndexError:
print "required input file missing, defaulting to test string"
data = teststring
else:
data = open(file).read()
print "setup"
(pyg, context) = setup()
print "now parsing"
lint(data, pyg, context)
def setup():
global pyg, context
import pygram
pyg = pygram.unMarshalpygram()
BindRules(pyg)
context = globalContext(pyg.LexD)
return (pyg, context)
def lint(data, pygin=None, contextin=None):
if pygin is None: pygin = pyg
if contextin is None: contextin = context
pygin.DoParse1(data, contextin)
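# Example use (assuming this module is saved as, say, pylint.py):
#    import pylint
#    (pyg, context) = pylint.setup()
#    pylint.lint(open("myfile.py").read(), pyg, context)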
def lintdir(directory_name):
"""lint all files recursively in directory"""
from find import find
print "\n\nrecursively linting %s\n\n" % directory_name
(pyg, context) = setup()
python_files = find("*.py", directory_name)
for x in python_files:
print "\n\n [ %s ]\n\n" % x
lint( open(x).read(), pyg, context )
print "\014"
if __name__=="__main__": go()
<html>
<head>
<title>A parser generator in Python</title>
</head>
<body bgcolor="#ff66dd">
<center>
<h1>A parser generator in Python: kwParsing</h1>
<blockquote>
Aaron Watters<br>
Computer and Information Sciences<br>
New Jersey Institute of Technology<br>
University Heights<br>
Newark, NJ, 07102<br>
(201)596-2666<br>
aaron_watters@msn.com
</blockquote>
<blockquote>
This is the documentation for the <strong>kjParsing</strong> package,
an experimental parser generator implemented in Python which generates
parsers implemented in Python.
It won't serve as a complete reference on programming
language syntax and interpretation, but it will review
terminology for the knowledgeable and I hope it will pique
the interest of the less experienced.
</blockquote>
</center>
<h2>Introduction</h2>
<p>
The <code> kjParsing</code> package is a parser generator written
in Python which generates parsers for use in Python.
<p>
These modules and their documentation and demo files
may be of use for classes on parsing, compiling, or
formal languages, and may also be helpful to people
who like to create experimental interpreters or translators
or compilers.
<p>
The package consists of three Python modules:
<code> kjParser, kjParseBuild,</code> and <code> kjSet</code>. Together these
modules are called the <code> kjParsing</code> package.
The package also includes some documentation and demo
files and a <code> COPYRIGHT</code> file which explains the
conditions for copying and propagating this code
and the fact that the author assumes no responsibility
for any difficulties resulting from the use of this
package by anyone (including himself).
<h2>What a Parser Does</h2>
Parsers can be part of a lot of different things:
compilers, interpreters, translators, or code generators,
among others. Nevertheless, at an abstract level parsers
all translate expressions of a language into computational
actions.
<p>
Parsers generated by the <code> kjParseBuild</code> module may do three
different sorts of actions:
<center><table bgcolor="#ffdd66" border>
<tr><th>Value Computation</th></tr><tr><td>
The parser may build a data structure
as the result of the expression. For example the silly <code> LispG</code>
grammar
from the file
<code> ``DLispShort.py''</code> can construct integers, strings and
lists from string representations.
<pre>
>>> from DLispShort import LispG, Context
>>> LispG.DoParse1( ' ("list with string and int" 23) ', Context)
['list with string and int', 23]
>>>
</pre>
</td></tr><tr><th>Environment Modification</th></tr><tr><td>
The parser may modify the context of the computation. For example
the <code> LispG</code> grammar allows the assignment of values to internal
variable names.
<pre>
>>> LispG.DoParse1( '(setq Variable (4 5 9))', Context)
[4, 5, 9]
>>> Context['Variable']
[4, 5, 9]
>>>
</pre>
(Here the second result indicates that the string <code> 'Variable'</code>
has been associated with the value <code> [4,5,9]</code> in
the <code> Context</code> structure, which in this case is a simple
python dictionary.)
</td></tr><tr><th> External Side Effects</th></tr><tr><td>
The parser may also perform external actions. For example the
<code> LispG</code> grammar has the ability to print values to the terminal.
<pre>
>>> LispG.DoParse1( '( (print Variable) (print "bye bye") )', Context )
[4, 5, 9]
bye bye
[[4, 5, 9], 'bye bye']
>>>
</pre>
(Here the first two lines are the results of printing
and the last is the value of the expression.)
</td></tr></table></center>
More realistic parsers will perform more interesting actions,
of course.
<p>
To implement a parser using <code> kjParseBuild</code> you must
define the grammar to parse and associate each rule and terminal
of the grammar with an action which defines the
computational meaning of each language construct.
<p>
The grammar generation process consists of two phases:
<center><table bgcolor="#ffdd66" border>
<tr><th>Generation</th></tr><tr><td>
During this phase you must define the syntax of the
language and function bindings that define the semantics
of the language. When you've debugged the syntax and
semantics you can dump the grammar object
representing the syntax only to a grammar file which
can be reloaded without re-analyzing the language syntax.
For large grammars each regeneration may require significant
time and computational resources.
</td></tr><tr><th>Use</th></tr><tr><td>
During this phase you may load the grammar file without
re-analyzing the grammar on each use. However,
the semantics functions must still be rebound on each
load. The reloaded grammar object augmented with interpretation
functions may be used to parse strings of the language.
</td></tr></table></center>
Note that the functions that define the semantics of the
language must be bound in both phases.
<p>
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
# from file DLispShort.py (with small differences)
( 1) def GrammarBuild():
( 2) import kjParseBuild
( 3) LispG = kjParseBuild.NullCGrammar()
( 4) LispG.SetCaseSensitivity(0)
( 5) DeclareTerminals(LispG)
( 6) LispG.Keywords("setq print")
( 7) LispG.punct("().")
( 8) LispG.Nonterms("Value ListTail")
( 9) LispG.comments([LISPCOMMENTREGEX])
(10) LispG.Declarerules(GRAMMARSTRING)
(11) LispG.Compile()
print "dumping as binary to TESTLispG.mar"
(12) outfile = open("TESTLispG.mar", "w")
(13) LispG.MarshalDump(outfile)
(14) outfile.close()
(15) BindRules(LispG)
(16) return LispG
</pre>
<caption>A function for building a simple grammar (GrammarBuild)</caption>
</td></tr></table></center>
<h2>Defining a Grammar</h2>
A programming language grammar is conventionally divided into
several components:
<center><table bgcolor="#ffdd66" border>
<tr><th>Keywords</th></tr><tr><td>
These are special strings that ``highlight'' a language construct.
Familiar keywords from Python and Pascal and C are ``if'', ``else'',
and ``while''.
</td></tr><tr><th>Terminals</th></tr><tr><td>
These are special patterns of characters that indicate a value
in the language. For example many programming languages will
classify the string <code> 123</code> as an instance of the integer
nonterminal and the string <code> snark</code> (not contained in quotes)
as an instance of the nonterminal identifier or
variable. Terminals are usually restricted to very simple
constructs like identifiers, numbers, and strings. More complex
things (such as a ``date'' data type) might be better handled
by nonterminals and rules.
</td></tr><tr><th>Nonterminals</th></tr><tr><td>
These are ``place holders'' for language constructs of the
grammar. They represent parts of the grammar which sometimes
expand to great size and complexity. For instance the
C language grammar presented by Kernighan and Ritchie has
a nonterminal translationUnit, which represents a
complete C language module, and a nonterminal
conditionalExpression, which represents a truth valued
expression of the language.
</td></tr><tr><th>Punctuations</th></tr><tr><td>
These are special characters or strings which are recognized
as separate entities even if they aren't physically separated
from other strings by white space. For example, most languages
would ``see'' the string <code> if0</code> as a single token
(probably an identifier) even if <code> if</code> is a keyword,
whereas <code> if(0)</code> would be recognized
the same as <code> if ( 0 )</code> because parentheses are normally
considered punctuations. Except for the special treatment
at recognition, punctuations are similar to keywords.
</td></tr></table></center>
The syntax of a language describes how to recognize
the components of the language. To define a language syntax using
<code> kjParseBuild</code> you must create a null compilable grammar object
to contain the grammar (in Figure GrammarBuild this
is done on line 3 using the class constructor
<code> kjParseBuild.NullCGrammar()</code>
creating the grammar object <code> LispG</code>) and define the components
of the grammar and the rules for recognizing the components.
The component definitions
and rule declarations, as well as the specification of case sensitivity
and comment patterns, are performed on lines 4 through 10 of
Figure GrammarBuild for the <code> LispG</code> grammar.
<h3>Declaring Case Sensitivity and Comments</h3>
There are two nuances to parsing not yet mentioned:
case sensitivity and comments.
<p>
Some grammars are not
case sensitive in recognizing keywords or identifiers.
For example ANSI standard SQL (which is not
case sensitive for keywords or identifiers) recognizes
<code> Select, select, SELECT,</code> and <code> SeLect</code> all
as the keyword <code> SELECT</code>.
To specify the case sensitivity of the grammar for keywords only, use
<pre>
GRAMMAROBJECT.SetCaseSensitivity(TrueOrFalse)
</pre>
where <code> TrueOrFalse</code> is 0 for no case sensitivity or
1 for case sensitivity. This must be done before
any keyword declarations for the grammar. All other
syntax declarations may be done in any order before
the compilation of the grammar object.
In Figure GrammarBuild the <code> LispG</code> grammar object
is declared to be case insensitive on line 4.
<p>
Comments are patterns in the input string which are ignored
(or more precisely interpreted as white space) by the language.
To declare a sequence of regular expressions to be interpreted as a comment
in a grammar use
<pre>
GRAMMAROBJECT.comments(LIST_OF_COMMENT_REGULAR_EXPR_STRINGS)
</pre>
For example, line 9 of Figure GrammarBuild declares
the constant string previously defined as
<pre>
LISPCOMMENTREGEX = ";.*"
</pre>
to represent a comment of the grammar <code> LispG</code>.
For the syntax of regular expression strings you must look
elsewhere, but as a hint <code> ";.*"</code> represents any string
commencing with a semicolon, followed by any sequence of
characters up to, but not including, a newline.
<h3>Declaring Keywords, Punctuations, and Terminals</h3>
To declare keywords for your grammar use
<pre>
GRAMMAROBJECT.Keywords( STRING )
</pre>
where <code> STRING</code> is a white space separated string of keywords.
Line 6 of Figure GrammarBuild declares <code> setq</code> and <code> print</code>
as keywords of <code> LispG</code>.
<p>
To declare nonterminals for your grammar, similarly, use
<pre>
GRAMMAROBJECT.Nonterms( STRING )
</pre>
where <code> STRING</code> is a white space separated string of nonterminal
names. Line 8 of Figure GrammarBuild declares <code> Value</code>
and <code> ListTail</code> as nonterminals of the <code> LispG</code>.
<p>
Similarly, use
<pre>
GRAMMAROBJECT.punct( STRING )
</pre>
to declare a sequence of punctuations for the grammar, except
that in this case the string must not contain any white space.
Line 7 of Figure GrammarBuild declares parentheses and dot
to be punctuations of the <code> LispG</code>.
<p>
If you have a lot of keywords, punctuations,
or nonterminals you can make many separate
calls to the appropriate declaration methods
with different strings.
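For example, the keyword declaration on line 6 of
Figure GrammarBuild could equally well be written as
two separate calls (a hypothetical variant):
<pre>
LispG.Keywords("setq")
LispG.Keywords("print")
</pre>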
<p>
These declarations will cause the grammar to recognize
the declared keyword strings (when separated from other
strings by white space or punctuations) and punctuations
as special tokens of the grammar at the lowest level of
parsing. The parsing process derives nonterminals of the
grammar at a higher level as discussed
below.
<p>
A small difficulty with
<code> kjParseBuild</code> is that the strings <code> @R, ::, >>,</code>
and <code> ##</code> cannot be used as names of keywords for the
grammar because they are used to specify rule syntax
in the ``metagrammar''.
If you need these in your grammar they may
be implemented as ``trivial'' terminals. For example,
<center><table bgcolor="#ffdd66" border><tr><td>
<code> Grammar.Addterm("poundpound", "##", echo)</code>
</td></tr></table></center>
I'm unsure whether this patch is good enough.
Does anyone have any advice for me? If this is a bad
problem for some grammar, the keywords of the metagrammar
can be changed, of course, but this is a hack.
<h3>Declaring Terminals</h3>
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
# from DLispShort.py
def DeclareTerminals(Grammar):
(1) Grammar.Addterm("int", INTREGEX, intInterp)
(2) Grammar.Addterm("str", STRREGEX, stripQuotes)
(3) Grammar.Addterm("var", VARREGEX, echo)
</pre>
</td></tr>
<caption>Defining the terminals of a grammar.
(TermDef)</caption></table></center>
<p>
Figure TermDef shows the declarations for installing
the <code> int, str,</code> and <code> var</code> terminals in the grammar.
This is given as a separate function because the declarations
define both the syntax and semantics for the terminals,
and therefore must be called both during grammar generation
and after loading the generated grammar object.
To declare a terminal for a grammar use
<pre>
GRAMMAROBJECT.Addterm(NAMESTR, REGEXSTR, FUNCTION)
</pre>
This declaration associates both a regular expression string
<code> REGEXSTR</code>
and an interpretation function <code> FUNCTION</code> to the
terminal of the grammar named by the string <code> NAMESTR</code>.
The <code> FUNCTION</code> defines the semantics of the terminal
as described below and the <code> REGEXSTR</code> specifies a regular
expression for recognizing the string. For example on
line 2 of Figure TermDef the <code> str</code> terminal
is associated with the regular expression string
<pre>
STRREGEX = '"[^\n"]*"'
</pre>
which matches any string starting with double quotes and ending
with double quotes which contains neither double quotes nor
a newline.
<h3>Declaring Rules of the Grammar</h3>
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
# from DLispShort.py
GRAMMARSTRING ="""
Value :: ## indicates Value is the root nonterminal for the grammar
@R SetqRule :: Value >> ( setq var Value )
@R ListRule :: Value >> ( ListTail
@R TailFull :: ListTail >> Value ListTail
@R TailEmpty :: ListTail >> )
@R Varrule :: Value >> var
@R Intrule :: Value >> int
@R Strrule :: Value >> str
@R PrintRule :: Value >> ( print Value )
"""
</pre>
</td></tr>
<caption>A grammar definition string. (GramStr)</caption>
</table></center>
<p>
To declare the rules of a grammar use the simple rule
definition language which comes with <code> kjParseBuild</code>, for example
as shown in Figure GramStr. Line 10 of
Figure GrammarBuild uses the string defined in
Figure GramStr to associate the rules with the
grammar using
<pre>
GRAMMAROBJECT.DeclareRules(RULE_DEFINITION_STRING)
</pre>
This declaration does not analyse the string; analysis
and syntax/semantics errors are reported by <code> *.Compile()</code>
described below.
<p>
The rule definition language allows you to identify
the root nonterminal of your grammar and specify a
sequence of named derivation rules for the
grammar. It also allows comments
which start with <code> ##</code> and end with a newline.
An acceptable string for the rule definition language
looks like
<center><table bgcolor="#ffdd66" border><tr><td>
RootNonterminalName <code> ::</code> NamedRule1 NamedRule2 ...
</td></tr></table></center>
Here the Root nonterminal name should be the nonterminal
that ``stands for'' any complete string of the language.
Furthermore, each named rule looks like
<center><table bgcolor="#ffdd66" border><tr><td>
<code> @R</code> NameString <code> ::</code> GoalNonterm <code> >></code> RuleBody
</td></tr></table></center>
where the name string for the rule is a string without
whitespace, the goal nonterminal is the
nonterminal that the rule derives,
and the rule body is a sequence of keywords, punctuations
and nonterminals separated by white space.
Rule names are used for mapping rules to semantic interpretations
and should be unique.
<p>
Note that punctuations for the grammar you are defining
are not punctuations for the rule definition language
(which has none), so they must be separated from
other tokens by white space. The keywords of the rule
definition language <code> @R, ::, >></code> must also be
separated from other tokens by whitespace in the rule
definition string.
Furthermore, all
punctuations, keywords, nonterminals, and terminals
used in the rules must be declared for the grammar before
the grammar is compiled (if one isn't, the compilation will
fail with an error).
<p>
As a bit of sugar you may break up the declarations of rules.
<pre>
LispG.DeclareRules("Value::\n")
LispG.DeclareRules(" @R SetqRule :: Value >> ( setq var Value )\n")
LispG.DeclareRules(" @R ListRule :: Value >> ( ListTail\n")
...
</pre>
This might be useful for larger grammars.
<h3>A Brief Discussion of Derivations</h3>
The rules for a grammar don't really describe how to
parse a string of the language, they actually
describe how to derive a string of the
grammar. For this reason it is possible
to create a grammar which derives the same string
in two different ways; such grammars are termed
ambiguous. If you try to generate a parser
for an ambiguous grammar, the parser generation
process will complain.
<p>
For a more precise definition of the derivation of a
language string from a grammar see the ``further readings''
below. For illustrative purposes, and to help explain
how to define semantics functions, consider the following
derivation of the string
<center><table bgcolor="#ffdd66" border><tr><td>
<code> ( 123 ( setq x "this" ) )</code>
</td></tr></table></center>
using the rules declared in Figure GramStr.
<center><table bgcolor="#ffdd66" border><tr><td>
Derivation</td><td> Rule used </td></tr><tr><td>
<code> Value1 >> ( ListTail1</code> </td><td> <code> ListRule</code> </td></tr><tr><td>
<code> ListTail1 >> Value2 ListTail2</code> </td><td> <code> TailFull</code> </td></tr><tr><td>
<code> Value2 >> [int = 123]</code> </td><td> <code> Intrule</code> </td></tr><tr><td>
<code> ListTail2 >> Value3 ListTail3</code> </td><td> <code> TailFull</code> </td></tr><tr><td>
<code> Value3 >> (setq [var='x'] Value4)</code> </td><td> <code> SetqRule</code> </td></tr><tr><td>
<code> Value4 >> [string='this']</code> </td><td> <code> Strrule</code> </td></tr><tr><td>
<code> ListTail3 >> )</code> </td><td> <code> TailEmpty</code>
</td></tr></table></center>
To obtain the string derived we simply substitute the
representations derived for each of the numbered nonterminals
and terminals of the derivation as shown
in Figure Derive.
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
(1) Value1
(2) ( ListTail1 (ListRule)
(3) ( Value2 ListTail2 (TailFull)
(4) ( 123 ListTail2 (Intrule)
(5) ( 123 Value3 ListTail3 (TailFull)
(6) ( 123 ( setq x Value4 ) ListTail3 (SetqRule)
(7) ( 123 ( setq x "this" ) ListTail3       (Strrule)
(8) ( 123 ( setq x "this" ) ) (TailEmpty)
</pre>
</td></tr>
<caption>Right-to-left derivation steps for
<code> (123 (setq x "this"))</code>.
(Derive) </caption>
</table></center>
<h3>Compiling the Grammar Syntax, and Storing the Compilation</h3>
Once you have defined all the keywords, comments, terminals,
nonterminals, punctuations, and rules of your grammar you
may create the data structures needed for parsing by
compiling the grammar using
<pre>
GRAMMAROBJECT.Compile()
</pre>
Line 11 of Figure GrammarBuild performs the compilation
for the LispG grammar.
<p>
If the compilation succeeds you may use
<pre>
GRAMMAROBJECT.MarshalDump( OUTPUTFILE )
</pre>
to store the compiled grammar structure to a file that
may be later loaded without recompiling the grammar.
Here <code> MarshalDump</code> will create a binary ``marshalled''
representation for the grammar in the <code> OUTPUTFILE</code>.
For example line 13 of Figure GrammarBuild
marshals a representation for <code> LispG</code> to the
file <code> TESTLispG.mar</code>.
The function <code> kjParser.UnMarshalGram</code>, applied to that
file, will then reconstruct the internal
structure of LispG as a grammar object and return the
grammar object as the result of the function.
<p>
Nevertheless, compilation of the grammar by itself does
not yield a grammar that will do any useful
parsing. [Actually, it will do ``parsing'' using
default actions (implemented as a function which simply returns
the list argument).]
Rules must be associated with computational actions before
useful parsing can be done.
<h2>Defining a Semantics</h2>
<p>
Two sorts of objects require semantic actions that
define their meaning: rules and terminals.
All semantic actions must be defined as Python functions
and bound in the grammar before parsing can be performed.
<p>
Before you can define the semantics of your language
in Python you had better have a pretty good idea of what
the components of the language are supposed to represent,
of course. Using your intuitive understanding of the
language you can:
<center><table bgcolor="#ffdd66" border><tr><td>
Decide what the context of the computation should be
and how it should be implemented as a Python structure.
If the process of parsing must modify the context, then
the context structure must be a ``mutable'' Python
structure.
In the case of <code> LispG</code> the context is simply a structure
that maps ``internal'' variable names to values,
implemented as a simple Python dictionary mapping
name strings to the appropriate value.
</td></tr><tr><td>
Decide what kind of Python value each terminal of the grammar
represents. In the case of <code> LispG</code>
<center><table bgcolor="#ffdd66" border>
<tr><th> str</th></tr><tr><td>
should represent a string value corresponding to the string
recognized (minus the surrounding quotes).
</td></tr><tr><th> int</th></tr><tr><td>
should represent an integer value corresponding to the
string recognized.
</td></tr><tr><th> var</th></tr><tr><td>
should represent the string representing the variable name
recognized (the name must be translated to a corresponding
value at a higher level since the terminal interpretation
functions don't have access to the context
structure).
</td></tr></table></center>
</td></tr><tr><td>
Decide what kind of Python structure or value each
nonterminal represents. In the case of the <code> LispG</code>
grammar:
<center><table bgcolor="#ffdd66" border>
<tr><th> Value</th></tr><tr><td>
represents a Python integer, string, or list.
</td></tr><tr><th> ListTail</th></tr><tr><td>
represents a Python list containing the
members of the tail of a list.
</td></tr></table></center>
</td></tr><tr><td>
Decide how each rule should derive a structure corresponding
to the Goal (left hand side) of the rule based on the
values corresponding to the terminals and nonterminals
on the right hand side of the rule.
In the case of the <code> LispG</code> grammar
(refer to Figure GramStr for rule definitions):
<center><table bgcolor="#ffdd66" border>
<tr><th> SetqRule</th></tr><tr><td>
should return whatever the <code> Value</code> terminal in the body
represents.
</td></tr><tr><th> ListRule</th></tr><tr><td>
should return the list represented by the
<code> ListTail</code> nonterminal of the body.
</td></tr><tr><th> TailFull</th></tr><tr><td>
should return the result of adding the value corresponding
to the <code> Value</code> nonterminal of the list to the front
of the list corresponding to the <code> ListTail</code> nonterminal
of the body.
</td></tr><tr><th> Varrule</th></tr><tr><td>
should return the value from the computational
context that corresponds to the variable
name represented by the <code> var</code> terminal of the body.
</td></tr><tr><th> Intrule</th></tr><tr><td>
should return the integer corresponding to the <code> int</code>
terminal of the body.
</td></tr><tr><th> Strrule</th></tr><tr><td>
should return the string corresponding to the <code> str</code>
terminal of the body.
</td></tr><tr><th> PrintRule</th></tr><tr><td>
should return the value represented by the <code> Value</code>
nonterminal of the body.
</td></tr></table></center>
</td></tr><tr><td>
Decide what side effects, if any, each rule should have on
the computational context or
externally.
In the case of the <code> LispG</code> grammar:
<center><table bgcolor="#ffdd66" border>
<tr><th> SetqRule</th></tr><tr><td>
should associate the variable name represented by <code> var</code>
to the value represented by <code> Value</code> in the body.
</td></tr><tr><th> PrintRule</th></tr><tr><td>
should print the value corresponding to the <code> Value</code>
nonterminal to the screen.
</td></tr></table></center>
The other rules of <code> LispG</code>
should have no internal or external side effects.
</td></tr></table></center>
More complex languages may require much more complex contexts,
values and side effects, including
function definitions, modules, database
table accesses, user authorization
verifications, and/or file creation, among other possibilities.
<p>
Having determined the intuitive semantics of the language you
may now implement the semantic functions and
bind them in your grammar.
<h3>Semantics for Terminals</h3>
<p>
To define the meaning of a terminal you must create a Python
function that translates a string (which the parser has recognized
as an instance of the terminal)
into an appropriate value.
For instance, when the LispG grammar recognizes a string
<pre>
"this is a string"
</pre>
the interpretation function should translate the recognized
string into the Python string it represents: namely,
the same string but with the double quotes stripped off.
The following ``string interpretation function'' will perform
this simple interpretation.
<pre>
# from DLispShort.py
def stripQuotes( str ):
return str[1:len(str)-1]
</pre>
Similarly, when the parser recognizes a string as
an integer, the associated interpretation function
should translate the string into a Python integer.
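The <code> intInterp</code> function bound on line 1 of Figure TermDef
is not shown in the excerpts given here; a minimal sketch (assuming
the matched string is a simple decimal integer) might be
<pre>
import string
def intInterp( str ):
    return string.atoi( str )
</pre>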
<p>
The binding of interpretation functions to terminal
names is performed by the <code> Addterm</code> method previously
mentioned. For example, line 2 of Figure TermDef
associates the <code> stripQuotes</code> function to the
terminal named <code> str</code>.
<p>
All functions passed to
<code> Addterm</code> should take a single string argument
which represents the recognized string, and return
a value which represents the semantic interpretation
for the input string.
<h3>Semantics for Rules</h3>
<p>
The semantics of rules are more interesting, since rules may
have side effects and require the kind of recursive thinking
that gives most people headaches.
are specified by functions. To perform the
semantic action associated with a rule, the ``reduction
function'' should
perform any side effects (to the computational context or
externally) and return a result value that represents
the interpretation for the nonterminal at the head of the rule.
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
# from DLispShort.py
def EchoValue( list, Context ):
return list[0]
def VarValue( list, Context ):
varName = list[0]
if Context.has_key(varName):
return Context[varName]
else:
raise NameError, "no such lisp variable in context "+varName
def NilTail( list, Context ):
return []
def AddToList( list, Context ):
return [ list[0] ] + list[1]
def MakeList( list, Context ):
return list[1]
def DoSetq( list, Context):
Context[ list[2] ] = list[3]
return list[3]
def DoPrint( list, Context ):
print list[2]
return list[2]
</pre>
</td></tr>
<caption>The reduction functions for the rules. (RedFun)</caption>
</table></center>
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
# from DLispShort.py
def BindRules(LispG):
LispG.Bind( "Intrule", EchoValue )
LispG.Bind( "Strrule", EchoValue )
LispG.Bind( "Varrule", VarValue )
LispG.Bind( "TailEmpty", NilTail )
LispG.Bind( "TailFull", AddToList )
LispG.Bind( "ListRule", MakeList )
LispG.Bind( "SetqRule", DoSetq )
LispG.Bind( "PrintRule", DoPrint )
</pre>
</td></tr>
<caption>Binding named rules to interpretation functions.
(ruleBind)</caption>
</table></center>
The Python functions that define the semantics of the rules of
<code> LispG</code> appear in Figure RedFun and the declarations
that bind the rule names to the functions in the grammar object
<code> LispG</code> appear in Figure ruleBind.
<p>
Each ``reduction function'' for a rule must take two arguments:
a list representing the body of the rule and a context
structure which represents the computational context of the
computation. The list argument will have the same length
as the body of the rule, counting the keywords and punctuations
as well as the terminals and nonterminals.
<p>
For example the <code> SetqRule</code> has a body with five tokens,
<pre>
@R SetqRule :: Value >> ( setq var Value )
</pre>
so the
<code> DoSetq</code> function should expect the parser to deliver a
Python list argument with five elements of form
<pre>
list = [ '(', 'SETQ', VARIABLE_NAME, VALUE_RESULT, ')' ]
</pre>
Note that the ``names'' of keywords and punctuations appear
in the appropriate positions (0, 1, and 4) of the <code> list</code>
corresponding to their positions in <code> SetqRule</code>.
Furthermore, the position occupied by the terminal
<code> var</code> in <code> SetqRule</code> has been replaced by a string
representing a variable name in the <code> list</code> and the
position occupied by the nonterminal <code> Value</code> in
<code> SetqRule</code> has been replaced by a Python value.
<p>
More generally, the parser will call reduction functions for
rules with a <code> list</code> representing the ``interpreted
body of the rule'' where
<center><table bgcolor="#ffdd66" border>
<tr><th>keywords and punctuations</th></tr><tr><td>
are interpreted as themselves (i.e., their names),
except that letters will be in upper case if the grammar
is not case sensitive;
</td></tr><tr><th>terminals</th></tr><tr><td>
are interpreted as values previously returned by a call to
the appropriate terminal interpretation function; and
</td></tr><tr><th>nonterminals</th></tr><tr><td>
are interpreted as values previously returned by a
reduction function for a rule that derived this nonterminal.
</td></tr></table></center>
Although the occurrence of the keyword names in the list may seem
useless, it may have its purposes. For example,
a careful programmer might check them during debugging
to make sure the right function was bound to the right rule.
<p>
To determine how to implement the semantics of a rule
you must refer to the semantic decisions you made earlier.
For example, above we specified that the <code> setq</code> construct
should bind the variable name received (<code> list[2]</code>)
to the value (<code> list[3]</code>) in the <code> Context</code>,
and return the value (<code> list[3]</code>)
as the result of the expression.
Translated into the more concise language of Python this is
exactly what <code> DoSetq</code> shown in Figure RedFun
does.
<p>
To bind a rule name to a (previously declared)
reduction function use
<pre>
GRAMMAROBJECT.Bind( RULENAME, FUNCTION )
</pre>
where <code> RULENAME</code> is the string name for the rule previously
declared for the grammar <code> GRAMMAROBJECT</code> and
<code> FUNCTION</code> is
the appropriate reduction function for the rule.
These bindings for <code> LispG</code> are shown in Figure ruleBind.
<h3>A Bit on the Parsing Process</h3>
<p>
The following is not a precise definition
of the actions of a Parser, but it may help
you understand how the parsing process
works and the order in which rules are recognized
and functions are evaluated.
<p>
<center><table bgcolor="#ffdd66" border><tr><td>
</td><td>Tokens seen S </td><td> input remaining</td><td>
rule R and function call </tr><tr>
0</td><td><code> </code> </td><td> <code> (123 (setq x "this"))</code> </td><td> </tr><tr>
1</td><td><code> ( 123</code> </td><td> <code> (setq x "this"))</code> </td><td> Intrule </tr><tr>
</td><td></td><td></td><td> <code> Value2 = EchoValue([123],C)</code> </tr><tr>
2</td><td><code> ( Value2 ( setq x "this"</code> </td><td> <code> ))</code> </td><td> Strrule </tr><tr>
</td><td></td><td></td><td> <code> Value4 = EchoValue(['this'],C)</code> </tr><tr>
3</td><td><code> ( Value2 ( setq x Value4 )</code> </td><td> <code> )</code> </td><td> SetqRule </tr><tr>
</td><td></td><td></td><td> <code> Value3 = DoSetq(['(','SETQ','x',Value4,')'],C)</code> </tr><tr>
4</td><td><code> ( Value2 Value3 )</code> </td><td> </td><td> TailEmpty </tr><tr>
</td><td></td><td></td><td> <code> ListTail3 = NilTail([')'],C)</code> </tr><tr>
5</td><td><code> ( Value2 Value3 ListTail3</code> </td><td> </td><td> TailFull </tr><tr>
</td><td></td><td></td><td> <code> ListTail2 = AddToList([Value3,ListTail3],C)</code> </tr><tr>
6</td><td><code> ( Value2 ListTail2</code> </td><td> </td><td> TailFull </tr><tr>
</td><td></td><td></td><td> <code> ListTail1 = AddToList([Value2,ListTail2],C)</code> </tr><tr>
7</td><td><code> ( ListTail1</code> </td><td> </td><td> ListRule </tr><tr>
</td><td></td><td></td><td> <code> Value1 = MakeList(['(',ListTail1],C)</code> </tr><tr>
8</td><td><code> Value1</code> </td><td> </td><td>
</td></tr>
<caption>Parsing <code> (123 (setq x "this"))</code>
(Parse)</caption>
</table>
<em>Technically, each entry of S is tagged with the kind of token
it represents (keyword, nonterminal, or terminal) and the name
of the token it represents (e.g., <code> Value</code>, <code> str</code>)
as well as the
values shown.</em>
</center>
<p>
Figure Parse illustrates the sequence of reduction actions
performed by <code> LispG</code> when parsing the input string
<code> (123 (setq x "this"))</code>. We can think of this parse as
``reversing'' the derivation process shown in Figure Derive
using the rule reduction functions to obtain semantic
interpretations for the nonterminals.
<p>
At the lowest level of parsing a lexical analyser
examines the unread portion of the input string and tries
to match a prefix of the input string with a keyword
or a regular expression for a terminal (ignoring comments
and whitespace, except as separators). The analyser ``passes''
the recognized
token to the higher level parser
together with its interpreted value. The interpreted
value of a terminal is determined by using the appropriate
interpretation function and the interpreted value of
a keyword is simply its name (in upper case, if the
grammar is not case sensitive). For example the <code> LispG</code>
lexical analyser recognizes <code> '('</code> as a punctuation with the
value <code> '('</code> and <code> "this"</code> as an instance of the terminal
<code> str</code> with the value <code> 'this'</code>.
<p>
The higher level parser accepts tokens T from the lexical analyser
and does one of two things with them
<center><table bgcolor="#ffdd66" border>
<tr><td>
If the most recent
token values V that the parser has saved on its ``tokens seen'' stack S
``look like'' the body B of a
rule R and the current token is a token that
could follow the nonterminal N at the head of R, then
the parser evaluates the reduction function F associated
with R, using the values V from the stack S that match the body
B together with the computational context C. The resulting
value F(V,C) replaces the values V on S.
</td></tr><tr><td>
Otherwise the current token is shifted onto the ``tokens seen''
stack S and the parser moves on to the next token.
</td></tr></table></center>
The above is a lie.
Actually, the parsing process is much smarter than this, but
from a users perspective this simplification may be
helpful.
<p>
Figure Parse shows ``reduction'' steps and not
the ``shifts'', and glosses over the lexical analysis and
other nuances,
but it illustrates the idea of the parsing process nonetheless.
For example at step 2 the parser recognizes the last token
on the stack S
(an instance of the <code> "str"</code> terminal with value <code> "this"</code>)
as matching the body of <code> Strrule</code>, and replaces it
with an instance of the nonterminal <code> Value</code>
with value determined by the reduction of <code> Strrule</code>.
In this case <code> Strrule</code> is associated with the reduction
function <code> EchoValue</code>, so the result of the reduction
is given by <code> EchoValue( ['this'], C )</code> where C is the
context structure for the Parse.
<p>
At Step 3 the most recent entries of S
<pre>
V = ['(', 'SETQ', 'x', Value4, ')']
</pre>
match the body of the rule
<code> SetqRule</code>, so they are replaced on S by an instance
of the <code> Value</code> nonterminal with value determined by
<pre>
Value3 = DoSetq( V, C )
</pre>
Finally, at step 8, the interpretation associated
with <code> Value1</code> (an instance of the root nonterminal for
<code> LispG</code>) is considered the result of the computation.
<h2>Parsing with a Grammar</h2>
<p>
Before you can perform a parse you probably must create a
computational context for the parse. In the case of <code> LispG</code>
the context is simply a dictionary so we may initialize
<pre>
Context = {}
</pre>
to create a context for parsing.
<p>
There are two methods which provide the primary interfaces for
the parsing process for a grammar.
<pre>
RESULT = GRAMMAROBJECT.DoParse1(STRING, CONTEXT)
(RESULT, CONTEXT) = GRAMMAROBJECT.DoParse(STRING, CONTEXT)
</pre>
The second allows you to make explicit in code that uses parsing
the possibility that a parse may alter the context of the parse
-- aside from that the two functions are identical. Example
usage for <code> DoParse1</code> using <code> LispG</code> was given earlier.
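For example, a hypothetical continuation of the interactive
session shown in the introduction:
<pre>
>>> (result, Context) = LispG.DoParse(' (setq y 9) ', Context)
>>> result
9
>>> Context['y']
9
</pre>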
<h2>Storing and Reloading a Grammar Object</h2>
<p>
The process of compiling a grammar may take significant time
and consume significant quantities of
memory. To
free up memory from structures in a
compilable grammar object that aren't
needed after compilation use <code> GRAMMAR.CleanUp()</code>.
<p>
Once you have
debugged the syntax and semantics of your grammar you may
store syntactic information for the
grammar using the <code> Reconstruct</code>
method already mentioned. The declarations created by <code>
Reconstruct</code> only define the syntax for the grammar.
The semantics must be rebound separately. But it is much better to
use <code> MarshalDump</code> and <code> UnMarshalGram</code> as shown
below, which store and reload the grammar in a binary format.
<p>
For example, lines 12 through 14 of
Figure GrammarBuild create a file <code> TESTLispG.mar</code>
containing a marshalled representation which can be used
to reconstruct the syntax for the <code> LispG</code> grammar.
<center><table bgcolor="#ffdd66" border><tr><td>
<pre>
# from DLispShort.py
def unMarshalLispG():
import kjParser
infile = open("TESTLispG.mar", "r")
LispG = kjParser.UnMarshalGram(infile)
infile.close()
DeclareTerminals(LispG)
BindRules(LispG)
return LispG
</pre>
This function can then be used in another file,
provided <code>DLispShort.GrammarBuild()</code> has been executed
at some point in the past, thusly:
<pre>
import DLispShort
LGrammar = DLispShort.unMarshalLispG()
</pre>
</td></tr>
<caption>Loading a precomputed grammar object. (Load)</caption>
</table></center>
<p>
Figure Load shows a function <code> unMarshalLispG</code>
that reloads the syntactic
portion of <code> LispG</code> from <code> TESTLispG.mar</code>.
To rebind the semantics as well the
function re-declares the terminals and re-binds the rules
to make the reconstructed <code> LispG</code> suitable for use in parsing.
<h2>Errors raised</h2>
<p>
You may see the following errors:
<center><table bgcolor="#ffdd66" border>
<tr><th> LexTokenError</th></tr><tr><td>
This usually means the lowest level of the parser ran into a string
it couldn't recognize.
</td></tr><tr><th> BadPunctError</th></tr><tr><td>
You tried to make a whitespace character a punctuation.
This is not currently allowed.
</td></tr><tr><th> EOFError, SyntaxError</th></tr><tr><td>
You tried to parse a string that is not valid for the grammar.
</td></tr><tr><th> TokenError</th></tr><tr><td>
During parser generation you used a string in the rule definitions
that wasn't previously registered as a terminal, nonterminal, or
punctuation.
</td></tr><tr><th> NotSLRError</th></tr><tr><td>
You attempted to build a grammar that is not ``SLR'' according
to the definition of Aho and Ullman. Either the grammar is
ambiguous, or it doesn't have a derivation for the root
nonterminal, or it is too tricky for the generator.
</td></tr></table></center>
Furthermore
<code> NondetError, ReductError, FlowError, ParseInitError,
UnkTermError</code>
or errors raised by other modules
shouldn't happen.
If an error that shouldn't happen happens there are
two possibilities (1) you have fiddled with the code or
data structures and you broke something, or (2) there
is a serious bug in the module.
<h2>Possible Gotchas</h2>
<p>
This package has a number of known deficiencies, and there
are probably many that are yet to be discovered.
<p>
Syntax errors are not reported nicely. Sorry.
<p>
Currently, there is no way to resolve grammar
ambiguities. For example a C construct
<pre>
if (x)
if (y)
x = 0;
else
y = 1;
</pre>
could have the <code> else</code> associated with either the
first or second if; the grammar doesn't indicate which.
This is normally resolved by informing
the parser generator to prefer one binding or the other.
No method for providing a preference is implemented here, yet.
Let me know if you need such a method or if you have any suggestions.
<p>
Keywords of the meta-grammar cannot name tokens of
the object grammar (see footnote above).
<p>
If you want keywords to be recognized without case
sensitivity you must declare <code> G.SetCaseSensitivity(0)</code>
before any keyword declarations.
<p>
Name and regular expression collisions are not always
checked and reported. If you name two rules the same,
for example, you may get undefined behavior.
<p>
The lexical analysis implementation is not as fast as it
could be (of course).
It also sees all white space as a
`single space'
so, for example, if indentation is significant in your grammar
(as in Python) you'll need a different lexical analyzer.
Also if <code> x=+y</code> means something different from
<code> x = + y</code> (as it did in the original C, I believe)
you may have trouble. Happily the lexical component can
be easily ``plug replaced'' by another implementation if needed.
<p>
Also, the system currently only handles SLR grammars (as defined
by Aho and Ullman), as mentioned above. If you get a
<code> NotSLRError</code> during grammar compilation you need a better
parser generator. I may provide one, if I have motivation and time.
<p>
I know of no outright bugs. Trust me, they're there. Please
find them for me and tell me about them. I'm not a big
expert on parsing so I'm sure I've made some errors, particularly
at the lexical level.
<h2>Further Reading</h2>
<p>
A standard reference for parsing and compiler, interpreter,
and translator implementation is Principles of Compiler
Design, by Aho and Ullman (Addison Wesley). The best thing
to say about this book is that its competitors don't seem to
be much better.
</body>
</html>
# rules for python
# based on grammar given in Programming Python by Mark Lutz
# EDIT THIS: THE DIRECTORY IN WHICH TO MARSHAL THE
# GRAMMAR DATA STRUCTURES.
#
ARCHIVE = "."
marshalfilename = ARCHIVE + "/pygram.mar"
pyrules = """
all ::
## input terminates with "fake" dedent (forces read of all file)
@R all1 :: all >> file_input DEDENT
## 1 term newline
##@R lead_blank :: file_input >> NEWLINE file_input
@R top_stmt :: file_input >> file_input stmt
@R file_input :: file_input >> stmt
## 2
@R simple :: stmt >> simple_stmt
@R compound :: stmt >> compound_stmt
## 3 punct ; term NEWLINE
@R one_small :: simple_stmt >> small_stmt NEWLINE
@R more_small :: simple_stmt >> small_stmt ; simple_stmt
@R small_semi :: simple_stmt >> small_stmt ; NEWLINE
## 4 kw pass
@R smexpr :: small_stmt >> expr_stmt
@R smassn :: small_stmt >> assn
@R smprint :: small_stmt >> print_stmt
@R smdel :: small_stmt >> del_stmt
@R smpass :: small_stmt >> pass
@R smflow :: small_stmt >> flow_stmt
@R smimport :: small_stmt >> import_stmt
@R smglobal :: small_stmt >> global_stmt
## access ignored
@R smexec :: small_stmt >> exec_stmt
## 5
@R cmif :: compound_stmt >> if_stmt
@R cmwhile :: compound_stmt >> while_stmt
@R cmfor :: compound_stmt >> for_stmt
@R cmtry :: compound_stmt >> try_stmt
@R cmdef :: compound_stmt >> funcdef
@R cmclass :: compound_stmt >> classdef
##6
@R exprlist :: expr_stmt >> testlist
##@R assignment :: expr_stmt >> assn
@R assn1 :: assn >> testlist = testlist
@R assnn :: assn >> testlist = assn
@R assn1c :: assn >> testlist , = testlist
@R assn1c2 :: assn >> testlist , = testlist ,
@R assnnc :: assn >> testlist , = assn
##testing @R exprassn :: expr_stmt >> expr_stmt = testlist
@R exprlistc :: expr_stmt >> testlist ,
##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist ,
##7 kw print
@R rprint0 :: print_stmt >> print
@R rprint :: print_stmt >> print testlist
@R rprintc :: print_stmt >> print testlist ,
##8 kw del
@R rdel :: del_stmt >> del exprlist
##9 trivially handled in #4
##10 kw raise continue break return
## eliminates 11 12 13 14
@R rbreak :: flow_stmt >> break
@R rcontinue :: flow_stmt >> continue
@R rreturn0 :: flow_stmt >> return
@R rreturn :: flow_stmt >> return testlist
@R rreturnc :: flow_stmt >> return testlist ,
@R rraise1 :: flow_stmt >> raise test
@R rraise2 :: flow_stmt >> raise test , test
@R rraise3 :: flow_stmt >> raise test , test , test
## 11 12 13 14 skipped
## 15 kw import from
@R rimport :: import_stmt >> import dotted_name_list
@R rimportc :: import_stmt >> import dotted_name_list ,
@R dnlist1 :: dotted_name_list >> dotted_name
@R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name
@R rfrom :: import_stmt >> from dotted_name import name_list
@R rfroms :: import_stmt >> from dotted_name import *
@R rfromc :: import_stmt >> from dotted_name import name_list ,
@R nlistn :: name_list >> name_list , NAME
@R nlist1 :: name_list >> NAME
##16 nt NAME
@R dn1 :: dotted_name >> NAME
@R dnn :: dotted_name >> dotted_name . NAME
##17 kw global
@R global1 :: global_stmt >> global NAME
@R globaln :: global_stmt >> global_stmt , NAME
## 18 19 ignored
##20 kw exec in
@R exec1 :: exec_stmt >> exec expr
@R exec2 :: exec_stmt >> exec expr in test
@R exec3 :: exec_stmt >> exec expr in test , test
##21 kw if elif else punct :
@R ifr :: if_stmt >> if test : suite elifs
@R elifs0 :: elifs >>
@R relse :: elifs >> else : suite
@R elifsn :: elifs >> elif test : suite elifs
##22 kw while
@R while1 :: while_stmt >>
while test :
suite
@R while2 :: while_stmt >>
while test :
suite
else :
suite
##23 kw for
@R for1 :: for_stmt >>
for exprlist in testlist :
suite
@R for2 :: for_stmt >>
for exprlist in testlist :
suite
else :
suite
##24 kw try
@R tryr :: try_stmt >> try : suite excepts
@R excepts1 :: excepts >> except_clause : suite
@R excepts2 :: excepts >> except_clause : suite else : suite
@R exceptsn :: excepts >> except_clause : suite excepts
@R tryf :: try_stmt >> try : suite finally : suite
##25 kw except
@R except0 :: except_clause >> except
@R except1 :: except_clause >> except test
@R except2 :: except_clause >> except test , test
##26
@R class1 :: classdef >> class NAME : suite
@R class2 :: classdef >> class NAME ( testlist ) : suite
##27 kw def
@R rdef :: funcdef >> def NAME parameters : suite
##28, 29 punct = *
## (modified from grammar presented)
@R params1 :: parameters >> ( varargslist )
@R params1c :: parameters >> ( varargslist , )
@R params2 :: varargslist >>
## this is way too permissive: fix at semantic level
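## (for example, it accepts "def f(*a, x):", which python itself
##  rejects; such cases must be caught at the semantic level)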
@R params3 :: varargslist >> arg
@R params4 :: varargslist >> varargslist , arg
@R argd :: arg >> NAME = test
@R arg2 :: arg >> fpdef
@R arg3 :: arg >> * NAME
@R arg4 :: arg >> ** NAME
## 30
@R fpdef1 :: fpdef >> NAME
@R fpdef2 :: fpdef >> ( fplist )
@R fpdef2c :: fpdef >> ( fplist , )
##31
@R fplist1 :: fplist >> fpdef
@R fplistn :: fplist >> fplist , fpdef
##32 t INDENT DEDENT
@R ssuite :: suite >> simple_stmt
@R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT
@R stmtseq1 :: stmtseq >> stmt
@R stmtseqn :: stmtseq >> stmtseq stmt
##33 kw or cancels 53
@R testor :: test >> or_test
@R testand :: or_test >> and_test
@R testor1 :: or_test >> or_test or and_test
## @R testlambda0 :: test >> lambda : test REDUNDANT
@R testlambda1 :: test >> lambda varargslist : test
##34 kw and
@R andnot :: and_test >> not_test
@R andand :: and_test >> and_test and not_test
##35 kw not
@R notnot :: not_test >> not not_test
@R notcmp :: not_test >> comparison
##36 NOTE KWS == >= <= <> !=
@R cmpexpr :: comparison >> expr
@R cmplt :: comparison >> comparison < expr
@R cmpgt :: comparison >> comparison > expr
@R cmpeq :: comparison >> comparison == expr
@R cmpge :: comparison >> comparison >= expr
@R cmple :: comparison >> comparison <= expr
@R cmpnep :: comparison >> comparison <> expr
@R cmpne :: comparison >> comparison != expr
@R cmpin :: comparison >> comparison in expr
@R cmpnotin :: comparison >> comparison not in expr
@R cmpis :: comparison >> comparison is expr
@R cmpisnot :: comparison >> comparison is not expr
##37 kw is not punct > < ! (eliminated)
##38 p |
@R expr_xor :: expr >> xor_expr
@R expr_lor :: expr >> expr | xor_expr
##39 p ^
@R xor_and :: xor_expr >> and_expr
@R xor_xor :: xor_expr >> xor_expr ^ and_expr
##40
@R and_shift :: and_expr >> shift_expr
@R and_and :: and_expr >> and_expr & shift_expr
##41 note kws << >x> (the goofy x avoids confusing the grammar)
@R shift_arith :: shift_expr >> arith_expr
@R shift_left :: shift_expr >> shift_expr << arith_expr
@R shift_right :: shift_expr >> shift_expr >x> arith_expr
##42
@R arith_term :: arith_expr >> term
@R arith_plus :: arith_expr >> arith_expr + term
@R arith_minus :: arith_expr >> arith_expr - term
##43 p */%
@R termfactor :: term >> factor
@R termmul :: term >> term * factor
@R termdiv :: term >> term / factor
@R termmod :: term >> term % factor
## stuff for power
@R factorpower :: factor >> power
@R factorexp :: factor >> factor ** power
##44 p ~
@R powera :: power >> atom trailerlist
@R trailerlist0 :: trailerlist >>
@R trailerlistn :: trailerlist >> trailer trailerlist
@R powerp :: power >> + power
@R powerm :: power >> - power
@R poweri :: power >> ~ power
##45 t NUMBER STRING
@R nulltup :: atom >> ( )
@R parens :: atom >> ( testlist )
@R parensc :: atom >> ( testlist , )
@R nulllist :: atom >> [ ]
@R list :: atom >> [ testlist ]
@R listc :: atom >> [ testlist , ]
@R nulldict :: atom >> { }
@R dict :: atom >> { dictmaker }
@R dictc :: atom >> { dictmaker , }
@R repr :: atom >> ` testlist `
## @R reprc :: atom >> ` testlist , ` doesn't work, apparently
@R aname :: atom >> NAME
## note number to be broken out into FLOAT OCTINT HEXINT INT
@R anumber :: atom >> NUMBER
@R astring :: atom >> stringseq
@R stringseq0 :: stringseq >> STRING
@R stringseqn :: stringseq >> stringseq STRING
##46
@R nullcall :: trailer >> ( )
@R call :: trailer >> ( arglist )
@R callc :: trailer >> ( arglist , )
@R index :: trailer >> [ subscriptdots ]
@R getattr :: trailer >> . NAME
##47
@R arg1 :: arglist >> argument
@R argn :: arglist >> arglist , argument
##@R argn1 :: arglist >> arglist , NAME = test
##48 ( !!!! is this wrong in PP?)
@R posarg :: argument >> test
## here the left test should be a NAME always, but parser doesn't like it
@R namearg :: argument >> test = test
##49 this IS wrong in PP (numeric ext)
@R nodots :: subscriptdots >> subscriptseq
@R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq
@R subscript1 :: subscriptseq >> subscript
@R subscriptn :: subscriptseq >> subscriptseq , subscript
@R subscriptt :: subscript >> test
@R subscripts0 :: subscript >> :
@R subscriptsL :: subscript >> test :
@R subscriptsR :: subscript >> : test
@R subscripts :: subscript >> test : test
##50
@R exprlist1 :: exprlist >> expr
@R exprlistn :: exprlist >> exprlist , expr
##51
@R testlist0 :: testlist >> test
@R testlistn :: testlist >> testlist , test
##52
@R dictmaker1 :: dictmaker >> test : test
@R dictmaker2 :: dictmaker >> dictmaker , test : test
"""
nonterms = """
subscriptdots subscript arg
argument arglist subscriptseq params trailerlist
factor atom trailer dictmaker stringseq power
xor_expr and_expr shift_expr arith_expr term
and_test or_test not_test comparison comp_op expr
fplist stmtseq varargslist assn
expr elifs suite excepts parameters pbasic pdefault pspecial
testlist exprlist test dotted_name_list dotted_name name_list
if_stmt while_stmt for_stmt try_stmt funcdef classdef
expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt
small_stmt compound_stmt stmt simple_stmt exec_stmt
file_input except_clause fpdef cmp_op
all
"""
import string
# python needs special handling for the lexical stuff
NAMEre = "[" + string.letters + "_][" + string.letters+string.digits +"]*"
NUMBERre = "[" + string.digits + "]+" # temporary!
STRINGre = '"[^"\n]*"' # to be overridden in lexdict
#NEWLINEre = "\n" # to be overridden in lexdict
INDENTre = "#" # a fake! to be overridden
DEDENTre = "#" # a fake! to be overridden
def echo(str):
return str
def DeclareTerminals(Grammar):
Grammar.Addterm("NAME", NAMEre, echo)
Grammar.Addterm("NUMBER", NUMBERre, echo)
Grammar.Addterm("STRING", STRINGre, echo)
#Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw!
Grammar.Addterm("INDENT", INDENTre, echo)
Grammar.Addterm("DEDENT", DEDENTre, echo)
# not >x> is a fake!
keywords = """
and break class continue def del elif else except exec
finally for from global if import in is lambda not or pass
print raise return try while == >= <= <> != >x> << NEWLINE
**
"""
import kjParser, string, regex
from kjParser import KEYFLAG, ENDOFFILETERM
alphanumunder = string.letters+string.digits+"_"
alpha = string.letters + "_"
# components that are part of an identifier (cannot be next to a kw).
id_letters = map(None, alphanumunder)
# terminator re for names
nametermre = "[^" + alphanumunder + "]"
nameterm = regex.compile(nametermre)
# terminator re for numbers (same as above but allow "." in num).
numtermre = "[^" + alphanumunder + "\.]"
numterm = regex.compile(numtermre)
parseerror = "parseerror"
pycommentre = "\(#.*\)"
# whitespace regex outside of brackets
# white followed by (comment\n maybe repeated)
# DON'T EAT NEWLINE!!
pywhiteoutre = "\([ \t\r\014]\|\\\\\n\)*%s?" % pycommentre
pywhiteout = regex.compile(pywhiteoutre)
# whitespace regex inside brackets
# white or newline possibly followed by comment, all maybe repeated
pywhiteinre = pywhiteoutre #"[ \t\r]*\(\\\\\n\)*%s?" % pycommentre
pywhitein = regex.compile(pywhiteinre)
# totally blank lines (only recognize if next char is newline)
#allblankre = "\n" + pywhiteinre
#allblank = regex.compile(allblankre)
# re for indentation (might accept empty string)
indentp = regex.compile("[\t ]*")
# two char kws and puncts
char2kw = ["if", "or", "in", "is"]
punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="]
# >two char kws as map of first 3 chars to others
char3k_data = """
and break class continue def del elif else except
finally for from global import lambda not pass print
raise return try while exec
"""
char3kw = string.split(char3k_data)
char3kwdict = {}
for x in char3kw:
char3kwdict[x[:3]] = x
# NOTE: newline is treated same as a punctuation
# NOTE: "' ARE NOT PUNCTS
punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`"
punctlist = map(None, punct)
kwmap = {}
for x in char2kw + punct2 + char3kw + map(None, punct):
# everything parses as length 1 to the outer world.
kwmap[x] = (((KEYFLAG, x), x), 1)
# special hack
kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1)
newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)
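# illustration (not in the original source): each kwmap entry is a
# (token, length) pair where the token part is ((KEYFLAG, name), name), eg
# kwmap["if"] == (((KEYFLAG, "if"), "if"), 1)
# the length is always reported as 1 because the lexer maintains the
# real string position itself (self.realindex below).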
#finaldedent = (((TERMFLAG, "DEDENT"), ""), 1)
# Python lexical dictionary.
### MUST HANDLE WHOLLY BLANK LINES CORRECTLY!
class pylexdict(kjParser.LexDictionary):
def __init__(self):
kjParser.LexDictionary.__init__(self)
# need to add special map for >>
self.brackets = 0 # count of active brackets
self.realindex = 0 # where to start
self.indents = [""] # stack of indents (start with a fake one)
self.lineno = 0
self.atdedent = 0
### handle multiple dedents correctly!!!
### translate tabs to 8 spaces...
from kjParser import TERMFLAG
self.NAMEflag = (TERMFLAG, "NAME")
self.STRINGflag = (TERMFLAG, "STRING")
self.NEWLINEflag = (TERMFLAG, "NEWLINE")
self.INDENTflag = (TERMFLAG, "INDENT")
self.DEDENTflag = (TERMFLAG, "DEDENT")
self.NUMBERflag = (TERMFLAG, "NUMBER")
def endoffile(self, String):
# pop off all indentations!
indents = self.indents
#lastresult = self.lastresult
self.realindex = len(String)
if not indents:
# pop indents
#print "eof after dedent"
result = self.lastresult = (ENDOFFILETERM, 0)
else:
#print "eof as dedent after", self.lastresult
del indents[-1]
if indents:
dedent = indents[-1]
else:
dedent = ""
result = self.lastresult = ((self.DEDENTflag, dedent), 1)
#print "returning eof", result, "after", lastresult
return result
def Token(self, String, StartPosition):
#print "Token", (StartPosition,
# `String[self.realindex:self.realindex+20]`, self.lastresult)
# HAVE TO FAKE OUT LEXER FOR DEDENTS
# STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION
# STRING POSITION IS MAINTAINED IN LexDict object.
lastindex = self.lastindex
lastresult = self.lastresult
if self.laststring is not String:
#print "parsing new string"
self.laststring = String
# special hack: skip lead whitespace
cursor = 0
self.lineno = 1
while 1:
test = pywhitein.match(String, cursor)
if test<0: break
next = cursor + test
#print "lead skip:", next, String[cursor:next]
if String[next]!="\n": break
#skipped = String[cursor:next]
#if "\n" in skipped:
# self.lineno = (
# self.lineno + len(string.splitfields(skipped, "\n")))
#self.lineno = self.lineno+1
cursor = next + 1
self.realindex = cursor
self.saveindex = 0
self.indents = [""] # stack of indents (start with a fake one)
# pretend we saw a newline
self.lastresult = newlineresult
if StartPosition!=0:
self.laststring = None
raise ValueError, "python lexical parsing must start at zero"
lastindex = self.lastindex
lastresult = None
elif lastindex == StartPosition:
#print "returning lastresult ", lastresult
return lastresult
elif lastindex != StartPosition-1:
raise ValueError, "python lexer can't skip tokens"
#print "parsing", StartPosition, lastresult
# do newline counting here!
delta = String[self.saveindex: self.realindex]
#print "delta", `delta`
if "\n" in delta:
#print self.lineno, self.saveindex, self.realindex, `delta`
self.lineno = self.lineno + len(
string.splitfields(delta, "\n")) - 1
realindex = self.saveindex = self.realindex
self.lastindex = StartPosition
# skip whitespace (including comments)
### needs to be improved to parse blank lines, count line numbers...
# skip all totally blank lines (don't eat last newline)
atlineend = (String[realindex:realindex+1] == "\n"
or lastresult is newlineresult
or self.atdedent)
skipnewlines = (lastresult is newlineresult or
self.atdedent or
self.brackets>0)
if atlineend: #String[realindex:realindex+1]=="\n":
#print "trying to skip blank lines", String[realindex:realindex+10]
while 1:
#if String[realindex:realindex+1]=="\n":
# start = realindex+1 # move past current newline
# self.lineno = self.lineno + 1
#else:
# start = realindex
start = realindex
if skipnewlines:
while String[start:start+1]=="\n":
start = start+1
#self.lineno = self.lineno+1
#print "matching", `String[start:start+10]`
skip = pywhitein.match(String, start)
#print "skip=", skip
if skip<0: break
rs = skip + realindex + (start-realindex)
if rs==realindex: break
#print "at", rs, `String[rs]`
if (rs<len(String) and
(String[rs] == "\n" or
(skipnewlines and String[rs-1:rs]=="\n"))):
#print "skipping blank line"
#if lastresult is newlineresult or self.brackets>0:
# rs = rs + 1
#skipped = String[start:rs]
#if "\n" in skipped:
#self.lineno = self.lineno + len(
# string.splitfields(skipped, "\n"))
self.realindex = realindex = rs
#self.lineno = self.lineno+1
else:
if skipnewlines: self.realindex = realindex = start
break
#print "after skipping blank lines", `String[realindex:realindex+20]`
skipto = realindex
skip = 0
if self.brackets>0:
while 1:
#print "skipping white in brackets", skipto
if skipto>=len(String):
break
if String[skipto]=="\n":
#self.lineno = self.lineno+1
skipto = skipto + 1
self.realindex = realindex = skipto
continue
skip = pywhiteout.match(String, skipto)
nextskipto = skipto+skip
#skipped = String[skipto:nextskipto]
#if "\n" in skipped:
# self.lineno = self.lineno+len(
# string.splitfields(skipped, "\n"))
if skip>0:
skipto = nextskipto
else: break
skip = skipto - realindex
elif not atlineend:
skip = pywhitein.match(String, realindex)
if skip<=0:
skip = 0
else:
#print "skipping", skip
nextri = realindex + skip
#skipped = String[realindex:nextri]
#if "\n" in skipped:
# self.lineno = self.lineno + len(
# string.splitfields(skipped, "\n"))
realindex = self.realindex = nextri
if realindex>=len(String):
return self.endoffile(String)
# now look for a keyword, name, number, punctuation,
# INDENT, DEDENT, NEWLINE
first = String[realindex]
#if last parse was newline and not in brackets:
# look for indent/dedent
if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
and first != "\n"):
#print "looking for dent", realindex, `String[realindex:realindex+20]`
match = indentp.match(String, realindex)
if match>=0:
dent = String[realindex: realindex+match]
#print "dent match", match, `dent`
oldindex = realindex
self.realindex = realindex = realindex+match
# replace tabs with 8 spaces
dent = string.joinfields(string.splitfields(dent, "\t"),
" ")
dents = self.indents
lastdent = dents[-1]
ldl = len(lastdent)
dl = len(dent)
#print "last", ldl, dents
if ldl<dl:
self.atdedent = 0
result = self.lastresult = ((self.INDENTflag, dent), 1)
dents.append(dent)
#print "indent ", result, dents
return result
if ldl>dl:
self.realindex = oldindex # back up, may have to see it again!
self.atdedent = 1
result = self.lastresult = ((self.DEDENTflag, dent), 1)
del dents[-1]
#print "dedent ", result, dl, dents
return result
# otherwise, indentation is same, keep looking
# might be at eof now:
if realindex>=len(String):
#print "returning eof"
return self.endoffile(String)
first = String[realindex]
self.atdedent = 0
from string import digits #, letters
if (first in punctlist and
# special case for .123 numbers (yuck!)
(first!="." or String[realindex+1] not in digits)):
# is it a 2 char punct?
first2 = String[realindex:realindex+2]
if first2 in punct2:
result = self.lastresult = kwmap[first2]
self.realindex = realindex+2
#print "2 digit punct", result
return result
# otherwise, just return normal punct
result = self.lastresult = kwmap[first]
self.realindex = self.realindex + 1
### special bookkeeping
if first=="\n":
result = newlineresult
#print "newline!"
#self.lineno = self.lineno+1
elif first in "[{(":
#print "bracket!"
self.brackets = self.brackets + 1
elif first in "]})":
#print "close bracket!"
self.brackets = self.brackets - 1
#print "1 digit punct", result
return result
if first in digits or first==".":
# parse a number...
skip = numterm.search(String, realindex)
if skip<=realindex:
raise parseerror, "number length<1 (!)"
thenumber = String[realindex:skip]
self.realindex = skip
### note don't interpret number here!!
result = self.lastresult = ((self.NUMBERflag, thenumber), 1)
#print "number", result
return result
if first in alpha:
# try keyword...
first2 = String[realindex: realindex+2]
if first2 in char2kw:
if String[realindex+2:realindex+3] not in id_letters:
# parse a 2 char kw first2
result = self.lastresult = kwmap[first2]
self.realindex = self.realindex+2
#print "keyword 2", result
return result
first3 = String[realindex: realindex+3]
if char3kwdict.has_key(first3):
the_kw = char3kwdict[first3]
the_end = realindex+len(the_kw)
if ((the_end<len(String)) and
(String[the_end] not in id_letters) and
(String[realindex:the_end]==the_kw)):
# parse the_kw
self.realindex = the_end
result = self.lastresult = kwmap[the_kw]
#print "keyword +", result
return result
#otherwise parse an identifier
#print "looking for name:", `String[realindex:realindex+10]`
skip = nameterm.search(String, realindex)
if skip<=realindex:
raise parseerror, "identifier length<1 (!)"
theid = String[realindex:skip]
self.realindex = skip
### note don't interpret number here!!
result = self.lastresult = ((self.NAMEflag, theid), 1)
#print "id", result
return result
if first in "\"'":
# check for triplequotes
first3 = first*3
if String[realindex: realindex+3] == first3:
# parse triple quotes
start = place = realindex+3
while 1:
last = string.find(String, first3, place)
if last<0:
raise parseerror, "failed to terminate triple quotes"
if String[last-1:last]=="\\" and String[last-2:last-1]!="\\":
place = last+1
else: break
the_string = String[start: last]
self.realindex = last+3
result = self.lastresult = ((self.STRINGflag, the_string), 1)
#print "3q string", result
# count the newlines!
#newlinecount = len(string.splitfields(the_string, "\n"))
#self.lineno = self.lineno+newlinecount
#print "triple quotes", result
return result
else:
# parse single quotes
sanity = start = place = realindex+1
done = 0
while 1:
sanity = min(string.find(String, "\n", sanity), len(String))
if sanity<start:
sanity=len(String)
break
if String[sanity-1]!="\\":
break
else:
#self.lineno = self.lineno+1
sanity = sanity + 1
while 1:
last = string.find(String, first, place)
if last<0 or last>sanity:
raise parseerror, "failed to terminate single quotes"
if String[last-1:last]=="\\":
# are we at the end of an odd number of backslashes? (yuck!)
bplace = last-1
while String[bplace:bplace+1]=="\\":
bplace = bplace-1
if (last-bplace)%2==1:
break # the end quote is real!
place = last+1
else: break
the_string = String[start:last]
self.realindex = last+1
result = self.lastresult = ((self.STRINGflag, the_string), 1)
#print "1q string", result
return result
#print (String[realindex-20:realindex-1], String[realindex],
# String[realindex+1:realindex+20])
raise parseerror, "invalid first: " + `first`
# use a modified lexstringwalker
class pylexstringwalker(kjParser.LexStringWalker):
def DUMP(self):
kjParser.DumpStringWindow(self.String, self.LexDict.realindex)
## a HORRIBLE HACK! of a hack: override the DoParse of Grammar
## to give Python line numbers. RELIES ON GLOBAL pyg
##
def hackDoParse(String, Context=None, DoReductions=1):
import sys, kjParser
try:
# construct the ParserObj
# add a newline to front to avoid problem with leading comment
#String = "\n%s\n" % String
Stream = pylexstringwalker( String, pyg.LexD )
Stack = [] # {-1:0} #Walkers.SimpleStack()
ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \
DoReductions, Context )
# do the parse
ParseResult = ParseOb.GO()
# return final result of reduction and the context
return (ParseResult[1], Context)
#return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions)
except: ### for testing!!
t, v = sys.exc_type, sys.exc_value
v = ("near line", pyg.LexD.lineno, v)
raise t, v
buildinfo = """
Please edit the ARCHIVE parameter of this module (%s)
to place the python grammar archive in a standard
directory to prevent the module from rebuilding
the python grammar over and over and over...
""" % __name__
def GrammarBuild():
global pyg
import kjParseBuild
pyg = kjParseBuild.NullCGrammar()
pyg.DoParse = hackDoParse
# override lexical dict here
pyg.LexD = pylexdict()
DeclareTerminals(pyg)
pyg.Keywords(keywords)
pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}")
pyg.Nonterms(nonterms)
pyg.Declarerules(pyrules)
print buildinfo
print "compiling... this may take a while..."
pyg.Compile()
print "dumping"
outfile = open(marshalfilename, "wb")
pyg.MarshalDump(outfile)
outfile.close()
print "self testing the grammar"
test(pyg)
print "\n\ndone with regeneration"
return pyg
def unMarshalpygram():
global pyg
import kjParser
print "loading"
try:
infile = open(marshalfilename, "rb")
except IOError:
print marshalfilename, "not found, attempting creation"
pyg = GrammarBuild()
else:
pyg = kjParser.UnMarshalGram(infile)
infile.close()
pyg.DoParse = hackDoParse
# lexical override
pyg.LexD = pylexdict()
DeclareTerminals(pyg)
# BindRules(pyg)
if dotest:
print "self testing the grammar"
test(pyg)
return pyg
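# example use (a sketch, not in the original source): load or rebuild the
# grammar, then parse a string of python source via DoParse1 (the same
# entry point used by test() below):
# pyg = unMarshalpygram()
# result = pyg.DoParse1("x = 1\n")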
# not used, commented
#### interpretation rules/classes
#
#def zeroth(list, Context):
# return list[0] # eg, for all1, ignore all but first
#
## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob
#def append(list, Context):
# "eg, for top_stmt, conjoin two smt lists"
# return list[0] + list[1]
#
## file_input -->zeroth
#
## simple, compound, one_small, small_semi: echol
#def echol(list, Context):
# return list
#
## more_small --> seq_sep
#def seq_sep(list, Context):
# list[0].append(list[2])
# return list[0]
#
## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec
## --> zeroth
#
## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass --> zeroth
#
#
#def BindRules(pyg):
# for name in string.split("""
# all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass
# smexpr smassn smprint smdel smflow smimport smglobal smexec
# """):
# pyg.Bind(name, zeroth)
# for name in string.split("""
# simple compound one_small small_semi
# """):
# pyg.Bind(name, echol)
# pyg.Bind("top_stmt", append)
# pyg.Bind("more_small", seq_sep)
teststring = """#
#
# a test string
#
from string import join, split
'''
import regex
for a in l:
a.attr, a[x], b = c
else:
d = b
'''
class zzz:
'''
#doc string
'''
'''
global regex, join
d = {}
for i in range(10): d[i] = i
'''
def test(c,s):
return "this"
while not done:
print done
break
list = [1,2,3]
# comment
return 5
n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8
if x==5:
while y:
for i in range(6):
raise SystemError, "oops"
"""
#teststring ="""\
## comment
#if x in y: print z
#elif 1: print w
#"""
teststring="""
exec "print 1"
"""
def test(grammar, context=None, teststring=teststring):
from time import time
now = time()
x = grammar.DoParse1(teststring, context)
elapsed = time()-now
print x
print elapsed
return x
regen = 0
dotest = 0
if __name__ == "__main__" :
if regen: GrammarBuild()
unMarshalpygram()
"""Simple relational algebra interpreter.
usage:
To make the grammar
python relalg.py make
To run some relational algebra expressions
python relalg.py < expressions_file
"""
# EDIT INSTALLDIR TO BE ABLE TO LOAD UNDER ANY CWD
INSTALLDIR = "."
## simple relational algebra using only the equality predicate
## note: string values cannot contain ;
## statement sequencing using ; handled at higher level
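## example input (an illustration only, per the rules below): a file fed
## to "python relalg.py < expressions_file" might contain
## likes = [drinker beer] ('nan' 'rollingrock', 'penelope' 'bud');
## projection [drinker] likes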
relalg_rules = """
statement ::
@R statementassn :: statement >> assignment
@R statementexpr :: statement >> rexpr
@R assignment1 :: assignment >> name = rexpr
@R assignmentn :: assignment >> name = assignment
@R union :: rexpr >> rexpr U rterm
@R rterm :: rexpr >> rterm
@R minus :: rexpr >> rexpr - rterm
@R intersect :: rterm >> rterm intersect rfactor
@R join :: rterm >> rterm join rfactor
@R rfactor :: rterm >> rfactor
@R projection :: rfactor >> projection [ names ] rfactor
@R names0 :: names >>
@R namesn :: names >> names1
@R names11 :: names1 >> name
@R names1n :: names1 >> names1 name
@R selection :: rfactor >> selection ( condition ) rfactor
@R conditionor :: condition >> condition | boolfactor
@R condfactor :: condition >> boolfactor
@R factorand :: boolfactor >> boolfactor & boolprimary
@R factorprime :: boolfactor >> boolprimary
@R notprimary :: boolprimary >> ~ boolprimary
@R primarycondition :: boolprimary >> ( condition )
@R primaryeq :: boolprimary >> expression = expression
@R expname :: expression >> name
@R expvalue :: expression >> value
@R rename :: rfactor >> rename [ names ] to [ names ] rfactor
@R named :: rfactor >> name
@R factorexpr :: rfactor >> ( rexpr )
@R relationval :: rfactor >> [ names ] ( rows )
@R rows0 :: rows >>
@R rowsn :: rows >> somerows
@R somerows1 :: somerows >> row
@R somerowsn :: somerows >> somerows , row
@R emptyrow :: row >> NIL
@R row1 :: row >> value
@R rown :: row >> row value
@R valuenum :: value >> number
@R valuestr :: value >> string
"""
keywords = """
selection intersect rename projection to NIL U join
"""
puncts = """=^~|,-[]()&"""
nonterms = """
statement assignment rexpr rterm value rfactor
names names1 condition boolfactor boolprimary
expression rows somerows row
"""
try:
from kjbuckets import *
except ImportError:
from kjbuckets0 import *
class relation:
def __init__(self, names, rows):
#print "relation init", names, rows
names = self.names = tuple(names)
nameset = self.nameset = kjSet(names)
for r in rows:
if nameset != kjSet(r.keys()):
raise ValueError, \
"bad names: "+`(names, r.items())`
self.rows = kjSet(rows)
def __repr__(self):
from string import join
names = self.names
rows = self.rows.items()
if not rows:
nns = join(names)
replist = [nns, "="*len(nns), " --<empty>--"]
return join(replist, "\n")
#print names, rows
nnames = len(names)
if nnames==1:
replist = [names[0]]
else:
replist = [names]
for r in rows:
elt = r.dump(names)
replist.append(r.dump(names))
#print replist
if nnames==1:
replist = maxrep(replist)
else:
transpose = apply(map, tuple([None] + replist))
adjusted = map(maxrep, transpose)
replist = apply(map, tuple([None] + adjusted))
replist = map(join, replist)
replist.insert(1, "=" * len(replist[0]))
#print replist
return join(replist, "\n")
def maxrep(list):
list = map(str, list)
maxlen = max( map(len, list) )
for i in range(len(list)):
item = list[i]
litem = len(item)
list[i] = item + (" " * (maxlen-litem))
return list
# context is a simple dictionary of named relations
def elt0(l, c):
return l[0]
statementassn = elt0
def statementexpr(l, c):
from string import split, join
print
print " --- expression result ---"
print
data = str(l[0])
print " "+ join(split(data, "\n"), "\n ")
def assignment1(l, c):
[name, eq, val] = l
c[name] = val
return val
assignmentn = assignment1
def check_compat(v1, v2):
names1, names2 = v1.names, v2.names
if names1 != names2:
raise ValueError, \
"operands not union compatible "+`(names1, names2)`
return names1, v1.rows, v2.rows
def union(l, c):
[v1, U, v2] = l
names1, r1, r2 = check_compat(v1, v2)
return relation(names1, (r1+r2).items())
rterm = elt0
def minus(l, c):
[v1, m, v2] = l
names1, r1, r2 = check_compat(v1, v2)
return relation(names1, (r1-r2).items())
def intersect(l, c):
[v1, i, v2] = l
names1, r1, r2 = check_compat(v1, v2)
return relation(names1, (r1&r2).items())
def join(l, c):
[v1, j, v2] = l
n1, n2 = v1.names, v2.names
r1, r2 = v1.rows.items(), v2.rows.items()
n1s, n2s = kjSet(n1), kjSet(n2)
common = tuple((n1s&n2s).items())
result = kjSet()
if common:
# simple hashjoin
G = kjGraph()
for a in r1:
G[a.dump(common)] = a
for b in r2:
for a in G.neighbors(b.dump(common)):
result[a+b] = 1
else:
for a in r1:
for b in r2:
result[a+b] = 1
return relation( (n1s+n2s).items(), result.items() )
rfactor = elt0
def projection(l, c):
[p, b1, names, b2, val] = l
proj = kjSet(names)
result = kjSet()
for row in val.rows.items():
result[ proj * row ] = 1
return relation( names, result.items())
def emptylist(l, c):
return []
names0 = emptylist
namesn = elt0
def names11(l, c):
return l
def names1n(l, c):
[ns, n] = l
ns.append(n)
return ns
def selection(l, c):
[sel, p1, cond, p2, val] = l
return cond.filter(val)
## conditions are not optimized at all!
class conditionor:
def __init__(self, l, c):
[self.c1, op, self.c2] = l
def filter(self, val):
v1 = self.c1.filter(val)
v2 = self.c2.filter(val)
return relation(v1.names, (v1.rows+v2.rows).items())
condfactor = elt0
class factorand(conditionor):
def filter(self, val):
v1 = self.c1.filter(val)
v2 = self.c2.filter(val)
return relation(v1.names, (v1.rows&v2.rows).items())
factorprime = elt0
class notprimary:
def __init__(self, l, c):
[n, self.c1] = l
def filter(self, val):
v1 = self.c1.filter(val)
return relation(v1.names, (val.rows-v1.rows).items())
def elt1(l, c):
return l[1]
primarycondition = elt1
class primaryeq:
def __init__(self, l, c):
[self.e1, eq, self.e2] = l
def filter(self, val):
rows = val.rows.items()
e1v = self.e1.value(rows)
e2v = self.e2.value(rows)
result = kjSet()
for (r, v1, v2) in map(None, rows, e1v, e2v):
if v1==v2:
result[r] = 1
return relation(val.names, result.items())
class expname:
def __init__(self, l, c):
self.name = l[0]
def value(self, rows):
name = self.name
r = list(rows)
for i in xrange(len(r)):
r[i] = r[i][name]
return r
class expvalue(expname):
def value(self, rows):
return [self.name] * len(rows)
def rename(l, c):
[ren, b1, names, b2, to, b3, names2, b4, val] = l
if len(names)!=len(names2):
raise ValueError, "names lengths must match"+`(names, names2)`
remap = kjDict(map(None, names2, names))
oldnames = kjSet(val.names)
addnames = kjSet(names2)
remnames = kjSet(names)
keepnames = oldnames - remnames
remap = remap + keepnames
if not remnames.subset(oldnames):
#print remnames, oldnames
raise ValueError, "old names not present"+`(names, val.names)`
newnames = keepnames+addnames
rows = val.rows.items()
for i in range(len(rows)):
rows[i] = remap*rows[i]
return relation(newnames.items(), rows)
def named(l, c):
[name] = l
return c[name]
def relationval(l, c):
[b1, names, b2, p1, rows, p2] = l
names = tuple(names)
ln = len(names)
for i in xrange(len(rows)):
this = rows[i]
lt = len(this)
if lt!=ln:
raise ValueError, "names, vals don't match"+`(names,this)`
if len(this)==1:
this = this[0]
else:
this = tuple(this)
rows[i] = kjUndump(names, this)
return relation(names, rows)
rows0 = emptylist
rowsn = elt0
def somerows1(l, c):
#print "somerows1", l
return l
def somerowsn(l, c):
#print "somerowsn", l
[sr, comma, r] = l
sr.append(r)
return sr
emptyrow = emptylist
row1 = somerows1
def factorexpr(l, c):
return l[1]
def rown(l, c):
#print "rows", l
[r, v] = l
r.append(v)
return r
valuenum = valuestr = elt0
## snarfed from sqlbind
# note: all reduction function defs must precede this assign
VARS = vars()
class punter:
def __init__(self, name):
self.name = name
def __call__(self, list, context):
print "punt:", self.name, list
return list
class tracer:
def __init__(self, name, fn):
self.name = name
self.fn = fn
def __call__(self, list, context):
print "tracing", self.name, list
test = self.fn(list, context)
print self.name, "returns", test
return test
def BindRules(sqlg):
for name in sqlg.RuleNameToIndex.keys():
if VARS.has_key(name):
#print "binding", name
sqlg.Bind(name, VARS[name]) # nondebug
#sqlg.Bind(name, tracer(name, VARS[name]) ) # debug
else:
print "unbound", name
sqlg.Bind(name, punter(name))
return sqlg
## snarfed from sqlgen
MARSHALFILE = "relalg.mar"
import string
alphanum = string.letters+string.digits + "_"
userdefre = "[%s][%s]*" % (string.letters +"_", alphanum)
RACOMMENTREGEX = "COMMENT .*"
def userdeffn(str):
return str
charstre = "'[^\n']*'"
def charstfn(str):
return str[1:-1]
numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really...
def numlitfn(str):
"""Note: this is "safe" because regex
filters out dangerous things."""
return eval(str)
def DeclareTerminals(Grammar):
Grammar.Addterm("name", userdefre, userdeffn)
Grammar.Addterm("string", charstre, charstfn)
Grammar.Addterm("number", numlitre, numlitfn)
def Buildrelalg(filename=MARSHALFILE):
import kjParseBuild
SQLG = kjParseBuild.NullCGrammar()
#SQLG.SetCaseSensitivity(0)
DeclareTerminals(SQLG)
SQLG.Keywords(keywords)
SQLG.punct(puncts)
SQLG.Nonterms(nonterms)
# should add comments
SQLG.comments([RACOMMENTREGEX])
SQLG.Declarerules(relalg_rules)
print "working..."
SQLG.Compile()
filename = INSTALLDIR+"/"+filename
print "dumping to", filename
outfile = open(filename, "wb")
SQLG.MarshalDump(outfile)
outfile.close()
return SQLG
def reloadrelalg(filename=MARSHALFILE):
import kjParser
filename = INSTALLDIR+"/"+filename
infile = open(filename, "rb")
SQLG = kjParser.UnMarshalGram(infile)
infile.close()
DeclareTerminals(SQLG)
BindRules(SQLG)
return SQLG
def runfile(f):
from string import split, join, strip
ragram = reloadrelalg()
context = {}
#f = open(filename, "r")
data = f.read()
#f.close()
commands = split(data, ";")
for c in commands:
if not strip(c): continue
print " COMMAND:"
data = str(c)
pdata = " "+join(split(c, "\n"), "\n ")
print pdata
test = ragram.DoParse1(c, context)
print
# c:\python\python relalg.py < ratest.txt
if __name__=="__main__":
try:
done = 0
import sys
argv = sys.argv
if len(argv)>1:
command = argv[1]
if command=="make":
print "building relational algebra grammar"
Buildrelalg()
done = 1
else:
runfile(sys.stdin)
done = 1
finally:
if not done:
print __doc__
<html>
<head>
<title>Gadfly: server operations</title>
</head>
<body bgcolor="#ddff55">
<h1>Gadfly Server Operations</h1>
To permit multiple processes to access and modify a
single database instance, and to reduce the per-process overhead
of connecting to a Gadfly database, a Gadfly database may be
run in server mode. A Gadfly server can use a DBA-configured
start-up script to set up optimized query accesses and certain
forms of security.
<p>
For example, to start up a server for the
test database "test" in directory "dbtest" (created by gftest.py)
use:
<pre>
python gfserve.py 2222 test dbtest admin
</pre>
or to start up the same server with some non-privileged
policies and some named prepared queries (as initialized
in gfstest.startup(...)) use:
<pre>
python gfserve.py 2222 test dbtest admin gfstest
</pre>
In both cases the admin password for the server is "admin"
and the server runs on port 2222.
See the doc string for gfserve.py for more information on
the command line arguments.
<p>
Only one process should directly access a Gadfly database at a time
(that is, not mediated by a server),
so if a server is running, no other server for that database
should be started, and no other process should connect to that
database in "non-server" mode.
<h1>Motivation</h1>
There are several reasons to run a server: to allow multiple
processes to access the same database; to allow password-protected,
restricted access to the database by non-privileged agents;
and to permit faster access to the database
by providing globally shared
prepared statements. Using a server also eliminates the need
to start up and load the database many times -- and startup
time could be considerable if the database is large.
<p>
For example, I imagine that simple Gadfly servers may be of
use to implement database-enabled CGI scripts, whereas the
"non-server" Gadfly will only run with CGI scripts that do not modify
the database, and the startup time for Gadfly might make those
scripts unacceptably slow if the database is large. Furthermore,
by using the security features a Gadfly server could be configured
to allow restricted data distribution across a network without
compromising the integrity of the database.
<h1>Security</h1>
The primary goal of Gadfly server security is to prevent
accidental or malicious destruction of a database.
<p>
Security is arbitrated by policies. Policies have passwords
that are never transmitted in clear text. However, a "captured"
command could potentially be repeated by a hostile program
even without knowing the password. It is not a good idea to
run admin or other unrestricted commands on a network where
hostile parties may be sniffing traffic. As with the rest
of the system, I provide no guarantees, but for many purposes
the level of security provided may be acceptable. To be specific,
passwords are used to generate md5 certificates for all server
accesses (please see gfsocket.py for implementation details).
<p>
A server always has
an "admin" policy that is permitted to shutdown, restart, or
force a checkpoint on the server. By default the admin
policy also has the ability to run arbitrary SQL statements
such as "drop table x". This ability can be disabled in
a startup function if needed.
<pre>
admin_policy.general_queries=0
</pre>
<p>
Other policies can be created that have very restricted access.
For example the following startup function initializes two
policies beyond the admin policy that can only access certain
tables in specific ways (from gfstest.py):
<pre>
def startup(admin_policy, connection, Server_instance):
"""example startup script.
add policies test and test1 (passwords same as the names)
test1 is allowed to query the likes table by name
test is allowed to update likes where drinker='nan'
also add prepared query dumpwork to admin_policy.
"""
from gfserve import Policy
admin_policy["dumpwork"] = "select * from work"
test1 = Policy("test1", "test1", connection, queries=0)
test = Policy("test", "test", connection, queries=0)
test1["qlike"] = "select * from likes where drinker=?"
test["updatenan"] = """
update likes
set beer=?, perday=?
where drinker='nan'
"""
test["getnan"] = """
select * from likes where drinker='nan'
"""
return {"test": test, "test1": test1}
</pre>
Please see the doc string for gfserve.py for more information
on creating startup functions.
<p>
A policy with queries disabled (queries=0) can only execute
named queries. By using such policies a DBA can configure
a server such that client programs can only read certain tables,
can only update certain rows of certain tables in certain ways,
and so forth.
<p>
Even policies with "unrestricted access" (queries=1)
can provide performance benefits if they have associated
named, prepared queries (like "dumpwork" above). At the moment
the SQL parser slows down gadfly a bit, and prepared queries
will only be parsed once for all clients. After the first
access, subsequent accesses may be noticeably faster (10x faster
in some cases), especially
if the server has the kjbuckets builtin C module. However,
with queries=1 the policy can execute any SQL statement.
<p>
<strong>NOTE: The server runs all accesses (once the complete
message has been read from the network) serially -- there is
no concurrent access permitted to
a Gadfly instance at this time. For this
reason a "large query" may cause the server to "freeze" and
delay other accesses.
</strong> Incomplete requests due to network delays or
other problems will not freeze the server, however (sockets
are polled using select.select).
<p>
<strong>NOTE: All server accesses run in "autocommit mode" at
this time. A successful access automatically triggers a database
commit (but an unsuccessful access will rollback).</strong>
<p>
As an optimization, however, checkpoints only occur occasionally,
once per a configurable number of accesses, set by:
<pre>
Server_instance.check_loop = 100
</pre>
<h1>Start up</h1>
Servers can be started from the command line using the gfserve.py
script interpretation
(as shown above)
or using gfserve.Server(...) from another
program. See the doc strings and source for gfserve.py and gfstest.py
for more information.
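<p>
For instance (a sketch only; the argument order here is assumed to
mirror the command line shown above -- check the gfserve.py doc strings
for the real signature):
<pre>
from gfserve import Server
S = Server(2222, "test", "dbtest", "admin") # port, db, directory, password
</pre>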
<h1>Shut down</h1>
Servers can be shut down from the command line interpretation of
gfclient.py or from another program using the gfclient(...) class
shutdown() method, but only using the admin policy with the admin
password. For example to shut down the server started above:
<pre>
python gfclient.py shutdown 2222 admin
</pre>
See the doc strings and source for gfserve.py
and gfstest.py
for more information.
<h1>Client Access</h1>
Client access to a gadfly server is similar to the normal
Python DB-SIG DBAPI
access to gadfly, except that it is sometimes faster and can
potentially
be run from any machine reachable on the network (if the client
program knows the password).
<p>
To access a gadfly server from a remote machine the only
python modules required (in addition to the standard libraries)
are gfclient.py and gfsocket.py.
<p>
Initialize a connection for a given "POLICY" and "PASSWORD"
to a running server
on "machine.domain.com" using port number 2222 with:
<pre>
from gfclient import gfclient
conn = gfclient("POLICY", 2222, "PASSWORD", "machine.domain.com")
</pre>
Note that policy names and passwords are case sensitive.
<p>
Queries and other statements are normally executed via cursors.
Obtain a cursor from a connection using:
<pre>
cursor = connection.cursor()
</pre>
Execute a statement in a cursor using:
<pre>
cursor.execute(statement)
</pre>
or to provide dynamic parameters:
<pre>
cursor.execute(statement, dynamic_parameters)
</pre>
For example:
<pre>
cursor.execute("select * from work")
...
cursor.execute("select * from work where name=?", ("carla",))
</pre>
The dynamic parameters work the same as described in
<a href="gadfly.html">the main gadfly documentation page</a>.
In particular INSERT VALUES can insert several rows at once
by using a list of tuples for the rows.
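For example (an illustration only; the "hours" column is hypothetical):
<pre>
cursor.execute("insert into work (name, hours) values (?, ?)",
[("carla", 40), ("alan", 30)])
</pre>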
<p>
If there is any problem (bad policy name, bad password, server
not running, queries not allowed for this policy)
the execute will generate an exception.
<p>
To run a named/prepared query (initialized at startup) use
execute_prepared, which takes a prepared statement name
rather than a query string:
<pre>
cursor.execute_prepared("updatenan", ("rollingrock", 1))
...
cursor.execute_prepared("getnan")
</pre>
The execute_prepared method works just like the execute
method except that the "name" must be the name of a query initialized
by the startup(...) function at server startup.
<p><strong>
NOTE: by default any execution that sends or receives "too much
data" will be aborted. Edit gfsocket.py (both on the client end
and on the server end if different) if you wish to disable this
sanity check feature.
<pre>
LEN_LIMIT=10e8
</pre></strong>
<p>
As with other dbapi cursors the results of a query can be
extracted as a list of tuples using (after execute):
<pre>
result_list = cursor.fetchall()
</pre>
The other fetches (fetchone and fetchmany) have not been
implemented yet (partially since they don't make much sense
in this context).
<p>
Both named and unnamed statements may be semicolon-separated
sequences of several SQL statements, but if they are, they will return
no results.
<h1>Implementation Comments</h1>
For your information the server/client interaction is much like
"finger" or "http" -- each client access is a separate TCP/Stream
connection where the client sends a request and the server sends
a response. After each access the connection is closed and the
next access generates a new connection.
I did it that way because it was a simple and
robust strategy (witness the success of HTTP).
<p>
<strong>
Please note: Although I have attempted to provide a robust
implementation
for this software I do not guarantee its correctness. I hope
it will work well for you but I do not assume any legal
responsibility for problems anyone may have during use
of these programs.
</strong>
</body>
</html>
"""rule bindings for sql grammar."""
def elt0(list, context):
"""return first member of reduction"""
return list[0]
def elt1(list, context):
"""return second member"""
return list[1]
def elt2(list, context):
return list[2]
def returnNone(list, context):
return None
def stat1(list, context):
"""return list of len 1 of statements"""
return list
#def statn(list, context):
# """return a list of statement reductions"""
# [stat, semi, statlist] = list
# statlist.insert(0, stat)
# return statlist
def thingcommalist(l, c):
[thing, comma, list] = l
list.insert(0, thing)
return list
def listcommathing(l, c):
[list, comma, thing] = l
list.append(thing)
return list
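# illustration (not in the original source): for a right-recursive rule
# like "@R statn :: statement_list >> statement ; statement_list" the
# reduction list is [statement, ";", statement_list], so thingcommalist
# prepends the statement; listcommathing is the mirror image for
# left-recursive rules like "@R namesn :: namelist >> namelist , user_defined_name".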
statn = thingcommalist
selstat = elt0
insstat = elt0
createtablestat = elt0
droptablestat = elt0
delstat = elt0
updatestat = elt0
createindexstat = elt0
dropindexstat = elt0
createviewstat = elt0
dropviewstat = elt0
# drop view statement stuff
def dropview(l, c):
[drop, view, name] = l
from sqlsem import DropView
return DropView(name)
# create view statement stuff
def createview(l, c):
[create, view, name, namelist, as, selection] = l
from sqlsem import CreateView
return CreateView(name, namelist, selection)
optnamelist0 = returnNone
optnamelistn = elt1
# drop index statement stuff
def dropindex(l, c):
[drop, index, name] = l
from sqlsem import DropIndex
return DropIndex(name)
# create index statement stuff
def createindex(l, c):
[create, index, name, on, table, op, namelist, cp] = l
from sqlsem import CreateIndex
return CreateIndex(name, table, namelist)
names1 = stat1
namesn = listcommathing
# update statement stuff
def update(l, c):
[upd, name, set, assns, condition] = l
from sqlsem import UpdateOp
return UpdateOp(name, assns, condition)
def assn(l, c):
[col, eq, exp] = l
return (col, exp)
def assn1(l, c):
[ (col, exp) ] = l
from sqlsem import TupleCollector
result = TupleCollector()
result.addbinding(col, exp)
return result
def assnn(l, c):
[ result, comma, (col, exp) ] = l
result.addbinding(col, exp)
return result
# delete statement stuff
def deletefrom(l, c):
[delete, fromkw, name, where] = l
from sqlsem import DeleteOp
return DeleteOp(name, where)
# drop table stuff
def droptable(l, c):
[drop, table, name] = l
from sqlsem import DropTable
return DropTable(name)
# create table statement stuff
def createtable(list, context):
[create, table, name, p1, colelts, p2] = list
from sqlsem import CreateTable
return CreateTable(name, colelts)
colelts1 = stat1
coleltsn = listcommathing
#def coleltsn(list, c):
# [c1, cc, ce] = list
# c1.append(ce)
# return c1
coleltid = elt0
coleltconstraint = elt0
def coldef(l, c):
[colid, datatype, default, constraints] = l
from sqlsem import ColumnDef
return ColumnDef(colid, datatype, default, constraints)
optdef0 = returnNone
optcolconstr0 = returnNone
stringtype = exnumtype = appnumtype = integer = float = varchar = elt0
varcharn = elt0
# insert statement stuff
def insert1(l, c):
[insert, into, name, optcolids, insert_spec] = l
from sqlsem import InsertOp
return InsertOp(name, optcolids, insert_spec)
optcolids0 = returnNone
optcolids1 = elt1
colids1 = stat1
colidsn = listcommathing
def insert_values(l, c):
from sqlsem import InsertValues
return InsertValues(l[2])
def insert_query(l, c):
from sqlsem import InsertSubSelect
return InsertSubSelect(l[0])
litlist1 = stat1
litlistn = listcommathing
# select statement stuff
def selectx(list, context):
[sub, optorder_by] = list
#sub.union_select = optunion
sub.order_by = optorder_by
# number of dynamic parameters in this parse.
sub.ndynamic = context.ndynamic()
return sub
psubselect = elt1
def subselect(list, context):
[select, alldistinct, selectlist, fromkw, trlist,
optwhere, optgroup, opthaving, optunion] = list
from sqlsem import Selector
sel = Selector(
alldistinct,
selectlist,
trlist,
optwhere,
optgroup,
opthaving,
# store # of dynamic parameters seen in this parse.
ndynamic = context.ndynamic()
)
sel.union_select = optunion
return sel
def ad0(list, context):
return "ALL"
adall = ad0
def addistinct(list, context):
return "DISTINCT"
def where0(list, context):
from sqlsem import BTPredicate
return BTPredicate() # true
where1 = elt1
group0 = returnNone
group1 = elt2
colnames1 = stat1
colnamesn = listcommathing
having0 = returnNone
having1 = elt1
union0 = returnNone
def union1(l, c):
[union, alldistinct, selection] = l
from sqlsem import Union
return Union(alldistinct, selection)
def except1(l, c):
[union, selection] = l
alldistinct = "DISTINCT"
from sqlsem import Except
return Except(alldistinct, selection)
def intersect1(l, c):
[union, selection] = l
alldistinct = "DISTINCT"
from sqlsem import Intersect
return Intersect(alldistinct, selection)
order0 = returnNone
order1 = elt2
#orderby = elt2
sortspec1 = stat1
sortspecn = listcommathing
def sortint(l, c):
from sqlsem import PositionedSort
[num, ord] = l
from types import IntType
if type(num)!=IntType or num<=0:
raise ValueError, `num`+': col position not positive int'
return PositionedSort(num, ord)
def sortcol(l, c):
from sqlsem import NamedSort
[name, ord] = l
return NamedSort(name, ord)
def optord0(l, c):
return "ASC"
optordasc = optord0
def optorddesc(l, c):
return "DESC"
## table reference list returns list of (name, name) or (name, alias)
def trl1(l, c):
[name] = l
return [(name, name)]
def trln(l,c):
[name, comma, others] = l
others.insert(0, (name, name))
return others
def trl1a(l,c):
[name, alias] = l
return [(name, alias)]
def trlna(l,c):
[name, alias, comma, others] = l
others.insert(0, (name, alias))
return others
def trl1as(l,c):
[name, as, alias] = l
return [(name, alias)]
def trlnas(l,c):
[name, as, alias, comma, others] = l
others.insert(0, (name, alias))
return others
tablename1 = elt0
columnid1 = elt0
def columnname1(list, context):
[ci] = list
return columnname2([None, None, ci], context)
def columnname2(list, context):
[table, ignore, col] = list
from sqlsem import BoundAttribute
return BoundAttribute(table, col)
def dynamic(list, context):
from sqlsem import BoundAttribute
# return a new dynamic parameter
int = context.param()
return BoundAttribute(0, int)
# expression stuff
def literal(list, context):
[lit] = list
from sqlsem import Constant
return Constant(lit)
def stringstring(l, c):
"""two strings in sequence = apostrophe"""
[l1, l2] = l
from sqlsem import Constant
value = "%s'%s" % (l1.value0, l2)
return Constant(value)
numlit = literal
stringlit = literal
primarylit = elt0
primary1 = elt0
factor1 = elt0
term1 = elt0
exp1 = elt0
def expplus(list, context):
[exp, plus, term] = list
return exp + term
def expminus(list, context):
[exp, minus, term] = list
return exp - term
def termtimes(list, context):
[exp, times, term] = list
return exp * term
def termdiv(list, context):
[exp, div, term] = list
return exp / term
plusfactor = elt1
def minusfactor(list, context):
[minus, factor] = list
return -factor
primaryexp = elt1
primaryset = elt0
def countstar(l, c):
from sqlsem import Count
return Count("*")
def distinctset(l, c):
[agg, p1, distinct, exp, p2] = l
return set(agg, exp, 1)
distinctcount = distinctset
def allset(l, c):
[agg, p1, exp, p2] = l
return set(agg, exp, 0)
allcount = allset
def set(agg, exp, distinct):
import sqlsem
if agg=="AVG":
return sqlsem.Average(exp, distinct)
if agg=="COUNT":
return sqlsem.Count(exp, distinct)
if agg=="MAX":
return sqlsem.Maximum(exp, distinct)
if agg=="MIN":
return sqlsem.Minimum(exp, distinct)
if agg=="SUM":
return sqlsem.Sum(exp, distinct)
if agg=="MEDIAN":
return sqlsem.Median(exp, distinct)
raise NameError, `agg`+": unknown aggregate"
average = count = maximum = minimum = summation = median = elt0
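# illustration (not in the original source): a COUNT(DISTINCT e) form
# reduces via distinctcount to set("COUNT", e, 1), ie sqlsem.Count(e, 1),
# while COUNT(e) reduces via allcount to sqlsem.Count(e, 0).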
def predicateeq(list, context):
[e1, eq, e2] = list
return e1.equate(e2)
def predicatene(list, context):
[e1, lt, gt, e2] = list
return ~(e1.equate(e2))
def predicatelt(list, context):
[e1, lt, e2] = list
return e1.lt(e2)
def predicategt(list, context):
[e1, lt, e2] = list
return e2.lt(e1)
def predicatele(list, context):
[e1, lt, eq, e2] = list
return e1.le(e2)
def predicatege(list, context):
[e1, lt, eq, e2] = list
return e2.le(e1)
def predbetween(list, context):
[e1, between, e2, andkw, e3] = list
from sqlsem import BetweenPredicate
return BetweenPredicate(e1, e2, e3)
def prednotbetween(list, context):
[e1, notkw, between, e2, andkw, e3] = list
from sqlsem import BetweenPredicate
return ~BetweenPredicate(e1, e2, e3)
predicate1 = elt0
bps = elt1
bp1 = elt0
# exists predicate stuff
predexists = elt0
def exists(l, c):
[ex, paren1, subquery, paren2] = l
from sqlsem import ExistsPred
return ExistsPred(subquery)
def notbf(list, context):
[ notst, thing ] = list
return ~thing
# quantified predicates
nnall = elt0
nnany = elt0
def predqeq(list, context):
[exp, eq, allany, p1, subq, p2] = list
from sqlsem import QuantEQ, QuantNE
if allany=="ANY":
return QuantEQ(exp, subq)
else:
return ~QuantNE(exp, subq)
def predqne(list, context):
[exp, lt, gt, allany, p1, subq, p2] = list
from sqlsem import QuantEQ, QuantNE
if allany=="ANY":
return QuantNE(exp, subq)
else:
return ~QuantEQ(exp, subq)
def predqlt(list, context):
[exp, lt, allany, p1, subq, p2] = list
from sqlsem import QuantLT, QuantGE
if allany=="ANY":
return QuantLT(exp, subq)
else:
return ~QuantGE(exp, subq)
def predqgt(list, context):
[exp, gt, allany, p1, subq, p2] = list
from sqlsem import QuantGT, QuantLE
if allany=="ANY":
return QuantGT(exp, subq)
else:
return ~QuantLE(exp, subq)
def predqle(list, context):
[exp, less, eq, allany, p1, subq, p2] = list
from sqlsem import QuantGT, QuantLE
if allany=="ANY":
return QuantLE(exp, subq)
else:
return ~QuantGT(exp, subq)
def predqge(list, context):
[exp, gt, eq, allany, p1, subq, p2] = list
from sqlsem import QuantGE, QuantLT
if allany=="ANY":
return QuantGE(exp, subq)
else:
return ~QuantLT(exp, subq)
# subquery expression
def subqexpr(list, context):
[p1, subq, p2] = list
from sqlsem import SubQueryExpression
return SubQueryExpression(subq)
def predin(list, context):
[exp, inkw, p1, subq, p2] = list
from sqlsem import InPredicate
return InPredicate(exp, subq)
def prednotin(list, context):
[exp, notkw, inkw, p1, subq, p2] = list
from sqlsem import InPredicate
return ~InPredicate(exp, subq)
def predinlits(list, context):
[exp, inkw, p1, lits, p2] = list
from sqlsem import InLits
return InLits(exp, lits)
def prednotinlits(list, context):
[exp, notkw, inkw, p1, lits, p2] = list
from sqlsem import InLits
return ~InLits(exp, lits)
bf1 = elt0
def booln(list, context):
[ e1, andst, e2 ] = list
return e1&e2
bool1 = elt0
def searchn(list, context):
[ e1, orst, e2 ] = list
return e1 | e2
search1 = elt0
colalias = elt0
# select list stuff
def selectstar(l,c):
return "*"
selectsome = elt0
select1 = elt0 # (immediately rebound by the def below)
# selectsub returns (expression, asname)
def select1(list, context):
[ (exp, name) ] = list
from sqlsem import TupleCollector
result = TupleCollector()
result.addbinding(name, exp)
return result
def selectn(list, context):
[ selectsubs, comma, select_sublist ] = list
(exp, name) = select_sublist
selectsubs.addbinding(name, exp)
return selectsubs
def selectit(list, context):
[exp] = list
return (exp, None) # no binding!
def selectname(list, context):
[exp, as, alias] = list
return (exp, alias)
colalias = elt0
#### do the bindings.
# note: all reduction function defs must precede this assign
VARS = vars()
class punter:
def __init__(self, name):
self.name = name
def __call__(self, list, context):
print "punt:", self.name, list
return list
class tracer:
def __init__(self, name, fn):
self.name = name
self.fn = fn
def __call__(self, list, context):
print self.name, list
return self.fn(list, context)
def BindRules(sqlg):
for name in sqlg.RuleNameToIndex.keys():
if VARS.has_key(name):
#print "binding", name
sqlg.Bind(name, VARS[name]) # nondebug
#sqlg.Bind(name, tracer(name, VARS[name]) ) # debug
else:
print "unbound", name
sqlg.Bind(name, punter(name))
return sqlg
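# note (not in the original source): BindRules pairs grammar rule names
# (eg "selstat", "insert1") with the same-named functions and classes
# above, so a new rule in the sql grammar needs a same-named reduction
# here; otherwise it gets the "punter" default, which just echoes the
# reduction list.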
"""grammar generation stuff for sql.
This module does not bind any rule semantics, it
just generates the parser data structures.
"""
### interpretation functions and regexen for terminals
MARSHALFILE = "sql.mar"
import string
alphanum = string.letters+string.digits + "_"
userdefre = "[%s][%s]*" % (string.letters +"_", alphanum)
commentre = "--.*"
def userdeffn(str):
from string import upper
return upper(str)
charstre = "'[^\n']*'"
def charstfn(str):
return str[1:-1]
numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really...
def numlitfn(str):
"""Note: this is "safe" because regex
filters out dangerous things."""
return eval(str)
def DeclareTerminals(Grammar):
Grammar.Addterm("user_defined_name", userdefre, userdeffn)
Grammar.Addterm("character_string_literal", charstre, charstfn)
Grammar.Addterm("numeric_literal", numlitre, numlitfn)
def BuildSQL(filename=MARSHALFILE):
import kjParseBuild
from sqlgram import sqlrules, nonterms, keywords, puncts
SQLG = kjParseBuild.NullCGrammar()
SQLG.SetCaseSensitivity(0)
DeclareTerminals(SQLG)
SQLG.Keywords(keywords)
SQLG.punct(puncts)
SQLG.Nonterms(nonterms)
SQLG.comments([commentre])
# should add comments
SQLG.Declarerules(sqlrules)
print "working..."
SQLG.Compile()
print "testing"
from sqlgtest import test
for x in test:
print SQLG.DoParse1(x)
print "dumping to", filename
outfile = open(filename, "wb")
SQLG.MarshalDump(outfile)
outfile.close()
return SQLG
def reloadSQLG(filename=MARSHALFILE):
"""does not bind any interpretation functions."""
import kjParser
infile = open(filename, "rb")
SQLG = kjParser.UnMarshalGram(infile)
infile.close()
DeclareTerminals(SQLG)
return SQLG
def getSQL():
from sqlwhere import filename
return reloadSQLG(filename)
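# example regeneration (a sketch, not in the original source):
# import sqlgen
# sqlgen.BuildSQL() # rebuild and marshal the grammar to sql.mar
# SQLG = sqlgen.reloadSQLG() # reload later (semantics unbound)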
# sql grammar, partial, based on ODBC 2.0 programmer's ref
## someday add subquery precedence to allow more general selects.
sqlrules = """
statement_list ::
@R stat1 :: statement_list >> statement
@R statn :: statement_list >> statement ; statement_list
@R dropindexstat :: statement >> drop_index_statement
@R createindexstat :: statement >> create_index_statement
@R selstat :: statement >> select_statement
@R insstat :: statement >> insert_statement
@R createtablestat :: statement >> create_table_statement
@R droptablestat :: statement >> drop_table_statement
@R delstat :: statement >> delete_statement_searched
@R updatestat :: statement >> update_statement_searched
@R createviewstat :: statement >> create_view_statement
@R dropviewstat :: statement >> drop_view_statement
## drop view statement
@R dropview :: drop_view_statement >> DROP VIEW user_defined_name
## create view statement
@R createview :: create_view_statement >>
CREATE VIEW user_defined_name optnamelist AS select_statement
@R optnamelist0 :: optnamelist >>
@R optnamelistn :: optnamelist >> ( namelist )
## drop index statement
@R dropindex :: drop_index_statement >> DROP INDEX user_defined_name
## create index statement
@R createindex :: create_index_statement >>
CREATE INDEX user_defined_name
ON user_defined_name
( namelist )
@R names1 :: namelist >> user_defined_name
@R namesn :: namelist >> namelist , user_defined_name
## update statement
@R update :: update_statement_searched >>
UPDATE user_defined_name
SET assns
optwhere
@R assn1 :: assns >> assn
@R assnn :: assns >> assns , assn
@R assn :: assn >> column_identifier = expression
#####
## delete statement
@R deletefrom :: delete_statement_searched >> DELETE FROM user_defined_name optwhere
## drop table
@R droptable :: drop_table_statement >> DROP TABLE user_defined_name
## create table statement ( restricted )
@R createtable :: create_table_statement >>
CREATE TABLE user_defined_name ( colelts )
@R colelts1 :: colelts >> colelt
@R coleltsn :: colelts >> colelts , colelt
@R coleltid :: colelt >> column_definition
@R coleltconstraint :: colelt >> column_constraint_definition
## column constraints deferred
@R coldef :: column_definition >>
column_identifier data_type optdefault optcolconstraints
## optdefault deferred
@R optdef0 :: optdefault >>
## optcolconstraint deferred
@R optcolconstr0 :: optcolconstraints >>
@R stringtype :: data_type >> character_string_type
@R exnumtype :: data_type >> exact_numeric_type
@R appnumtype :: data_type >> approximate_numeric_type
@R integer :: exact_numeric_type >> INTEGER
@R float :: approximate_numeric_type >> FLOAT
@R varchar :: character_string_type >> VARCHAR
@R varcharn :: character_string_type >> VARCHAR ( numeric_literal )
## insert statement
@R insert1 :: insert_statement >>
INSERT INTO table_name optcolids insert_spec
@R optcolids0 :: optcolids >>
@R optcolids1 :: optcolids >> ( colids )
@R colids1 :: colids >> column_identifier
@R colidsn :: colids >> colids , column_identifier
@R insert_values :: insert_spec >> VALUES ( litlist )
@R insert_query :: insert_spec >> sub_query
@R litlist1 :: litlist >> literal
@R litlistn :: litlist >> litlist , literal
## select statement
@R subselect :: sub_query >>
SELECT alldistinct select_list
FROM table_reference_list
optwhere optgroup opthaving optunion
## @R psubselect :: sub_query >> ( sub_query )
@R selectx :: select_statement >>
sub_query
optorder_by
@R ad0 :: alldistinct >>
@R adall :: alldistinct >> ALL
@R addistinct :: alldistinct >> DISTINCT
@R where0 :: optwhere >>
@R where1 :: optwhere >> WHERE search_condition
@R group0 :: optgroup >>
@R group1 :: optgroup >> GROUP BY colnamelist
@R colnames1 :: colnamelist >> column_name
@R colnamesn :: colnamelist >> colnamelist , column_name
@R having0 :: opthaving >>
@R having1 :: opthaving >> HAVING search_condition
@R union0 :: optunion >>
@R union1 :: optunion >> UNION alldistinct sub_query
@R except1 :: optunion >> EXCEPT sub_query
@R intersect1 :: optunion >> INTERSECT sub_query
@R order0 :: optorder_by >>
@R order1 :: optorder_by >> ORDER BY sortspeclist
##@R orderby :: order_by_clause >> ORDER BY sortspeclist
@R sortspec1 :: sortspeclist >> sort_specification
@R sortspecn :: sortspeclist >> sortspeclist , sort_specification
## really, should be unsigned int
@R sortint :: sort_specification >> numeric_literal opt_ord
@R sortcol :: sort_specification >> column_name opt_ord
@R optord0 :: opt_ord >>
@R optordasc :: opt_ord >> ASC
@R optorddesc :: opt_ord >> DESC
## table reference list (nasty hack alert)
@R trl1 :: table_reference_list >> user_defined_name
@R trln :: table_reference_list >> user_defined_name , table_reference_list
@R trl1a :: table_reference_list >> user_defined_name user_defined_name
@R trlna :: table_reference_list >> user_defined_name user_defined_name , table_reference_list
@R trl1as :: table_reference_list >> user_defined_name AS user_defined_name
@R trlnas :: table_reference_list >> user_defined_name AS user_defined_name , table_reference_list
## select list
@R selectstar :: select_list >> *
@R selectsome :: select_list >> selectsubs
@R select1 :: selectsubs >> select_sublist
@R selectn :: selectsubs >> selectsubs , select_sublist
@R selectit :: select_sublist >> expression
@R selectname :: select_sublist >> expression AS column_alias
@R colalias :: column_alias >> user_defined_name
## search condition
@R search1 :: search_condition >> boolean_term
@R searchn :: search_condition >> boolean_term OR search_condition
@R bool1 :: boolean_term >> boolean_factor
@R booln :: boolean_term >> boolean_factor AND boolean_term
@R bf1 :: boolean_factor >> boolean_primary
@R notbf :: boolean_factor >> NOT boolean_primary
@R bp1 :: boolean_primary >> predicate
@R bps :: boolean_primary >> ( search_condition )
## predicate (simple for now!!!)
@R predicate1 :: predicate >> comparison_predicate
## comparison predicate (simple for now!!!)
@R predicateeq :: comparison_predicate >> expression = expression
@R predicatelt :: comparison_predicate >> expression < expression
@R predicategt :: comparison_predicate >> expression > expression
@R predicatele :: comparison_predicate >> expression < = expression
@R predicatege :: comparison_predicate >> expression > = expression
@R predicatene :: comparison_predicate >> expression < > expression
@R predbetween :: comparison_predicate >> expression BETWEEN expression AND expression
@R prednotbetween :: comparison_predicate >>
expression NOT BETWEEN expression AND expression
## exists predicate
@R predexists :: predicate >> exists_predicate
@R exists :: exists_predicate >> EXISTS ( sub_query )
## quantified predicate
@R predqeq :: predicate >> expression = allany ( sub_query )
@R predqne :: predicate >> expression < > allany ( sub_query )
@R predqlt :: predicate >> expression < allany ( sub_query )
@R predqgt :: predicate >> expression > allany ( sub_query )
@R predqle :: predicate >> expression < = allany ( sub_query )
@R predqge :: predicate >> expression > = allany ( sub_query )
@R nnall :: allany >> ALL
@R nnany :: allany >> ANY
## in predicate
@R predin :: predicate >> expression IN ( sub_query )
@R prednotin :: predicate >> expression NOT IN ( sub_query )
@R predinlits :: predicate >> expression IN ( litlist )
@R prednotinlits :: predicate >> expression NOT IN ( litlist )
## subquery expression
@R subqexpr :: expression >> ( sub_query )
## expression (simple for now!!!)
@R exp1 :: expression >> term
@R expplus :: expression >> expression + term
@R expminus :: expression >> expression - term
@R term1 :: term >> factor
@R termtimes :: term >> term * factor
@R termdiv :: term >> term / factor
@R factor1 :: factor >> primary
@R plusfactor :: factor >> + factor
@R minusfactor :: factor >> - factor
@R primary1 :: primary >> column_name
@R primarylit :: primary >> literal
@R primaryexp :: primary >> ( expression )
@R primaryset :: primary >> set_function_reference
@R stringlit :: literal >> character_string_literal
@R stringstring :: literal >> literal character_string_literal
@R numlit :: literal >> numeric_literal
## set functions (nasty hack!)
@R countstar :: set_function_reference >> COUNT ( * )
@R distinctcount :: set_function_reference >> COUNT ( DISTINCT expression )
@R allcount :: set_function_reference >> COUNT ( expression )
@R distinctset :: set_function_reference >> aggregate ( DISTINCT expression )
@R allset :: set_function_reference >> aggregate ( expression )
@R average :: aggregate >> AVG
##@R count :: aggregate >> COUNT
@R maximum :: aggregate >> MAX
@R minimum :: aggregate >> MIN
@R summation :: aggregate >> SUM
@R median :: aggregate >> MEDIAN
## dynamic parameter (varies quite a bit from ODBC spec)
@R dynamic :: literal >> ?
## column name
@R columnname1 :: column_name >> column_identifier
@R columnname2 :: column_name >> table_name . column_identifier
@R tablename1 :: table_name >> user_defined_name
@R columnid1 :: column_identifier >> user_defined_name
"""
nonterms = """
exists_predicate set_function_reference aggregate
sortspeclist sort_specification opt_ord
drop_table_statement delete_statement_searched update_statement_searched
assns assn
insert_statement litlist colelt optcolconstraints optdefault
optcolids insert_spec create_table_statement
colids colelts column_constraint_definition
column_definition data_type character_string_type
exact_numeric_type approximate_numeric_type
expression term factor primary literal
comparison_predicate column_alias column_identifier table_name
boolean_term boolean_factor boolean_primary predicate
selectsubs expression alias sub_query
statement_list statement select_statement alldistinct subselect
select_list table_reference_list optwhere optgroup opthaving
order_by_clause select_sublist
optunion optorder_by search_condition colnamelist column_name
table_reference table_name create_index_statement namelist
drop_index_statement allany create_view_statement drop_view_statement
optnamelist
"""
keywords = """
INDEX ON ANY IN VIEW AS
EXCEPT INTERSECT
EXISTS AVG COUNT MAX MIN SUM MEDIAN
UPDATE DROP DELETE FROM SET
INSERT INTO VALUES CREATE TABLE INTEGER FLOAT VARCHAR
AND OR NOT
SELECT FROM WHERE HAVING GROUP BY UNION ALL DISTINCT AS ORDER
ASC DESC BETWEEN
"""
puncts = """.,*;=<>{}()?+-/"""
# terminals user_defined_name, character_string_literal,
# numeric_literal
"test parses for sql grammar"
test = [
"select a from x where b=c",
"select distinct x.a from x where x.b=c",
"select all a from x where b=c",
"select a from x, y where b=c or x.d=45",
"select a as k from x d, y as m where b=c",
"select 1 as n, a from x where b=c",
"select * from x",
"select a from x where b=c",
"select a from x where not b=c or d=1 and e=5",
"select a from x where a=1 and (x.b=3 or not b=c)",
]
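# These strings are only parsed, not executed: BuildSQL in the grammar
# builder imports this list as a smoke test of the grammar.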
"""Database modification statement semantics"""
import sqlsem
# ordering of ddef storage is important so, eg, index defs
# follow table defs.
class Ordered_DDF:
"""mixin for DDF statement sorting, subclass defines s.cmp(o)"""
def __cmp__(self, other):
try:
#print "comparing", self.name, other.name
try:
sc = self.__class__
oc = other.__class__
#print sc, oc
except:
#print "punting 1", -1
return -1
if sc in ddf_order and oc in ddf_order:
test = cmp(ddf_order.index(sc), ddf_order.index(oc))
#print "ddforder", test
if test: return test
return self.cmp(other)
else:
test = cmp(sc, oc)
#print "punting 2", test
return test
except:
#import sys
#print "exception!"
#print sys.exc_type, sys.exc_value
return -1
def __coerce__(self, other):
return (self, other)
def cmp(self, other):
"""redefine if no name field"""
return cmp(self.name, other.name)
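# Ordering sketch: because of ddf_order (defined at the end of this
# module), sorting a mixed list of data definitions puts table creation
# before index creation regardless of names, eg
#   defs = [CreateIndex("I", "T", ["A"]), CreateTable("T", [])]
#   defs.sort()    # -> [the CreateTable, the CreateIndex]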
CTFMT = """\
CREATE TABLE %s (
%s
)"""
class CreateTable(Ordered_DDF):
"""create table operation"""
def __init__(self, name, colelts):
self.name = name
self.colelts = colelts
self.indb = None # db in which to create
def initargs(self):
return (self.name, [])
def marshaldata(self):
from sqlsem import serialize
return map(serialize, self.colelts)
def demarshal(self, args):
from sqlsem import deserialize
self.colelts = map(deserialize, args)
def __repr__(self):
from string import join
elts = list(self.colelts)
elts = map(repr, elts)
return CTFMT % (self.name, join(elts, ",\n "))
def relbind(self, db):
"""check that table doesn't already exist"""
if db.has_relation(self.name):
raise NameError, "cannot create %s, exists" % (self.name,)
self.indb = db
return self
def eval(self, dyn=None):
"create the relation now"
# datatypes currently happily ignored :)
db = self.indb
if db is None:
raise ValueError, "unbound or executed"
self.indb = None
name = self.name
if db.has_relation(self.name):
raise NameError, "relation %s exists, cannot create" % (self.name,)
db.touched = 1
attnames = []
for x in self.colelts:
attnames.append(x.colid)
from gfdb0 import Relation0
r = Relation0(attnames)
# must store if new (unset for reloads)
r.touched = 1
db[name] = r
db.add_datadef(name, self)
log = db.log
if log is not None:
log.log(self)
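# Repr sketch (hypothetical column; ColumnDef is defined further down):
#   print CreateTable("ONE", [ColumnDef("A", "INTEGER", None, None)])
# prints roughly
#   CREATE TABLE ONE (
#      A INTEGER
#   )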
viewfmt = """\
CREATE VIEW %s (%s) AS
%s"""
class CreateView(sqlsem.SimpleRecursive, Ordered_DDF):
"""CREATE VIEW name (namelist) AS selection"""
# note: no check for cross-references on drops!
def __init__(self, name, namelist, selection):
self.name = name
self.namelist = namelist
self.selection = selection
self.indb = None
def __repr__(self):
return viewfmt % (self.name, self.namelist, self.selection)
def initargs(self):
return (self.name, self.namelist, self.selection)
def relbind(self, db):
self.indb = db
name = self.name
if db.has_datadef(name):
raise NameError, "(view) datadef %s exists" % name
# don't bind the selection yet
return self
def eval(self, dyn=None):
"create the view"
db = self.indb
name = self.name
if db is None:
raise ValueError, "create view %s unbound or executed" % name
self.indb = None
if db.has_relation(name):
raise ValueError, "create view %s, name exists" % name
db.touched = 1
from gfdb0 import View
v = View(self.name, self.namelist, self.selection, db)
db[name] = v
db.add_datadef(name, self)
log = db.log
if log is not None:
log.log(self)
CREATEINDEXFMT = """\
CREATE INDEX %s ON %s (
%s
)"""
class CreateIndex(sqlsem.SimpleRecursive, Ordered_DDF):
"""create index operation"""
def __init__(self, name, tablename, atts):
self.name = name
self.tablename=tablename
self.atts = atts
self.indb = None
self.target = None
def initargs(self):
return (self.name, self.tablename, self.atts)
def __cmp__(self, other):
oc = other.__class__
if oc is CreateTable:
return 1 # after all create tables
sc = self.__class__
if oc is not sc:
return cmp(sc, oc)
else:
return cmp(self.name, other.name)
def __coerce__(self, other):
return (self, other)
def __repr__(self):
from string import join
innards = join(self.atts, ",\n ")
return CREATEINDEXFMT % (self.name, self.tablename, innards)
def relbind(self, db):
name = self.name
self.indb = db
if db.has_datadef(name):
raise NameError, `name`+": data def exists"
try:
self.target = db.get_for_update(self.tablename) #db[self.tablename]
except:
raise NameError, `self.tablename`+": no such relation"
return self
def eval(self, dyn=None):
from gfdb0 import Index
db = self.indb
if db is None:
raise ValueError, "create index unbound or executed"
self.indb = None
rel = self.target
if rel is None:
raise ValueError, "create index not bound to relation"
db.touched = 1
self.the_index = the_index = Index(self.name, self.atts)
rel.add_index(the_index)
name = self.name
db.add_datadef(name, self)
db.add_index(name, the_index)
log = db.log
if log is not None:
log.log(self)
class DropIndex(sqlsem.SimpleRecursive):
def __init__(self, name):
self.name = name
self.indb = None
def initargs(self):
return (self.name,)
def __repr__(self):
return "DROP INDEX %s" % (self.name,)
def relbind(self, db):
self.indb = db
if not db.has_datadef(self.name):
raise NameError, `self.name`+": no such index"
return self
def eval(self, dyn=None):
db = self.indb
self.indb=None
if db is None:
raise ValueError, "drop index executed or unbound"
db.touched = 1
indexname = self.name
createindex = db.datadefs[indexname]
index = createindex.the_index
relname = createindex.tablename
rel = db[relname]
rel.drop_index(index)
db.drop_datadef(indexname)
db.drop_index(indexname)
log = db.log
if log is not None:
log.log(self)
class DropTable(sqlsem.SimpleRecursive):
def __init__(self, name):
self.name = name
self.indb = None
def initargs(self):
return (self.name,)
def __repr__(self):
return "DROP TABLE %s" % (self.name,)
def relbind(self, db):
self.indb = db
name = self.name
if not db.has_relation(name):
raise NameError, `self.name` + ": cannot delete, no such table/view"
self.check_kind(name, db)
return self
def check_kind(self, name, db):
if db[name].is_view:
raise ValueError, "%s is VIEW, can't DROP TABLE" % name
def eval(self, dyn):
db = self.indb
if db is None:
raise ValueError, "unbound or executed"
db.touched = 1
self.indb = None
self.relbind(db)
name = self.name
rel = db[name]
rel.drop_indices(db)
db.drop_datadef(name)
del db[name]
log = db.log
if log is not None:
log.log(self)
class DropView(DropTable):
"""DROP VIEW name"""
def __repr__(self):
return "DROP VIEW %s" % self.name
def check_kind(self, name, db):
if not db[name].is_view:
raise ValueError, "%s is TABLE, can't DROP VIEW" % name
COLDEFFMT = "%s %s %s %s"
class ColumnDef(sqlsem.SimpleRecursive):
def __init__(self, colid, datatype, defaults, constraints):
self.colid = colid
self.datatype = datatype
self.defaults = defaults
self.constraints = constraints
def initargs(self):
return (self.colid, self.datatype, self.defaults, self.constraints)
def __repr__(self):
defaults = self.defaults
if defaults is None: defaults=""
constraints = self.constraints
if constraints is None: constraints = ""
return COLDEFFMT % (self.colid, self.datatype, defaults, constraints)
def evalcond(cond, eqs, target, dyn, rassns, translate, invtrans):
"""factored out shared op between Update and Delete."""
if dyn:
from sqlsem import dynamic_binding
dynbind = dynamic_binding(len(dyn), dyn)
if len(dynbind)>1:
raise ValueError, "only one dynamic binding allowed for UPDATE"
dynbind = dynbind[0]
if eqs is not None:
dynbind1 = dynbind.remap(eqs)
if dynbind1 is None:
# inconsistent
return
dynbind = dynbind1 + dynbind
if rassns is not None:
rassns = rassns + invtrans * dynbind
if rassns.Clean() is None:
# inconsistent
return
else:
rassns = invtrans * dynbind
else:
dynbind = None
# get tuple set, try to use an index
index = None
if rassns is not None:
known = rassns.keys()
index = target.choose_index(known)
if index is None:
(tuples, seqnums) = target.rows(1)
else:
#print "using index", index.name
(tuples, seqnums) = index.matches(rassns)
ltuples = len(tuples)
buffer = [0] * ltuples
rtups = range(ltuples)
for i in rtups:
tup = tuples[i]
#print tup
ttup = translate * tup
if dynbind:
ttup = (ttup + dynbind).Clean()
if ttup is not None:
buffer[i] = ttup
#for x in buffer:
#print "before", x
test = cond(buffer)
return (test, rtups, seqnums, tuples)
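# evalcond returns (test, rtups, seqnums, tuples): test holds the
# condition evaluated over the translated candidate rows (integer
# entries mean "filtered out"), rtups indexes that buffer, and
# seqnums/tuples identify the stored rows so UpdateOp and DeleteOp
# below can rewrite or erase exactly the matching ones.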
UPDFMT = """\
UPDATE %s
SET %s
WHERE %s"""
# optimize to use indices and single call to "cond"
class UpdateOp(sqlsem.SimpleRecursive):
def __init__(self, name, assns, condition):
self.name = name
self.assns = assns
self.condition = condition
def initargs(self):
return (self.name, self.assns, self.condition)
def __repr__(self):
return UPDFMT % (self.name, self.assns, self.condition)
def relbind(self, db):
self.indb = db
name = self.name
target = self.target = db.get_for_update(name)
(attb, relb, amb, ambatts) = db.bindings( [ (name, name) ] )
assns = self.assns = self.assns.relbind(attb, db)
cond = self.condition = self.condition.relbind(attb, db)
constraints = cond.constraints
if constraints is not None:
eqs = self.eqs = constraints.eqs
cassns = constraints.assns
else:
cassns = eqs = self.eqs = None
#print constraints, eqs
# check that atts of assns are atts of target
#print dir(assns)
resultatts = assns.attorder
from sqlsem import kjbuckets
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjGraph
resultatts = kjSet(resultatts)
allatts = kjSet(target.attribute_names)
self.preserved = allatts - resultatts
huh = resultatts - allatts
if huh:
raise NameError, "%s lacks %s attributes" % (name, huh.items())
# compute projection
condatts = kjGraph(cond.domain().items()).neighbors(name)
#print "condatts", condatts
translate = kjbuckets.kjDict()
for att in condatts:
translate[ (name, att) ] = att
self.translate = translate
invtrans= self.invtrans = ~translate
if cassns is not None:
self.rassns = invtrans * cassns
else:
self.rassns = None
#print "cassns,rassns", cassns, self.rassns
#print translate
# compute domain of self.assns
# (do nothing with it, should add sanity check!)
assns_domain = self.assns.domain()
return self
def eval(self, dyn=None):
indb = self.indb
name = self.name
cond = self.condition
cond.uncache()
assns = self.assns
assns.uncache()
translate = self.translate
preserved = self.preserved
target = self.target
rassns = self.rassns
eqs = self.eqs
invtrans = self.invtrans
(test, rtups, seqnums, tuples) = evalcond(
cond, eqs, target, dyn, rassns, translate, invtrans)
# shortcut
if not test: return
self.indb.touched = 1
tt = type
from types import IntType
(tps, attorder) = assns.map(test)
count = 0
newseqs = list(rtups)
newtups = list(rtups)
for i in rtups:
new = tps[i]
if tt(new) is not IntType and new is not None:
seqnum = seqnums[i]
old = tuples[i]
if preserved:
new = new + preserved*old
newtups[count] = new
newseqs[count] = seqnum
count = count + 1
if count:
newseqs = newseqs[:count]
newtups = newtups[:count]
target.reset_tuples(newtups, newseqs)
log = indb.log
if log is not None and not log.is_scratch:
from sqlsem import Reset_Tuples
op = Reset_Tuples(self.name)
op.set_data(newtups, newseqs, target)
log.log(op)
class DeleteOp(sqlsem.SimpleRecursive):
def __init__(self, name, where):
self.name = name
self.condition = where
def initargs(self):
return (self.name, self.condition)
def __repr__(self):
return "DELETE FROM %s WHERE %s" % (self.name, self.condition)
def relbind(self, db):
self.indb = db
name = self.name
target = self.target = db.get_for_update(name)
(attb, relb, amb, ambatts) = db.bindings( [ (name, name) ] )
cond = self.condition = self.condition.relbind(attb, db)
# compute domain of cond
# do nothing with it (should add sanity check)
cond_domain = cond.domain()
constraints = cond.constraints
if constraints is not None:
cassns = constraints.assns
self.eqs = constraints.eqs
else:
self.eqs = cassns = None
# compute projection/rename
from sqlsem import kjbuckets
condatts = kjbuckets.kjGraph(cond.domain().items()).neighbors(name)
translate = kjbuckets.kjDict()
for att in condatts:
translate[(name, att)] = att
self.translate = translate
invtrans = self.invtrans = ~translate
if cassns is not None:
self.rassns = invtrans * cassns
else:
self.rassns = None
return self
def eval(self, dyn=None):
# note, very similar to update case...
indb = self.indb
name = self.name
target = self.target
tuples = target.tuples
eqs = self.eqs
rassns = self.rassns
cond = self.condition
cond.uncache()
translate = self.translate
invtrans = self.invtrans
(test, rtups, seqnums, tuples) = evalcond(
cond, eqs, target, dyn, rassns, translate, invtrans)
# shortcut
if not test: return
indb.touched = 1
tt = type
from types import IntType
count = 0
newseqs = list(rtups)
for i in rtups:
new = test[i]
if tt(new) is not IntType and new is not None:
seqnum = seqnums[i]
newseqs[count] = seqnum
count = count + 1
if count:
newseqs = newseqs[:count]
target.erase_tuples(newseqs)
log = indb.log
if log is not None and not log.is_scratch:
from sqlsem import Erase_Tuples
op = Erase_Tuples(self.name)
op.set_data(newseqs, target)
log.log(op)
INSFMT = """\
INSERT INTO %s
%s
%s"""
class InsertOp(sqlsem.SimpleRecursive):
def __init__(self, name, optcolids, insertspec):
self.name = name
self.optcolids = optcolids
self.insertspec = insertspec
self.target = None # target relation
self.collector = None # name map for attribute translation
def initargs(self):
return (self.name, self.optcolids, self.insertspec)
def __repr__(self):
return INSFMT % (self.name, self.optcolids, self.insertspec)
def relbind(self, db):
self.indb = db
name = self.name
# determine target relation
target = self.target = db.get_for_update(name)
targetatts = target.attributes()
from sqlsem import kjbuckets
kjSet = kjbuckets.kjSet
targetset = kjSet(targetatts)
# check or set colid bindings
colids = self.optcolids
if colids is None:
colids = self.optcolids = target.attributes()
colset = kjSet(colids)
### for now all attributes must be in colset
cdiff = colset-targetset
if cdiff:
raise NameError, "%s: no such attributes in %s" % (cdiff.items(), name)
cdiff = targetset-colset
### temporary!!!
if cdiff:
raise NameError, "%s: not set in insert on %s" % (cdiff.items(), name)
# bind the insertspec
insertspec = self.insertspec
self.insertspec = insertspec = insertspec.relbind(db)
# create a collector for result
from sqlsem import TupleCollector
collector = self.collector = TupleCollector()
# get ordered list of expressions to eval on bound attributes of insertspec
resultexps = insertspec.resultexps()
if len(resultexps)!=len(colset):
raise ValueError, "result and colset of differing length %s:%s" % (colset,resultexps)
pairs = map(None, colids, resultexps)
for (col,exp) in pairs:
collector.addbinding(col, exp)
return self
def eval(self, dyn=None):
resultbts = self.insertspec.eval(dyn)
#print "resultbts", resultbts
# shortcut
if not resultbts: return
indb = self.indb
indb.touched = 1
(resulttups, resultatts) = self.collector.map(resultbts)
#print "resulttups", resulttups
if resulttups:
target = self.target
target.add_tuples(resulttups)
#target.regenerate_indices()
log = indb.log
if log is not None and not log.is_scratch:
from sqlsem import Add_Tuples
op = Add_Tuples(self.name)
op.set_data(resulttups, target)
log.log(op)
Insert_dummy_arg = [ ( (1,1), 1 ) ]
class InsertValues(sqlsem.SimpleRecursive):
def __init__(self, List):
self.list = List
def initargs(self):
return (self.list,)
def __repr__(self):
return "VALUES " +` tuple(self.list) `
def resultexps(self):
return self.list
def relbind(self, db):
l = self.list
bindings = {}
for i in xrange(len(self.list)):
li = l[i]
l[i] = li.relbind(bindings, db)
# do nothing with domain, for now
li_domain = li.domain()
return self
def eval(self, dyn=None):
if dyn:
from sqlsem import dynamic_binding
dynbt = dynamic_binding(len(dyn), dyn)
else:
# dummy value to prevent triviality
from sqlsem import kjbuckets
dynbt = [kjbuckets.kjDict(Insert_dummy_arg)]
#print "bindings", dynbt.assns
return dynbt # ??
class InsertSubSelect(sqlsem.SimpleRecursive):
def __init__(self, subsel):
self.subsel = subsel
def initargs(self):
return (self.subsel,)
def __repr__(self):
return "[subsel] %s" % (self.subsel,)
def resultexps(self):
# get list of result bindings
subsel = self.subsel
atts = self.subsel.attributes()
# bind each as "result.name"
exps = []
from sqlsem import BoundAttribute
for a in atts:
exps.append( BoundAttribute("result", a) )
return exps # temp
def relbind(self, db):
subsel = self.subsel
self.subsel = subsel.relbind(db)
# do nothing with domain for now
#subsel_domain = subsel.domain()
return self
def eval(self, dyn=None):
subsel = self.subsel
subsel.uncache()
rel = subsel.eval(dyn)
tups = rel.rows()
from sqlsem import BoundTuple ### temp
from sqlsem import kjbuckets
kjDict = kjbuckets.kjDict
for i in xrange(len(tups)):
tupsi = tups[i]
new = kjDict()
for k in tupsi.keys():
new[ ("result", k) ] = tupsi[k]
tups[i] = new
return tups
# ordering for archiving datadefs
ddf_order = [CreateTable, CreateIndex, CreateView]
""" sql semantics
"""
### trim unused methods.
### make assns use equivalence classes.
### maybe eventually implement disj-conj-eq optimizations
### note: for multithreading x.relbind(...) should ALWAYS return
### a fresh copy of structure (sometimes in-place now).
### note: binding of order by is dubious with archiving,
### should not bind IN PLACE, leave unbound elts alone!
# use kjbuckets builtin if available
try:
import kjbuckets
except ImportError:
import kjbuckets0
kjbuckets = kjbuckets0
Tuple = kjbuckets.kjDict
Graph = kjbuckets.kjGraph
Set = kjbuckets.kjSet
import sys, traceback
### debug
#sys.stderr = sys.stdin
# operations on simple tuples, mostly from kjbuckets
#def maketuple(thing):
# """try to make a tuple from thing.
# thing should be a dictionary or sequence of (name, value)
# or other tuple."""
# from types import DictType
# if type(thing)==DictType:
# return Tuple(thing.items() )
# else: return Tuple(thing)
def no_ints_nulls(list):
"""in place remove all ints, Nones from a list (for null handling)"""
tt = type
nn = None
from types import IntType
count = 0
for x in list:
if tt(x) is not IntType and x is not nn:
list[count] = x
count = count+1
del list[count:]
return list
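# eg no_ints_nulls([t1, 0, None, t2]) truncates the list to [t1, t2]
# (modification happens in place; the list is also returned).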
# stuff for bound tuples.
class HashJoiner:
def __init__(self, bt, relname, attributes, relation, witness):
self.relname = relname
self.attributes = attributes
self.relation = relation
self.witness = witness
self.bt = bt
eqs = bt.eqs
#print "relname", relname
#print "attributes", attributes
#print "relation", relation
#print "witness", witness
#print "bt", bt
transform = self.transform = kjbuckets.kjDict()
rbindings = self.rbindings = kjbuckets.kjSet()
for a in attributes:
b = (relname, a)
transform[b] = a
rbindings[b] = b
self.eqs = eqs = eqs + kjbuckets.kjGraph(rbindings)
witness = witness.remap(eqs)
known = kjbuckets.kjSet(witness.keys()) & rbindings
batts = tuple(known.items())
if not batts:
atts = ()
elif len(batts)==1:
atts = ( transform[batts[0]], )
else:
atts = transform.dump(batts)
self.atts = atts
self.batts = batts
self.transform = transform
eqs = bt.eqs
#eqs = (rbindings * eqs)
self.eqs = eqs = eqs + kjbuckets.kjGraph(rbindings)
self.transformG = transformG = eqs * transform
assns = self.assns = bt.assns
self.rassns = assns.remap( ~transformG )
def relbind(self, db, atts):
rel = self.relation
if rel.is_view:
self.relation = rel.relbind(db, atts)
return self
def uncache(self):
rel = self.relation
if rel.is_view:
self.relation.uncache()
def join(self, subseq):
relname = self.relname
result = []
assns = self.assns
if not subseq: return result
# apply equalities to unitary subseq (embedded subq)
if len(subseq)==1:
subseq0 = subseq[0]
subseq0r = subseq0.remap(self.eqs)
if subseq0r is None:
return [] # inconsistent
subseq0 = subseq0 + subseq0r + assns
if subseq0.Clean() is None:
return [] # inconsistent
subseq = [subseq0]
rassns = self.rassns
#print "rassns", rassns
#print "subseq", subseq
if rassns is None:
#print "inconsistent btup"
return []
relation = self.relation
#print "assns", assns
transformG = self.transformG
#print "transformG", transformG
transform = self.transform
atts = self.atts
batts = self.batts
#print "batts, atts", batts, atts
if not batts:
#print "cross product", relname
tuples = relation.rows()
for t in tuples:
#print "t is", t
if rassns:
t = (t + rassns).Clean()
if t is None:
#print "preselect fails"
continue
new = t.remap(transformG)
#print "new is", new
if new is None:
#print "transform fails"
continue
for subst in subseq:
#print "subst", subst
if subst:
add = (subst + new).Clean()
else:
add = new
#print "add is", add
if add is not None:
result.append(add)
else:
# hash join
#print "hash join"
# first try to use an index
index = relation.choose_index(atts)
#print transform
if index is not None:
#print "index join", index.name, relname
#print index.index.keys()
#print "rassns", rassns
atts = index.attributes()
invtransform = ~transform
if len(atts)==1:
batts = (invtransform[atts[0]],)
else:
batts = invtransform.dump(atts)
hash_tups = 1
tindex = index.index
# memoize converted tuples
tindex0 = {}
test = tindex.has_key
test0 = tindex0.has_key
for i in xrange(len(subseq)):
subst = subseq[i]
#print "substs is", subst
its = subst.dump(batts)
#print "its", its
othersubsts = []
if test0(its):
othersubsts = tindex0[its]
elif test(its):
tups = tindex[its]
for t in tups:
#print "t before", t
t = (t+rassns).Clean()
#print "t after", t
if t is None: continue
new = t.remap(transformG)
#print "new", new
if new is None: continue
othersubsts.append(new)
tindex0[its] = othersubsts
for other in othersubsts:
#print "adding", other, subst
add = (other + subst).Clean()
if add is not None:
result.append(add)
else:
tuples = relation.rows()
if len(subseq)<len(tuples):
#print "hash subseq", relname
subseqindex = {}
test = subseqindex.has_key
for i in xrange(len(subseq)):
subst = subseq[i]
its = subst.dump(batts)
#print "items1", subseq, batts, its
if test(its):
subseqindex[its].append(subst)
else:
subseqindex[its] = [ subst ]
for t in tuples:
#print "on", t
if rassns:
t = (t+rassns).Clean()
if t is None:
#print "preselect fails"
continue
its = t.dump(atts)
#print "items2", its
if test(its):
new = t.remap(transformG)
#print "...new", new
if new is None:
#print "transform fails"
continue
l = subseqindex[its]
for subst in l:
add = (subst + new).Clean()
#print "adding", add
if add is not None:
result.append(add)
else:
#print "hash tuples", relname
tindex = {}
test = tindex.has_key
for i in xrange(len(tuples)):
t = tuples[i]
if rassns:
t = (t + rassns).Clean()
if t is None:
#print "preselect fails"
continue
new = t.remap(transformG)
#print "new is", new
if new is None:
#print "transform fails"
continue
its = t.dump(atts)
#print "items3", its
if test(its):
tindex[its].append(new)
else:
tindex[its] = [ new ]
for subst in subseq:
its = subst.dump(batts)
#print "items4", its
if test(its):
n = tindex[its]
for new in n:
add = (subst + new).Clean()
if add is not None:
result.append(add)
#print "hashjoin result"
#for x in result:
#print x
#break
return result
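# Note on join() above: it picks one of three strategies -- with no
# bound attributes it scans the whole relation (cross product); with a
# usable index it probes the index dictionary keyed by dumped attribute
# tuples (memoizing converted rows); otherwise it hash-indexes
# whichever of subseq/tuples is smaller.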
### essentially, specialized pickle for this app:
def deserialize(description):
"""simple protocol for generating a marshallable ob"""
#print "deserialize", description
from types import TupleType
if type(description) is not TupleType:
return description # base type
try:
(name, desc) = description
except:
return description # base type
if name == "tuple":
# tuple case
return desc
### other special cases here...
# all other cases are classes of sqlsem
import sqlsem
klass = getattr(sqlsem, name)
(args1, args2) = desc
args1 = tuple(map(deserialize, args1))
ob = apply(klass, args1)
ob.demarshal(args2)
return ob
def serialize(ob):
"""dual of deserialize."""
try:
#print ob.__class__, ob
args1 = ob.initargs()
#print "args1 before", args1
args1 = tuple(map(serialize, args1))
#print "args1 after", args1
args2 = ob.marshaldata()
return (ob.__class__.__name__, (args1, args2))
except:
from types import InstanceType
tt = type(ob)
if tt is InstanceType:
#ext = traceback.extract_tb(sys.exc_traceback)
#for x in ext:
#print x
#print
#print sys.exc_type, sys.exc_value
#print ob.__class__
raise ValueError, "couldn't serialize %s %s" % (
tt, ob.__class__)
# assume base type otherwise
return ob
# invariant:
# deserialize(serialize(ob)) returns semantic copy of ob
# serialize(ob) is marshallable
# ie,
# args1 = ob.initargs() # init args
# args1d = map(serialize, args1) # serialized
# args2 = ob.marshaldata() # marshalable addl info
# # assert args1d, args2 are marshallable
# args1copy = map(deserialize, args1)
# ob2 = ob.__class__(args1copy)
# ob2 = ob2.demarshal(args2)
# # assert ob2 is semantic copy of ob
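# Round-trip sketch (BoundAttribute is defined below in this module):
#   ba = BoundAttribute("x", "a")
#   s = serialize(ba)      # ('BoundAttribute', (('x', 'a'), ()))
#   ba2 = deserialize(s)   # semantic copy of ba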
class SimpleRecursive:
"""Simple Recursive structure, only requires initargs"""
def demarshal(self, args):
pass
def marshaldata(self):
return ()
class BoundTuple:
clean = 1 # false if inconsistent
closed = 0 # true if equality constraints inferred
def __init__(self, **bindings):
"""bindings are name-->simpletuple associations."""
self.eqs = Graph()
self.assns = Tuple()
for (name, simpletuple) in bindings.items():
self.bind(name, simpletuple)
def initargs(self):
return ()
def marshaldata(self):
#print "btp marshaldata", self
return (self.eqs.items(), self.assns.items(), self.clean, self.closed)
def demarshal(self, args):
(eitems, aitems, self.clean, self.closed) = args
self.eqs = kjbuckets.kjGraph(eitems)
self.assns = kjbuckets.kjDict(aitems)
def relbind(self, dict, db):
"""return bindings of self wrt dict rel-->att"""
result = BoundTuple()
e2 = result.eqs
a2 = result.assns
for ((a,b), (c,d)) in self.eqs.items():
if a is None:
try:
a = dict[b]
except KeyError:
raise NameError, `b`+": ambiguous or unknown attribute"
if c is None:
try:
c = dict[d]
except KeyError:
raise NameError, `d`+": ambiguous or unknown attribute"
e2[(a,b)] = (c,d)
for ((a,b), v) in self.assns.items():
if a is None:
try:
a = dict[b]
except KeyError:
raise NameError, `b`+": ambiguous or unknown attribute"
a2[(a,b)] = v
result.closed = self.closed
result.clean = self.clean
return result
#def known(self, relname):
# """return ([(relname, a1), ...], [a1, ...])
# for attributes ai of relname known in self."""
# atts = []
# batts = []
# for x in self.assns.keys():
# (r,a) = x
# if r==relname:
# batts.append(x)
# atts.append(a)
# return (batts, atts)
def relorder(self, db, allrels):
"""based on known constraints, pick an
ordering for materializing relations.
db is database (ignored currently)
allrels is names of all relations to include (list)."""
### not very smart about indices yet!!!
if len(allrels)<2:
# doesn't matter
return allrels
order = []
eqs = self.eqs
assns = self.assns
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjGraph
pinned = kjSet()
has_index = kjSet()
needed = kjSet(allrels)
akeys = assns.keys()
for (r,a) in akeys:
pinned[r]=r # pinned if some value known
known_map = kjGraph(akeys)
for r in known_map.keys():
rknown = known_map.neighbors(r)
if db.has_key(r):
rel = db[r]
index = rel.choose_index(rknown)
if index is not None:
has_index[r] = r # has an index!
if pinned: pinned = pinned & needed
if has_index: has_index = has_index & needed
related = kjGraph()
for ( (r1, a1), (r2, a2) ) in eqs.items():
related[r1]=r2 # related if equated to other
related[r2]=r1 # redundant if closed.
if related: related = needed * related * needed
chosen = kjSet()
pr = kjSet(related) & pinned
# choose first victim
if has_index:
choice = has_index.choose_key()
elif pr:
choice = pr.choose_key()
elif pinned:
choice = pinned.choose_key()
elif related:
choice = related.choose_key()
else:
return allrels[:] # give up!
while pinned or related or has_index:
order.append(choice)
chosen[choice] = 1
if pinned.has_key(choice):
del pinned[choice]
if related.has_key(choice):
del related[choice]
if has_index.has_key(choice):
del has_index[choice]
nexts = related * chosen
if nexts:
# prefer a relation related to chosen
choice = nexts.choose_key()
elif pinned:
# otherwise one that is pinned
choice = pinned.choose_key()
elif related:
# otherwise one that relates to something...
choice = related.choose_key()
others = kjSet(allrels) - chosen
if others: order = order + others.items()
return order
def domain(self):
kjSet = kjbuckets.kjSet
return kjSet(self.eqs) + kjSet(self.assns)
def __repr__(self):
from string import join
result = []
for ( (name, att), value) in self.assns.items():
result.append( "%s.%s=%s" % (name, att, `value`) )
for ( (name, att), (name2, att2) ) in self.eqs.items():
result.append( "%s.%s=%s.%s" % (name, att, name2, att2) )
if self.clean:
if not result: return "TRUE"
else:
result.insert(0, "FALSE")
result.sort()
return join(result, " & ")
def equate(self, equalities):
"""add equalities to self, only if not closed.
equalities should be seq of ( (name, att), (name, att) )
"""
if self.closed: raise ValueError, "cannot add equalities! Closed!"
e = self.eqs
for (a, b) in equalities:
e[a] = b
def close(self):
"""infer equalities, if consistent.
only recompute equality closure if not previously closed.
return None on inconsistency.
"""
neweqs = self.eqs
if not self.closed:
self.eqs = neweqs = (neweqs + ~neweqs).tclosure() # sym, trans closure
self.closed = 1
# add trivial equalities to self
for x in self.assns.keys():
if not neweqs.member(x,x):
neweqs[x] = x
newassns = self.assns.remap(neweqs)
if newassns is not None and self.clean:
self.assns = newassns
#self.clean = 1
return self
else:
self.clean = 0
return None
def share_eqs(self):
"""make clone of self that shares equalities, closure.
note: will share future side effects to eqs too."""
result = BoundTuple()
result.eqs = self.eqs
result.closed = self.closed
return result
def __add__(self, other):
"""combine self with other, return closure."""
result = self.share_eqs()
se = self.eqs
oe = other.eqs
if (se is not oe) and (se != oe):
result.eqs = se + oe
result.closed = 0
ra= result.assns = self.assns + other.assns
result.clean = result.clean and (ra.Clean() is not None)
return result.close()
def __and__(self, other):
"""return closed constraints common to self and other."""
result = BoundTuple()
se = self.eqs
oe = other.eqs
if (se is oe) or (se == oe):
result.eqs = self.eqs
result.closed = self.closed
else:
result.eqs = self.eqs & other.eqs
result.assns = self.assns & other.assns
result.clean = self.clean and other.clean
return result.close()
def __hash__(self):
# note: equalities don't enter into hash computation!
# (some may be spurious)
self.close()
return hash(self.assns)# ^ hash(self.eqs)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
sa = self.assns
oa = other.assns
test = cmp(sa, oa)
if test: return test
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjGraph
se = self.eqs
se = kjGraph(se) - kjGraph(kjSet(se))
oe = other.eqs
oe = kjGraph(oe) - kjGraph(kjSet(oe))
return cmp(se, oe)
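# Sketch: constraints print in conjunctive form, eg
#   bt = BoundTuple()
#   bt.assns[("x", "a")] = 1
#   bt.equate([ (("x", "a"), ("y", "b")) ])
#   print bt               # -> x.a=1 & x.a=y.b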
class BoundExpression(SimpleRecursive):
"""superclass for all bound expressions.
Except where overloaded, expressions are binary
with self.left and self.right.
"""
contains_aggregate = 0 # default
def __init__(self, left, right):
self.left = left
self.right = right
self.contains_aggregate = left.contains_aggregate or right.contains_aggregate
def initargs(self):
return (self.left, self.right)
def uncache(self):
"""prepare for execution, clear cached data."""
self.left.uncache()
self.right.uncache()
# eventually add converters...
def equate(self,other):
"""return predicate equating self and other.
Overload for special cases, please!"""
return NontrivialEqPred(self, other)
def attribute(self):
return (None, `self`)
def le(self, other):
"""predicate self<=other"""
return LessEqPred(self, other)
# these should be overridden for 2 const case someday...
def lt(self, other):
"""predicate self<other"""
return LessPred(self, other)
def __coerce__(self, other):
return (self, other)
def __add__(self, other):
return BoundAddition(self, other)
def __sub__(self, other):
return BoundSubtraction(self, other)
def __mul__(self, other):
return BoundMultiplication(self, other)
def __neg__(self):
return BoundMinus(self)
def __div__(self, other):
return BoundDivision(self, other)
def relbind(self, dict, db):
Class = self.__class__
return Class(self.left.relbind(dict, db), self.right.relbind(dict, db))
def __repr__(self):
return "(%s)%s(%s)" % (self.left, self.op, self.right)
def domain(self):
return self.left.domain() + self.right.domain()
# always overload value
class BoundMinus(BoundExpression, SimpleRecursive):
def __init__(self, thing):
self.thing = thing
self.contains_aggregate = thing.contains_aggregate
def initargs(self):
return (self.thing,)
def __repr__(self):
return "-(%s)" % (self.thing,)
def value(self, contexts):
from types import IntType
tt = type
result = self.thing.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
result[i] = -result[i]
return result
def relbind(self, dict, db):
Class = self.__class__
return Class(self.thing.relbind(dict,db))
def uncache(self):
self.thing.uncache()
def domain(self):
return self.thing.domain()
### stuff for aggregation
class Average(BoundMinus):
contains_aggregate = 1
def __init__(self, expr, distinct=0):
self.distinct = distinct
if expr.contains_aggregate:
raise ValueError, `expr`+": aggregate in aggregate "+self.name
self.thing = expr
name = "Average"
def __repr__(self):
distinct = ""
if self.distinct:
distinct = "distinct "
return "%s(%s%s)" % (self.name, distinct, self.thing)
def relbind(self, dict, db):
Class = self.__class__
return Class(self.thing.relbind(dict,db), self.distinct)
def value(self, contexts):
if not contexts: return [] # ???
test = contexts[0]
if not test.has_key(None):
return [self.all_value(contexts)]
else:
return self.agg_value(contexts)
def dvalues(self, values):
d = {}
for i in xrange(len(values)):
d[values[i]] = 1
return d.keys()
def all_value(self, contexts):
thing = self.thing
values = self.clean(thing.value(contexts), contexts)
if self.distinct:
values = self.dvalues(values)
return self.op(values)
def clean(self, values, contexts):
D = {}
from types import IntType
tt = type
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
D[i] = values[i]
return D.values()
def agg_value(self, contexts):
from types import IntType
tt = type
result = list(contexts)
for i in xrange(len(contexts)):
context = contexts[i]
if tt(context) is not IntType:
result[i] = self.all_value( context[None] )
return result
def op(self, values):
sum = 0
for x in values:
sum = sum+x
return sum/(len(values)*1.0)
class Sum(Average):
name = "Sum"
def op(self, values):
if not values: return 0
sum = values[0]
for x in values[1:]:
sum = sum+x
return sum
class Median(Average):
name = "Median"
def op(self, values):
if not values:
raise ValueError, "Median of empty set"
values = list(values)
values.sort()
lvals = len(values)
return values[lvals/2]
class Maximum(Average):
name = "Maximum"
def op(self, values):
return max(values)
class Minimum(Average):
name = "Minimum"
def op(self, values):
return min(values)
class Count(Average):
name = "Count"
distinct = 0
def __init__(self, thing, distinct = 0):
if thing=="*":
self.thing = "*"
else:
Average.__init__(self, thing, distinct)
def domain(self):
thing = self.thing
if thing=="*":
return kjbuckets.kjSet()
return thing.domain()
def all_value(self, contexts):
thing = self.thing
if thing=="*" or not self.distinct:
test = self.clean(contexts, contexts)
#print "count len", len(test), contexts[0]
return len(test)
return Average.all_value(self, contexts)
def op(self, values):
return len(values)
def relbind(self, dict, db):
if self.thing=="*":
return self
return Average.relbind(self, dict, db)
def uncache(self):
if self.thing!="*": self.thing.uncache()
def aggregate(assignments, exprlist):
"""aggregates are a assignments with special
attribute None --> list of subtuple"""
lexprs = len(exprlist)
if lexprs<1:
raise ValueError, "aggregate on no expressions?"
lassns = len(assignments)
pairs = list(exprlist)
for i in xrange(lexprs):
expr = exprlist[i]
attributes = [expr.attribute()]*lassns
values = expr.value(assignments)
pairs[i] = map(None, attributes, values)
#for x in pairs:
#print "pairs", x
if lexprs>1:
newassnpairs = apply(map, (None,)+tuple(pairs))
else:
newassnpairs = pairs[0]
#for x in newassnpairs:
#print "newassnpairs", x
xassns = range(lassns)
dict = {}
test = dict.has_key
for i in xrange(lassns):
thesepairs = newassnpairs[i]
thissubassn = assignments[i]
if test(thesepairs):
dict[thesepairs].append(thissubassn)
else:
dict[thesepairs] = [thissubassn]
items = dict.items()
result = list(items)
kjDict = kjbuckets.kjDict
if lexprs>1:
for i in xrange(len(items)):
(pairs, subassns) = items[i]
#print "pairs", pairs
#print "subassns", subassns
D = kjDict(pairs)
D[None] = subassns
result[i] = D
else:
for i in xrange(len(items)):
(pair, subassns) = items[i]
#print "pair", pair
#print "subassns", subassns
result[i] = kjDict( [pair, (None, subassns)] )
return result
### stuff for order_by
class DescExpr(BoundMinus):
"""special wrapper used only for order by descending
for things with no -thing operation (eg, strings)"""
def __init__(self, thing):
self.thing = thing
self.contains_aggregate = thing.contains_aggregate
def value(self, contexts):
from types import IntType, StringType
tt = type
result = self.thing.value(contexts)
allwrap = None
allnowrap = None
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
resulti = result[i]
# currently assume only value needing wrapping is string
if tt(resulti) is StringType:
if allnowrap is not None:
raise ValueError, "(%s, %s) cannot order desc" % (allnowrap, resulti)
allwrap = resulti
result[i] = descOb(resulti)
else:
if allwrap is not None:
raise ValueError, "(%s, %s) cannot order desc" % (allwrap, resulti)
allnowrap = resulti
result[i] = -resulti
return result
def __repr__(self):
return "DescExpr(%s)" % (self.thing,)
def orderbind(self, order):
"""order is list of (att, expr)."""
Class = self.__class__
return Class(self.thing.orderbind(order))
class SimpleColumn(SimpleRecursive):
"""a simple column name for application to a list of tuples"""
contains_aggregate = 0
def __init__(self, name):
self.name = name
def relbind(self, dict, db):
# already bound!
return self
def orderbind(self, whatever):
# already bound!
return self
def initargs(self):
return (self.name,)
def value(self, simpletuples):
from types import IntType
tt = type
name = self.name
result = list(simpletuples)
for i in xrange(len(result)):
ri = result[i]
if tt(ri) is not IntType:
result[i] = ri[name]
else:
result[i] = None # ???
return result
def __repr__(self):
return "<SimpleColumn %s>" % (self.name,)
class NumberedColumn(BoundMinus):
"""order by column number"""
contains_aggregate = 0
def __init__(self, num):
self.thing = num
def __repr__(self):
return "<NumberedColumn %s>" % (self.thing,)
def relbind(self, dict, db):
from types import IntType
if type(self.thing)!=IntType:
raise ValueError, `self.thing`+": not a numbered column"
return self
def orderbind(self, order):
return SimpleColumn( order[self.thing-1][0] )
class OrderExpr(BoundMinus):
"""order by expression."""
def orderbind(self, order):
expratt = self.thing.attribute()
for (att, exp) in order:
if exp.attribute()==expratt:
return SimpleColumn(att)
else:
raise NameError, `self`+": invalid ordering specification"
def __repr__(self):
return "<order expression %s>" % (self.thing,)
class descOb:
"""special wrapper only used for sorting in descending order
should only be compared with other descOb instances.
should only wrap items that cannot be easily "order inverted",
(eg, strings).
"""
def __init__(self, ob):
self.ob = ob
def __cmp__(self, other):
#test = cmp(self.__class__, other.__class__)
#if test: return test
return -cmp(self.ob,other.ob)
def __coerce__(self, other):
return (self, other)
def __hash__(self):
return hash(self.ob)
def __repr__(self):
return "descOb(%s)" % (self.ob,)
def PositionedSort(num, ord):
nc = NumberedColumn(num)
if ord=="DESC":
return DescExpr(nc)
return nc
def NamedSort(name, ord):
oe = OrderExpr(name)
if ord=="DESC":
return DescExpr(oe)
return oe
def relbind_sequence(order_list, dict, db):
result = list(order_list)
for i in xrange(len(order_list)):
result[i] = result[i].relbind(dict,db)
return result
def orderbind_sequence(order_list, order):
result = list(order_list)
for i in xrange(len(order_list)):
result[i] = result[i].orderbind(order)
return result
def order_tuples(order_list, tuples):
lorder_list = len(order_list)
ltuples = len(tuples)
if lorder_list<1:
raise ValueError, "order on empty list?"
order_map = list(order_list)
for i in xrange(lorder_list):
order_map[i] = order_list[i].value(tuples)
if len(order_map)>1:
order_vector = apply(map, (None,)+tuple(order_map) )
else:
order_vector = order_map[0]
#G = kjbuckets.kjGraph()
pairs = map(None, range(ltuples), tuples)
ppairs = map(None, order_vector, pairs)
G = kjbuckets.kjGraph(ppairs)
#for i in xrange(ltuples):
# G[ order_vector[i] ] = (i, tuples[i])
Gkeys = G.keys()
Gkeys.sort()
result = list(tuples)
index = 0
for x in Gkeys:
#print x
for (i, y) in G.neighbors(x):
#print " ", y
result[index]=y
index = index+1
if index!=ltuples:
raise ValueError, \
"TUPLE LOST IN ORDERING COMPUTATION! (%s,%s)" % (lorder, index)
return result
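# Usage sketch: for kjDict rows each having attribute "a",
#   order_tuples([SimpleColumn("a")], rows)
# yields the rows sorted ascending on a; wrap the column in DescExpr
# (defined above) for a descending sort.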
class BoundAddition(BoundExpression):
"""promised addition."""
op = "+"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] + rvs[i]
return lvs
class BoundSubtraction(BoundExpression):
"""promised subtraction."""
op = "-"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] - rvs[i]
return lvs
class BoundMultiplication(BoundExpression):
"""promised multiplication."""
op = "*"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
#print lvs
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] * rvs[i]
return lvs
class BoundDivision(BoundExpression):
"""promised division."""
op = "/"
def value(self, contexts):
from types import IntType
tt = type
lvs = self.left.value(contexts)
rvs = self.right.value(contexts)
for i in xrange(len(contexts)):
if tt(contexts[i]) is not IntType:
lvs[i] = lvs[i] / rvs[i]
return lvs
class BoundAttribute(BoundExpression):
"""bound attribute: initialize with relname=None if
implicit."""
contains_aggregate = 0
def __init__(self, rel, name):
self.rel = rel
self.name = name
def initargs(self):
return (self.rel, self.name)
def relbind(self, dict, db):
if self.rel is not None: return self
name = self.name
try:
rel = dict[name]
except KeyError:
raise NameError, `name` + ": unknown or ambiguous"
return BoundAttribute(rel, name)
def uncache(self):
pass
def __repr__(self):
return "%s.%s" % (self.rel, self.name)
def attribute(self):
"""return (rename, attribute) for self."""
return (self.rel, self.name)
def domain(self):
return kjbuckets.kjSet([ (self.rel, self.name) ])
def value(self, contexts):
"""return value of self in context (bound tuple)."""
#print "value of ", self, "in", contexts
from types import IntType
tt = type
result = list(contexts)
ra = (self.rel, self.name)
for i in xrange(len(result)):
if tt(result[i]) is not IntType:
result[i] = contexts[i][ra]
return result
def equate(self, other):
oc = other.__class__
if oc==BoundAttribute:
result = BoundTuple()
result.equate([(self.attribute(), other.attribute())])
return BTPredicate(result)
elif oc==Constant:
result = BoundTuple()
result.assns[ self.attribute() ] = other.value([1])[0]
return BTPredicate(result)
else:
return NontrivialEqPred(self, other)
class Constant(BoundExpression):
contains_aggregate = 0
def __init__(self, value):
self.value0 = value
def __hash__(self):
return hash(self.value0)
def initargs(self):
return (self.value0,)
def domain(self):
return kjbuckets.kjSet()
def __add__(self, other):
if other.__class__==Constant:
return Constant(self.value0 + other.value0)
return BoundAddition(self, other)
def __sub__(self, other):
if other.__class__==Constant:
return Constant(self.value0 - other.value0)
return BoundSubtraction(self, other)
def __mul__(self, other):
if other.__class__==Constant:
return Constant(self.value0 * other.value0)
return BoundMultiplication(self, other)
def __neg__(self):
return Constant(-self.value0)
def __div__(self, other):
if other.__class__==Constant:
return Constant(self.value0 / other.value0)
return BoundDivision(self, other)
def relbind(self, dict, db):
return self
def uncache(self):
pass
def value(self, contexts):
"""return the constant value associated with self."""
return [self.value0] * len(contexts)
def equate(self,other):
if other.__class__==Constant:
if other.value0 == self.value0:
return BTPredicate() #true
else:
return ~BTPredicate() #false
else:
return other.equate(self)
def attribute(self):
"""invent a pair to identify a constant"""
return ('unbound', `self`)
def __repr__(self):
return "<const %s at %s>" % (`self.value0`, id(self))
class TupleCollector:
"""Translate a sequence of assignments to simple tuples.
(for implementing the select list of a SELECT).
"""
contains_aggregate = 0
contains_nonaggregate = 0
def __init__(self):
self.final = None
self.order = []
self.attorder = []
self.exporder = []
def initargs(self):
return ()
def marshaldata(self):
exps = map(serialize, self.exporder)
return (self.attorder, exps,
self.contains_aggregate, self.contains_nonaggregate)
def demarshal(self, args):
(self.attorder, exps, self.contains_aggregate,
self.contains_nonaggregate) = args
exporder = self.exporder = map(deserialize, exps)
self.order = map(None, self.attorder, exporder)
def uncache(self):
for exp in self.exporder:
exp.uncache()
def domain(self):
all=[]
for e in self.exporder:
all = all+e.domain().items()
return kjbuckets.kjSet(all)
def __repr__(self):
l = []
for (att, exp) in self.order:
l.append( "%s as %s" % (exp, att) )
from string import join
return join(l, ", ")
def addbinding(self, attribute, expression):
"""bind att-->expression."""
self.order.append((attribute, expression) )
self.attorder.append(attribute )
self.exporder.append(expression)
if expression.contains_aggregate:
self.contains_aggregate = 1
else:
self.contains_nonaggregate = 1
def map(self, assnlist):
"""remap btlist by self. return (tuplelist, attorder)"""
# DON'T eliminate nulls
from types import IntType
tt = type
values = []
for exp in self.exporder:
values.append(exp.value(assnlist))
if len(values)>1:
valtups = apply(map, (None,) + tuple(values) )
else:
valtups = values[0]
kjUndump = kjbuckets.kjUndump
undumper = tuple(self.attorder)
for i in xrange(len(valtups)):
test = assnlist[i]
if tt(test) is IntType or test is None:
valtups[i] = 0 # null/false
else:
tup = valtups[i]
valtups[i] = kjUndump(undumper, tup)
return (valtups, self.attorder)
def relbind(self, dict, db):
"""disambiguate missing rel names if possible.
also choose output names appropriately."""
# CURRENTLY THIS IS AN "IN PLACE" OPERATION
order = self.order
attorder = self.attorder
exporder = self.exporder
known = {}
for i in xrange(len(order)):
(att, exp) = order[i]
#print exp
exp = exp.relbind(dict, db)
if att is None:
# choose a name for this column
#print exp
(rname, aname) = exp.attribute()
if known.has_key(aname):
both = rname+"."+aname
att = both
count = 0
while known.has_key(att):
# crank away!
count = count+1
att = both+"."+`count`
else:
att = aname
else:
if known.has_key(att):
raise NameError, `att`+" ambiguous in select list"
order[i] = (att, exp)
exporder[i] = exp
attorder[i] = att
known[att] = att
return self
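# Naming sketch: an unaliased output column takes its attribute name;
# on a clash the later one becomes "rel.att", then "rel.att.1",
# "rel.att.2", ... (the while loop in relbind above), while a duplicate
# explicit alias raises NameError.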
class BTPredicate(SimpleRecursive):
"""superclass for bound tuple predicates.
Eventually should be modified to use "compile" for speed
to generate an "inlined" evaluation function.
self(bt) returns bt with additional equality constraints
(possibly) or None if predicate fails."""
false = 0
constraints = None
contains_aggregate = 0
def __init__(self, constraints=None):
"""default interpretation: True."""
if constraints is not None:
self.constraints = constraints.close()
def initargs(self):
return (self.constraints,)
def relbind(self, dict, db):
c = self.constraints
if c is None: return self
return BTPredicate( self.constraints.relbind(dict, db) )
def uncache(self):
pass
#def evaluate(self, db, relnames):
#"""evaluate the predicate over database bindings."""
# pretty simple strategy right now...
### need to do something about all/distinct...
#c = self.constraints
#if c is None:
# c = BoundTuple()
#order = c.relorder(db, relnames)
#if not order:
# raise ValueError, "attempt to evaluate over no relations: "+`relnames`
#result = [c]
#for r in order:
# result = hashjoin(result, r, db[r])
#if self.__class__==BTPredicate:
# # if it's just equality conjunction, we're done
# return result
#else:
# # apply additional constraints
# return self(result)
def domain(self):
c = self.constraints
kjSet = kjbuckets.kjSet
if c is None: return kjSet()
return c.domain()
def __repr__(self):
if self.false: return "FALSE"
c = self.constraints
if c is None: c = "true"
return "[pred](%s)" % c
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
if self.false: return self
if not self.constraints: return None
return self
def negated_constraints(self):
"""equality constraints always false of satisfactory tuple."""
return BoundTuple() # there aren't any
def __call__(self, assignments, toplevel=0):
"""apply self to sequence of assignments
return copy of asssignments with false results
replaced by 0! Input may have 0's!"""
# optimization
# if toplevel, the btpred has been evaluated during join.
if toplevel:
return list(assignments)
from types import IntType
tt = type
lbt = len(assignments)
if self.false: return [0] * lbt
c = self.constraints
if c is None or not c:
result = assignments[:] # no constraints
else:
assns = c.assns
eqs = c.eqs
eqsinteresting = 0
for (a,b) in eqs.items():
if a!=b:
eqsinteresting = 1
result = assignments[:]
for i in xrange(lbt):
this = assignments[i]
#print "comparing", self, "to", this
if type(this) is IntType: continue
this = (this + assns).Clean()
if this is None:
result[i] = 0
elif eqsinteresting:
this = this.remap(eqs)
if this is None:
result[i] = 0
return result
def __and__(self, other):
"""NOTE: all subclasses must define an __and__!!!"""
#print "BTPredicate.__and__", (self, other)
if self.__class__==BTPredicate and other.__class__==BTPredicate:
c = self.constraints
o = other.constraints
            if self.false: return self
            if other.false: return other
            if c is None: return other
            if o is None: return self
# optimization for simple constraints
all = (c+o)
result = BTPredicate( all ) # all constraints
if all is None: result.false = 1
else:
result = other & self
return result
def __or__(self, other):
if self.__class__==BTPredicate and other.__class__==BTPredicate:
c = self.constraints
o = other.constraints
            if other.false: return self
            if self.false: return other
            if c is None: return self # true dominates
            if o is None: return other
if self == other: return self
result = BTor_pred([self, other])
return result
def __invert__(self):
if self.false:
return BTPredicate()
if not self.constraints:
result = BTPredicate()
result.false = 1
return result
return BTnot_pred(self)
def __cmp__(self, other):
test = cmp(other.__class__, self.__class__)
if test: return test
if self.false and other.false: return 0
return cmp(self.constraints, other.constraints)
def __hash__(self):
if self.false: return 11111
return hash(self.constraints)
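# Hedged usage sketch: a predicate filters a list of variable assignments
# (kjDict instances), returning a positionally aligned copy in which
# failing entries are replaced by the integer 0 rather than removed.
#
#   pred = BTPredicate(some_bound_tuple)   # equality constraints, or None
#   kept = pred(assignments)               # e.g. [kjDict(...), 0, kjDict(...)]
#   rows = no_ints_nulls(kept)             # helper used later to strip the 0's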
class BTor_pred(BTPredicate):
def __init__(self, members):
# replace any OR in members with its members
#print "BTor_pred", members
for m in members[:]:
if m.__class__==BTor_pred:
members.remove(m)
members = members + m.members
#print "before", members
members = self.members = kjbuckets.kjSet(members).items()
#print members
for m in members[:]:
if m.false: members.remove(m)
self.constraints = None # common constraints
for m in members:
if m.contains_aggregate:
self.contains_aggregate = 1
if members:
# common constraints are those in all members
constraints = members[0].constraints
for m in members[1:]:
mc = m.constraints
if not constraints or not mc:
constraints = None
break
constraints = constraints & mc
self.constraints = constraints
#print members
def initargs(self):
return (self.members,)
def relbind(self, dict, db):
ms = []
for m in self.members:
ms.append( m.relbind(dict, db) )
return BTor_pred(ms)
def uncache(self):
for m in self.members:
m.uncache()
def domain(self):
all = BTPredicate.domain(self).items()
for x in self.members:
all = all + x.domain().items()
return kjbuckets.kjSet(all)
def __repr__(self):
c = self.constraints
m = self.members
mr = map(repr, m)
from string import join
mr.sort()
mr = join(mr, " | ")
if not mr: mr = "FALSE_OR"
if c:
mr = "[disj](%s and %s)" % (c, mr)
return mr
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
ms = self.members
for i in xrange(len(ms)):
ms[i] = ms[i].detrivialize()
# now suck out subordinate ors
someor = None
for m in ms:
if m.__class__== BTor_pred:
someor = m
ms.remove(m)
break
if someor is not None:
result = someor
for m in ms:
                result = result | m  # | (not +): predicates define __or__, not __add__
return result.detrivialize()
allfalse = 1
for m in ms:
if m is None: allfalse=0; break # true member
allfalse = allfalse & m.false
if allfalse: return ~BTPredicate() # boundary case
ms[:] = filter(None, ms)
if not ms: return None # all true.
ms[:] = kjbuckets.kjSet(ms).items()
if len(ms)==1: return ms[0] # or of 1
return self
def __call__(self, boundtuples, toplevel=0):
# apply common constraints first
lbt = len(boundtuples)
# boundary case for or is false
members = self.members
if not members:
return [0] * lbt
current = BTPredicate.__call__(self, boundtuples, toplevel)
# now apply first alternative
alt1 = members[0](current)
# determine questionables
questionables = current[:]
rng = xrange(len(current))
from types import IntType
tt = type
for i in rng:
if tt(alt1[i]) is not IntType:
questionables[i]=0
# now test other alternatives
#print "alt1", members[0]
#for x in alt1:
#print x
for m in self.members[1:]:
#print "questionables", m
#for x in questionables:
#print x
passm = m(questionables)
for i in rng:
test = passm[i]
if tt(test) is not IntType:
questionables[i] = 0
alt1[i] = test
return alt1
def negated_constraints(self):
"""the negated constraints of an OR are
the negated constraints of *all* members"""
ms = self.members
        result = ms[0].negated_constraints()
for m in ms[1:]:
if not result: return result
mc = m.negated_constraints()
if not mc: return mc
result = result & mc
return result
def __and__(self, other):
"""push "and" down"""
newmembers = self.members[:]
for i in xrange(len(newmembers)):
newmembers[i] = newmembers[i] & other
return BTor_pred(newmembers)
def __or__(self, other):
"""collapse two ors, otherwise just add new member"""
if self.__class__==BTor_pred and other.__class__==BTor_pred:
return BTor_pred(self.members+other.members)
return BTor_pred(self.members + [other])
def __invert__(self):
"""translate to and-not"""
ms = self.members
if not ms: return BTPredicate() # boundary case
result = ~ms[0]
for m in ms[1:]:
result = result & ~m
return result
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
kjSet = kjbuckets.kjSet
test = cmp(kjSet(self.members), kjSet(other.members))
if test: return test
return BTPredicate.__cmp__(self, other)
def __hash__(self):
return hash(kjbuckets.kjSet(self.members))
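# Note on the OR evaluation above (hedged paraphrase): rows failing the
# first alternative become "questionables" and only those are re-tested
# against the remaining alternatives, so each row is evaluated against as
# few branches as possible.
#
#   p = pred1 | pred2          # builds a BTor_pred
#   result = p(assignments)    # a row survives if either branch accepts it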
class BTnot_pred(BTPredicate):
def __init__(self, thing):
self.negated = thing
self.contains_aggregate = thing.contains_aggregate
self.constraints = thing.negated_constraints()
def initargs(self):
return (self.negated,)
def relbind(self, dict, db):
return BTnot_pred( self.negated.relbind(dict, db) )
def uncache(self):
self.negated.uncache()
def domain(self):
result = BTPredicate.domain(self) + self.negated.domain()
#print "neg domain is", `self`, result
return result
def __repr__(self):
c = self.constraints
n = self.negated
r = "(NOT %s)" % n
if c: r = "[neg](%s & %s)" % (c, r)
return r
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
# first, fix or/and/not precedence
thing = self.negated
if thing.__class__ == BTnot_pred:
return thing.negated.detrivialize()
if thing.__class__ == BTor_pred:
# translate to and_not
members = thing.members[:]
for i in xrange(len(members)):
members[i] = ~members[i]
result = BTand_pred(members)
return result.detrivialize()
if thing.__class__ == BTand_pred:
# translate to or_not
members = thing.members[:]
c = thing.constraints # precondition
if c is not None:
members.append(BTPredicate(c))
for i in xrange(len(members)):
members[i] = ~members[i]
result = BTor_pred(members)
return result.detrivialize()
self.negated = thing = self.negated.detrivialize()
if thing is None: return ~BTPredicate() # uniquely false
if thing.false: return None # uniquely true
return self
def __call__(self, boundtuples, toplevel=0):
from types import IntType
tt = type
current = BTPredicate.__call__(self, boundtuples, toplevel)
omit = self.negated(current)
for i in xrange(len(current)):
if tt(omit[i]) is not IntType:
current[i]=0
return current
def negated_constraints(self):
"""the negated constraints of a NOT are the
negated constraints of the thing negated."""
return self.negated.constraints
def __and__(self, other):
"""do the obvious thing."""
return BTand_pred([self, other])
def __or__(self, other):
"""do the obvious thing"""
return BTor_pred([self, other])
def __invert__(self):
return self.negated
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.negated,other.negated)
if test: return test
return BTPredicate.__cmp__(self,other)
def __hash__(self):
return hash(self.negated)^787876^hash(self.constraints)
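# detrivialize above normalizes precedence with De Morgan's laws before
# simplifying (hedged illustration):
#
#   ~(p | q)  -->  ~p & ~q     (BTnot_pred of BTor_pred)
#   ~(p & q)  -->  ~p | ~q     (BTnot_pred of BTand_pred)
#   ~~p       -->  p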
class BTand_pred(BTPredicate):
def __init__(self, members, precondition=None):
#print "BTand_pred", (members, precondition)
members = self.members = kjbuckets.kjSet(members).items()
self.constraints = precondition # common constraints
if members:
# common constraints are those in any member
if precondition is not None:
constraints = precondition
else:
constraints = BoundTuple()
for i in xrange(len(members)):
m = members[i]
mc = m.constraints
if mc:
#print "constraints", constraints
constraints = constraints + mc
if constraints is None: break
if m.__class__==BTPredicate:
members[i] = None # subsumed above
members = self.members = filter(None, members)
for m in members:
if m.contains_aggregate:
self.contains_aggregate=1
### consider propagating constraints down?
self.constraints = constraints
if constraints is None: self.false = 1
def initargs(self):
return (self.members, self.constraints)
def relbind(self, dict, db):
ms = []
for m in self.members:
ms.append( m.relbind(dict, db) )
        c = self.constraints
        if c is not None:
            c = c.relbind(dict, db)
return BTand_pred(ms, c)
def uncache(self):
for m in self.members:
m.uncache()
def domain(self):
all = BTPredicate.domain(self).items()
for x in self.members:
all = all + x.domain().items()
return kjbuckets.kjSet(all)
def __repr__(self):
m = self.members
c = self.constraints
r = map(repr, m)
if self.false: r.insert(0, "FALSE")
from string import join
r = join(r, " AND ")
r = "(%s)" % r
if c: r = "[conj](%s and %s)" % (c, r)
return r
def detrivialize(self):
"""hook added to allow elimination of trivialities
return None if completely true, or simpler form
or self, if no simplification is possible."""
# first apply demorgan's law to push ands down
# (exponential in worst case).
#print "detrivialize"
#print self
ms = self.members
some_or = None
c = self.constraints
for m in ms:
if m.__class__==BTor_pred:
some_or = m
ms.remove(m)
break
if some_or is not None:
            result = some_or
            if c is not None:
                # fold the common constraints back in so they are not lost
                result = result & BTPredicate(c)
for m in ms:
result = result & m # preserves or/and precedence
if result.__class__!=BTor_pred:
raise "what the?"
result = result.detrivialize()
#print "or detected, returning"
#print result
return result
for i in xrange(len(ms)):
ms[i] = ms[i].detrivialize()
ms[:] = filter(None, ms)
if not ms:
#print "returning boundary case of condition"
if c is None:
return None
else:
return BTPredicate(c).detrivialize()
ms[:] = kjbuckets.kjSet(ms).items()
if len(ms)==1 and c is None:
#print "and of 1, returning"
#print ms[0]
return ms[0] # and of 1
return self
def __call__(self, boundtuples, toplevel=0):
# apply common constraints first
current = BTPredicate.__call__(self, boundtuples, toplevel)
for m in self.members:
current = m(current)
return current
def negated_constraints(self):
"""the negated constraints of an AND are
the negated constraints of *any* member"""
ms = self.members
result = BoundTuple()
for m in ms:
mc = m.negated_constraints()
if mc: result = result + mc
return result
def __and__(self, other):
"""push "and" down if other is an or"""
if other.__class__==BTor_pred:
return other & self
c = self.constraints
# merge in other and
if other.__class__==BTand_pred:
allmem = self.members+other.members
oc = other.constraints
if c is None:
c = oc
elif oc is not None:
c = c+oc
return BTand_pred(allmem, c)
return BTand_pred(self.members + [other], c)
def __or__(self, other):
"""do the obvious thing."""
return BTor_pred([self, other])
def __invert__(self):
"""translate to or-not"""
ms = self.members
if not ms: return ~BTPredicate() # boundary case
result = ~ms[0]
for m in ms[1:]:
result = result | ~m
return result
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
kjSet = kjbuckets.kjSet
test = cmp(kjSet(self.members), kjSet(other.members))
if test: return test
return BTPredicate.__cmp__(self, other)
def __hash__(self):
return hash(kjbuckets.kjSet(self.members))
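# Hedged sketch of the predicate algebra as a whole: WHERE conditions are
# assembled with the overloaded &, | and ~ operators, and simple equality
# constraints bubble up into .constraints so the join planner can use them.
#
#   where = (eq_pred & less_pred) | ~other_pred
#   where = where.detrivialize() or BTPredicate()  # None means "always true"
#   kept = where(assignments)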
class NontrivialEqPred(BTPredicate):
"""equation of nontrivial expressions."""
def __init__(self, left, right):
#print "making pred", self.__class__, left, right
        # maybe should use reflexivity...
self.left = left
self.right = right
self.contains_aggregate = left.contains_aggregate or right.contains_aggregate
def initargs(self):
return (self.left, self.right)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.right, other.right)
if test: return test
        return cmp(self.left, other.left)
    def __hash__(self):
        return hash(self.left) ^ hash(self.right)
def relbind(self, dict, db):
Class = self.__class__
return Class(self.left.relbind(dict,db), self.right.relbind(dict,db) )
def uncache(self):
self.left.uncache()
self.right.uncache()
def domain(self):
return self.left.domain() + self.right.domain()
op = "=="
def __repr__(self):
return "(%s)%s(%s)" % (self.left, self.op, self.right)
def detrivialize(self):
return self
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lv = self.left.value(assigns)
rv = self.right.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if type(t) is not IntType and lv[i]!=rv[i]:
result[i] = 0
return result
def negated_constraints(self):
return None
def __and__(self, other):
return BTand_pred( [self, other] )
def __or__(self, other):
return BTor_pred( [self, other] )
def __invert__(self):
return BTnot_pred(self)
class BetweenPredicate(NontrivialEqPred):
"""e1 BETWEEN e2 AND e3"""
def __init__(self, middle, lower, upper):
self.middle = middle
self.lower = lower
self.upper = upper
def initargs(self):
return (self.middle, self.lower, self.upper)
def domain(self):
return (
self.middle.domain() + self.lower.domain() + self.upper.domain())
def relbind(self, dict, db):
self.middle = self.middle.relbind(dict, db)
self.lower = self.lower.relbind(dict, db)
self.upper = self.upper.relbind(dict, db)
return self
def uncache(self):
self.middle.uncache()
self.upper.uncache()
self.lower.uncache()
def __repr__(self):
return "(%s BETWEEN %s AND %s)" % (
self.middle, self.lower, self.upper)
def __hash__(self):
return hash(self.middle)^~hash(self.lower)^hash(self.upper)^55
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.lower, other.lower)
if test: return test
test = cmp(self.middle, other.middle)
if test: return test
return cmp(self.upper, other.upper)
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lowv = self.lower.value(assigns)
upv = self.upper.value(assigns)
midv = self.middle.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if tt(t) is not IntType:
midvi = midv[i]
if lowv[i]>midvi or upv[i]<midvi:
result[i] = 0
return result
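# Hedged example: a parser would map "age BETWEEN 21 AND 65" to
#
#   BetweenPredicate(age_expr, Constant(21), Constant(65))
#
# (Constant being the literal expression class defined earlier); a row
# survives only when lower <= middle <= upper holds for its bound values.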
class ExistsPred(NontrivialEqPred):
"""EXISTS subquery."""
contains_aggregate = 0
def __init__(self, subq):
self.cached_result = None
self.cachable = None
self.subq = subq
def initargs(self):
return (self.subq,)
def domain(self):
result = self.subq.unbound()
# if there are no outer bindings, evaluate ONCE!
if not result:
self.cachable = 1
return result
def relbind(self, dict, db):
self.subq = self.subq.relbind(db, dict)
return self
def uncache(self):
self.cached_result = None
self.subq.uncache()
def __repr__(self):
return "\nEXISTS\n%s\n" % (self.subq,)
def __call__(self, assigns, toplevel=0):
### should optimize!!!
#print "exists"
#print self.subq
from types import IntType
tt = type
eval = self.subq.eval
result = assigns[:]
# shortcut: if cachable, eval only once and cache
if self.cachable:
test = self.cached_result
if test is None:
self.cached_result = test = eval()
#print "exists cached", self.cached_result
if test:
return result
else:
return [0] * len(result)
kjDict = kjbuckets.kjDict
for i in xrange(len(assigns)):
#print "exists uncached"
assignsi = assigns[i]
if tt(assignsi) is IntType: continue
testbtup = BoundTuple()
testbtup.assns = kjDict(assignsi)
test = eval(outerboundtuple=testbtup).rows()
#for x in test:
#print "exists for", assignsi
#print x
#break
if not test:
result[i] = 0
return result
def __hash__(self):
return hash(self.subq)^3333
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
return cmp(self.subq, other.subq)
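# Hedged note: when the EXISTS subquery has no unbound outer names it is
# "cachable" and evaluated exactly once for the whole row set; a correlated
# subquery is re-evaluated per row with that row's bindings, e.g.
#
#   EXISTS (SELECT sal FROM emp)                   # uncorrelated, one eval
#   EXISTS (SELECT sal FROM emp WHERE dept=d.id)   # correlated, per-row eval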
class QuantEQ(NontrivialEqPred):
"""Quantified equal any predicate"""
def __init__(self, expr, subq):
self.expr = expr
self.subq = subq
self.cachable = 0
self.cached_column = None
self.att = None
def initargs(self):
return (self.expr, self.subq)
def uncache(self):
self.cached_column = None
def domain(self):
first = self.subq.unbound()
if not first:
self.cachable = 1
more = self.expr.domain()
return first + more
def relbind(self, dict, db):
subq = self.subq = self.subq.relbind(db, dict)
self.expr = self.expr.relbind(dict, db)
# test that subquery is single column and determine att
sl = subq.select_list
atts = sl.attorder
if len(atts)<>1:
raise ValueError, \
"Quantified predicate requires unit select list: %s" % atts
self.att = atts[0]
return self
fmt = "(%s %s ANY %s)"
op = "="
def __repr__(self):
return self.fmt % (self.expr, self.op, self.subq)
def __call__(self, assigns, toplevel=0):
cached_column = self.cached_column
cachable = self.cachable
expr = self.expr
subq = self.subq
att = self.att
        if cachable:
            if cached_column is None:
                subqr = subq.eval().rows()
                cached_column = self.cached_column = dump_single_column(subqr, att)
                #print self, "cached", self.cached_column
            cc = cached_column
exprvals = expr.value(assigns)
kjDict = kjbuckets.kjDict
compare = self.compare
tt = type
from types import IntType
result = assigns[:]
for i in xrange(len(assigns)):
assignsi = assigns[i]
if tt(assignsi) is IntType: continue
thisval = exprvals[i]
testbtup = BoundTuple()
testbtup.assns = kjDict(assignsi)
if not cachable:
subqr = subq.eval(outerboundtuple=testbtup).rows()
cc = dump_single_column(subqr, att)
#print self, "uncached", cc, thisval
if not compare(thisval, cc):
#print "eliminated", assignsi
result[i] = 0
return result
def compare(self, value, column):
return value in column
def __hash__(self):
return hash(self.subq) ^ ~hash(self.expr)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.expr, other.expr)
if test: return test
return cmp(self.subq, other.subq)
# "expr IN (subq)" same as "expr = ANY (subq)"
InPredicate = QuantEQ
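# Hedged example: both forms parse to the same predicate object,
#
#   x IN (SELECT y FROM t)       -->  QuantEQ(x_expr, subq)
#   x = ANY (SELECT y FROM t)    -->  QuantEQ(x_expr, subq)
#
# and compare() accepts a row when its value occurs in the subquery's
# single dumped column.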
class InLits(NontrivialEqPred):
"""expr IN literals, support dynamic bindings."""
def __init__(self, expr, lits):
self.expr = expr
self.lits = lits
self.cached_lits = None
def initargs(self):
return (self.expr, self.lits)
def uncache(self):
self.cached_lits = None
def domain(self):
d = []
for l in self.lits:
d0 = l.domain()
if d0:
d = d + d0.items()
d0 = self.expr.domain()
if d:
kjSet = kjbuckets.kjSet
return d0 + kjSet(d)
else:
return d0
def relbind(self, dict, db):
newlits = []
for l in self.lits:
newlits.append(l.relbind(dict, db))
self.lits = newlits
self.expr = self.expr.relbind(dict, db)
return self
fmt = "(%s IN %s)"
def __repr__(self):
return self.fmt % (self.expr, self.lits)
def __call__(self, assigns, toplevel=0):
# LITERALS ARE CONSTANT! NEED ONLY LOOK FOR ONE ASSIGN.
tt = type
from types import IntType
litvals = self.cached_lits
if litvals is None:
assigns0 = []
for asn in assigns:
if tt(asn) is not IntType:
assigns0.append(asn)
break
if not assigns0:
# all false/unknown
return assigns
litvals = []
for lit in self.lits:
value = lit.value(assigns0)
litvals.append(value[0])
self.cached_lits = litvals
expr = self.expr
exprvals = expr.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
assignsi = assigns[i]
if tt(assignsi) is IntType: continue
thisval = exprvals[i]
if thisval not in litvals:
#print "eliminated", assignsi
result[i] = 0
return result
def compare(self, value, column):
return value in column
def __hash__(self):
return 10 ^ hash(self.expr)
def __cmp__(self, other):
test = cmp(self.__class__, other.__class__)
if test: return test
test = cmp(self.expr, other.expr)
if test: return test
return cmp(self.lits, other.lits)
class QuantNE(QuantEQ):
"""Quantified not equal any predicate"""
op = "<>"
def compare(self, value, column):
for x in column:
if value!=x: return 1
return 0
### note: faster NOT IN using QuantNE?
class QuantLT(QuantEQ):
"""Quantified less than any predicate"""
op = "<"
def uncache(self):
self.testval = self.cached = self.cached_column = None
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
testval = self.testval = max(column)
self.cached = 1
else:
testval = max(column)
return value < testval
class QuantLE(QuantLT):
"""Quantified less equal any predicate"""
op = "<="
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
testval = self.testval = max(column)
self.cached = 1
else:
testval = max(column)
return value <= testval
class QuantGE(QuantLT):
"""Quantified greater equal any predicate"""
op = ">="
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
testval = self.testval = min(column)
self.cached = 1
else:
testval = min(column)
return value >= testval
class QuantGT(QuantLT):
"""Quantified greater than any predicate"""
op = ">"
def compare(self, value, column):
if self.cachable:
if self.cached:
testval = self.testval
else:
self.testval = testval = min(column)
self.cached = 1
else:
testval = min(column)
return value > testval
def dump_single_column(assigns, att):
"""dump single column assignment"""
result = assigns[:]
for i in xrange(len(result)):
result[i] = result[i][att]
return result
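# Hedged illustration of dump_single_column: given row dictionaries
# equivalent to [{"a": 1}, {"a": 2}] and att="a" it returns [1, 2],
# the shape the Quant* compare() methods expect for their column argument.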
class LessPred(NontrivialEqPred):
op = "<"
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lv = self.left.value(assigns)
rv = self.right.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if tt(t) is not IntType and lv[i]>=rv[i]:
result[i] = 0
return result
    def __invert__(self):
        return LessEqPred(self.right, self.left)
def __hash__(self):
return hash(self.left)^hash(self.right)
class LessEqPred(LessPred):
op = "<="
def __call__(self, assigns, toplevel=0):
from types import IntType
tt = type
lv = self.left.value(assigns)
rv = self.right.value(assigns)
result = assigns[:]
for i in xrange(len(assigns)):
t = assigns[i]
if tt(t) is not IntType and lv[i]>rv[i]:
result[i] = 0
return result
    def __invert__(self):
        return LessPred(self.right, self.left)
class SubQueryExpression(BoundMinus, SimpleRecursive):
"""sub query expression (subq), must eval to single column, single value"""
def __init__(self, subq):
self.subq = subq
self.att = self.cachable = self.cached = self.cached_value = None
def initargs(self):
return (self.subq,)
def uncache(self):
self.cached = self.cached_value = None
def domain(self):
result = self.subq.unbound()
if not result:
self.cachable = 1
#print "expr subq domain", result
return result
def relbind(self, dict, db):
subq = self.subq = self.subq.relbind(db, dict)
# test that subquery is single column and determine att
sl = subq.select_list
atts = sl.attorder
if len(atts)<>1:
            raise ValueError, \
                "subquery expression requires a unit select list: %s" % atts
self.att = atts[0]
return self
def __repr__(self):
return "(%s)" % self.subq
def value(self, contexts):
subq = self.subq
att = self.att
if self.cachable:
if self.cached:
cached_value = self.cached_value
else:
self.cached = 1
seval = subq.eval().rows()
lse = len(seval)
if lse<>1:
raise ValueError, \
"const subquery expression must return 1 result: got %s" % lse
self.cached_value = cached_value = seval[0][att]
#print "const subq cached", cached_value
return [cached_value] * len(contexts)
from types import IntType
tt = type
result = contexts[:]
kjDict = kjbuckets.kjDict
for i in xrange(len(contexts)):
contextsi = contexts[i]
if tt(contextsi) is not IntType:
testbtup = BoundTuple()
testbtup.assns = kjDict(contextsi)
#print "subq exp", testbtup
seval = subq.eval(outerboundtuple=testbtup).rows()
lse = len(seval)
if lse<>1:
raise ValueError, \
"dynamic subquery expression must return 1 result: got %s" % lse
result[i] = seval[0][att]
#print "nonconst subq uncached", result[i], contextsi
return result
SELECT_TEMPLATE = """\
SELECT %s %s
FROM %s
WHERE %s
GROUP BY %s
HAVING %s %s
ORDER BY %s %s
"""
def dynamic_binding(ndynamic, dynamic):
"""create bindings from dynamic tuple for ndynamic parameters
if a tuple is given create one
if a list is given create many
"""
from types import ListType, TupleType
if not dynamic:
if ndynamic>0:
raise ValueError, `ndynamic`+" dynamic parameters unbound"
return [kjbuckets.kjDict()]
ldyn = len(dynamic)
undumper = map(None, [0]*ndynamic, range(ndynamic))
undumper = tuple(undumper)
tdyn = type(dynamic)
if tdyn is TupleType:
ldyn = len(dynamic)
if len(dynamic)!=ndynamic:
raise ValueError, "%s,%s: wrong number of dynamics" % (ldyn,ndynamic)
dynamic = [dynamic]
elif tdyn is not ListType:
raise TypeError, "dynamic parameters must be list or tuple"
else:
lens = map(len, dynamic)
ndynamic = max(lens)
if ndynamic!=min(lens):
raise ValueError, "dynamic parameters of inconsistent lengths"
undumper = map(None, [0]*ndynamic, range(ndynamic))
undumper = tuple(undumper)
result = list(dynamic)
kjUndump = kjbuckets.kjUndump
for i in xrange(len(dynamic)):
dyn = dynamic[i]
ldyn = len(dyn)
#print undumper, dyn
if ldyn==1:
dynresult = kjUndump(undumper, dyn[0])
else:
dynresult = kjUndump(undumper, dyn)
result[i] = dynresult
return result
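# Hedged examples for dynamic_binding (assuming kjbuckets is importable):
#
#   dynamic_binding(2, ("smith", 40))
#       # --> one kjDict binding the two parameter positions
#   dynamic_binding(2, [("smith", 40), ("jones", 50)])
#       # --> two kjDicts, one per repetition of the statement
#   dynamic_binding(2, None)
#       # --> raises ValueError: dynamic parameters unbound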
class Selector:
"""For implementing, eg the SQL SELECT statement."""
def __init__(self, alldistinct,
select_list,
table_reference_list,
where_pred,
group_list,
having_cond,
union_select =None,
order_by_spec =None,
ndynamic=0, # number of dyn params expected
):
self.ndynamic = ndynamic
self.alldistinct = alldistinct
self.select_list = select_list
self.table_list = table_reference_list
self.where_pred = where_pred
self.group_list = group_list
self.having_cond = having_cond
self.union_select = union_select
self.order_by = order_by_spec
#self.union_spec = "DISTINCT" # default union mode
self.relbindings = None # binding of relations
self.unbound_set = None # unbound attributes
self.rel_atts = None # graph of alias-->attname bound in self
self.all_aggregate = 0
if select_list!="*" and not group_list:
if select_list.contains_aggregate:
### should restore this check somewhere else!
#if select_list.contains_nonaggregate:
#raise ValueError, "aggregates/nonaggregates don't mix without grouping"
self.all_aggregate = 1
if where_pred and where_pred.contains_aggregate:
raise ValueError, "aggregate in WHERE"
self.query_plan = None
def initargs(self):
#print self.alldistinct
#print self.select_list
#print self.table_list
#print self.where_pred
#print self.having_cond
#print self.union_select
#print self.group_list
#print self.order_by
#print self.ndynamic
# note: order by requires special handling
return (self.alldistinct, self.select_list, self.table_list, self.where_pred,
None, self.having_cond, self.union_select, None,
self.ndynamic)
def marshaldata(self):
order_by = self.order_by
if order_by:
order_by = map(serialize, order_by)
group_list = self.group_list
if group_list:
group_list = map(serialize, group_list)
#print "marshaldata"
#print order_by
#print group_list
return (order_by, group_list)
def demarshal(self, data):
(order_by, group_list) = data
if order_by:
order_by = map(deserialize, order_by)
if group_list:
group_list = map(deserialize, group_list)
#print "demarshal"
#print order_by
#print group_list
self.order_by = order_by
self.group_list = group_list
def unbound(self):
result = self.unbound_set
if result is None:
raise ValueError, "binding not available"
return result
def uncache(self):
wp = self.where_pred
hc = self.having_cond
sl = self.select_list
if wp is not None: wp.uncache()
if hc is not None: hc.uncache()
sl.uncache()
qp = self.query_plan
if qp:
for joiner in qp:
joiner.uncache()
def relbind(self, db, outerbindings=None):
ad = self.alldistinct
sl = self.select_list
tl = self.table_list
wp = self.where_pred
gl = self.group_list
hc = self.having_cond
us = self.union_select
ob = self.order_by
test = db.bindings(tl)
#print len(test)
#for x in test:
#print x
(attbindings, relbindings, ambiguous, ambiguousatts) = test
if outerbindings:
# bind in outerbindings where unambiguous
for (a,r) in outerbindings.items():
if ((not attbindings.has_key(a))
and (not ambiguousatts.has_key(a)) ):
attbindings[a] = r
# fix "*" select list
if sl=="*":
sl = TupleCollector()
for (a,r) in attbindings.items():
sl.addbinding(None, BoundAttribute(r,a))
for (dotted, (r,a)) in ambiguous.items():
sl.addbinding(dotted, BoundAttribute(r,a))
sl = sl.relbind(attbindings, db)
wp = wp.relbind(attbindings, db)
if hc is not None: hc = hc.relbind(attbindings, db)
if us is not None: us = us.relbind(db, attbindings)
# bind grouping if present
if gl:
gl = relbind_sequence(gl, attbindings, db)
# bind ordering list if present
#print ob
if ob:
ob = relbind_sequence(ob, attbindings, db)
ob = orderbind_sequence(ob, sl.order)
result = Selector(ad, sl, tl, wp, gl, hc, us, ob)
result.relbindings = relbindings
result.ndynamic = self.ndynamic
result.check_domains()
result.plan_query()
query_plan = result.query_plan
for i in range(len(query_plan)):
query_plan[i] = query_plan[i].relbind(db, attbindings)
return result
def plan_query(self):
"""generate a query plan (sequence of join operators)."""
rel_atts = self.rel_atts # rel-->attname
where_pred = self.where_pred.detrivialize()
#select_list = self.select_list
# shortcut
if where_pred is None:
bt = BoundTuple()
else:
bt = self.where_pred.constraints
if bt is None:
bt = BoundTuple()
eqs = kjbuckets.kjGraph(bt.eqs)
witness = kjbuckets.kjDict()
# set all known and unbound atts as witnessed
for att in bt.assns.keys():
witness[att] = 1
#print self, "self.unbound_set", self.unbound_set
for att in self.unbound_set.items():
witness[att] = 1
relbindings = self.relbindings
allrels = relbindings.keys()
#print relbindings
allrels = bt.relorder(relbindings, allrels)
#print allrels
rel_atts = self.rel_atts
plan = []
for rel in allrels:
relation = relbindings[rel]
ratts = rel_atts.neighbors(rel)
h = HashJoiner(bt, rel, ratts, relation, witness)
plan.append(h)
for a in ratts:
ra = (rel, a)
witness[ra] = 1
eqs[ra] = ra
witness = witness.remap(eqs)
self.query_plan = plan
def check_domains(self):
"""determine set of unbound names in self.
"""
relbindings = self.relbindings
sl = self.select_list
wp = self.where_pred
gl = self.group_list
hc = self.having_cond
us = self.union_select
all = sl.domain().items()
if wp is not None:
all = all + wp.domain().items()
# ignore group_list ???
if hc is not None:
all = all + hc.domain().items()
kjSet = kjbuckets.kjSet
kjGraph = kjbuckets.kjGraph
alldomain = kjSet(all)
rel_atts = self.rel_atts = kjGraph(all)
allnames = kjSet()
#print "relbindings", relbindings.keys()
for name in relbindings.keys():
rel = relbindings[name]
for att in rel.attributes():
allnames[ (name, att) ] = 1
# union compatibility check
if us is not None:
us.check_domains()
myatts = self.attributes()
thoseatts = us.attributes()
if myatts!=thoseatts:
if len(myatts)!=len(thoseatts):
raise IndexError, "outer %s, inner %s: union select lists lengths differ"\
% (len(myatts), len(thoseatts))
for p in map(None, myatts, thoseatts):
(x,y)=p
if x!=y:
raise NameError, "%s union names don't match" % (p,)
self.unbound_set = alldomain - allnames
def attributes(self):
return self.select_list.attorder
def eval(self, dynamic=None, outerboundtuple=None):
"""leaves a lot to be desired.
dynamic and outerboundtuple are mutually
exclusive. dynamic is only pertinent to
top levels, outerboundtuple to subqueries"""
#print "select eval", dynamic, outerboundtuple
from gfdb0 import Relation0
# only uncache if outerboundtuple is None (not subquery)
# ???
if outerboundtuple is None:
self.uncache()
query_plan = self.query_plan
where_pred = self.where_pred.detrivialize()
select_list = self.select_list
# shortcut
if where_pred is not None and where_pred.false:
return Relation0(select_list.attorder, [])
#print "where_pred", where_pred
if where_pred is None or where_pred.constraints is None:
assn0 = assn1 = kjbuckets.kjDict()
else:
assn1 = self.where_pred.constraints.assns
assn0 = assn1 = kjbuckets.kjDict(assn1)
# erase stored results from possible previous evaluation
ndynamic = self.ndynamic
if outerboundtuple is not None:
assn1 = assn1 + outerboundtuple.assns
elif ndynamic:
dyn = dynamic_binding(ndynamic, dynamic)
if len(dyn)!=1:
raise ValueError, "only one dynamic subst for selection allowed"
dyn = dyn[0]
assn1 = assn1 + dyn
#print "dynamic", bt
#print "assn1", assn1
# check unbound names
unbound_set = self.unbound_set
#print "unbound", unbound_set
#print unbound_set
#print self.rel_atts
for pair in unbound_set.items():
if not assn1.has_key(pair):
raise KeyError, `pair`+": unbound in selection"
assn1 = (unbound_set * assn1) + assn0
#print "assn1 now", assn1
substseq = [assn1]
for h in query_plan:
#print "***"
#for x in substseq:
#print x
#print "***"
substseq = h.join(substseq)
if not substseq: break
#print "***"
#for x in substseq:
#print x
#print "***"
# apply the rest of the where predicate at top level
if substseq and where_pred is not None:
#where_pred.uncache()
substseq = where_pred(substseq, 1)
# eliminate zeros/nulls
substseq = no_ints_nulls(substseq)
# apply grouping if present
group_list = self.group_list
if substseq and group_list:
substseq = aggregate(substseq, group_list)
having_cond = self.having_cond
#print having_cond
if having_cond is not None:
#having_cond.uncache()
substseq = no_ints_nulls(having_cond(substseq))
elif self.all_aggregate:
# universal group
substseq = [kjbuckets.kjDict( [(None, substseq)] ) ]
(tups, attorder) = select_list.map(substseq)
# do UNION if present
union_select = self.union_select
if union_select is not None:
tups = union_select.eval(tups, dynamic, outerboundtuple)
# apply DISTINCT if appropriate
if self.alldistinct=="DISTINCT":
tups = kjbuckets.kjSet(tups).items()
# apply ordering if present
ob = self.order_by
if ob:
tups = order_tuples(ob, tups)
return Relation0(attorder, tups)
def __repr__(self):
ndyn = ""
if self.ndynamic:
ndyn = "\n[%s dynamic parameters]" % self.ndynamic
result = SELECT_TEMPLATE % (
self.alldistinct,
self.select_list,
self.table_list,
self.where_pred,
self.group_list,
self.having_cond,
#union_spec,
self.union_select,
self.order_by,
ndyn
)
return result
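# Hedged summary of the pipeline in Selector.eval: bind dynamic/outer
# parameters, run the hash-join query plan, apply the residual WHERE
# predicate, group/aggregate, apply HAVING, project through the select
# list, then handle UNION/INTERSECT/EXCEPT, DISTINCT and ORDER BY.
#
#   sel = sel.relbind(db)                 # must happen before evaluation
#   rel = sel.eval(dynamic=[("smith",)])  # one dynamic parameter assumed
#   rows = rel.rows()                     # rel is a gfdb0.Relation0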
class Union(SimpleRecursive):
"""union clause."""
def __init__(self, alldistinct, selection):
self.alldistinct = alldistinct
self.selection = selection
def initargs(self):
return (self.alldistinct, self.selection)
def unbound(self):
return self.selection.unbound()
def relbind(self, db, outer=None):
self.selection = self.selection.relbind(db, outer)
return self
def check_domains(self):
self.selection.check_domains()
def attributes(self):
return self.selection.attributes()
def eval(self, assns, dyn=None, outer=None):
r = self.selection.eval(dyn, outer)
rows = r.rows()
allrows = rows + assns
if self.alldistinct=="DISTINCT":
allrows = kjbuckets.kjSet(allrows).items()
return allrows
def __repr__(self):
return "\nUNION %s %s " % (self.alldistinct, self.selection)
class Intersect(Union):
def eval(self, assns, dyn=None, outer=None):
r = self.selection.eval(dyn, outer)
rows = r.rows()
kjSet = kjbuckets.kjSet
allrows = (kjSet(assns) & kjSet(rows)).items()
return allrows
op = "INTERSECT"
def __repr__(self):
return "\n%s %s" % (self.op, self.selection)
class Except(Union):
def eval(self, assns, dyn=None, outer=None):
r = self.selection.eval(dyn, outer)
rows = r.rows()
kjSet = kjbuckets.kjSet
allrows = (kjSet(assns) - kjSet(rows)).items()
return allrows
op = "EXCEPT"
class Parse_Context:
"""contextual information for parsing
p.param() returns a new sequence number for external parameter.
"""
# not serializable
parameter_index = 0
# no __init__ yet
def param(self):
temp = self.parameter_index
self.parameter_index = temp+1
return temp
def ndynamic(self):
return self.parameter_index
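# Hedged example: each dynamic-parameter placeholder found during parsing
# calls param(), yielding consecutive indices; ndynamic() reports the count.
#
#   ctx = Parse_Context()
#   ctx.param(); ctx.param()   # --> 0, then 1
#   ctx.ndynamic()             # --> 2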
# update/delete/insert statements
import sqlmod
CreateTable = sqlmod.CreateTable
CreateIndex = sqlmod.CreateIndex
DropIndex = sqlmod.DropIndex
DropTable = sqlmod.DropTable
UpdateOp = sqlmod.UpdateOp
DeleteOp = sqlmod.DeleteOp
InsertOp = sqlmod.InsertOp
InsertValues = sqlmod.InsertValues
InsertSubSelect = sqlmod.InsertSubSelect
ColumnDef = sqlmod.ColumnDef
CreateView = sqlmod.CreateView
DropView = sqlmod.DropView
# update storage structures from gfdb0
import gfdb0
Add_Tuples = gfdb0.Add_Tuples
Erase_Tuples = gfdb0.Erase_Tuples
Reset_Tuples = gfdb0.Reset_Tuples
####### testing
# test helpers
#def tp(**kw):
# return maketuple(kw)
#def st(**kw):
# return BTPredicate(BoundTuple(r=kw))
'''this module indicates where the sql datastructures are marshalled
Auto generated on install: better not touch!
'''
filename = '/ext/dev/users/jim2/lib/python/Products/AqueductGadfly/gadfly/sql.mar'
<!--#var standard_html_header-->
<!--#var TABLE_TYPE--><!--#if TABLE_OWNER-->
owned by <!--#var TABLE_OWNER--><!--#/if-->
<!--#if REMARKS--><br><!--#var REMARKS--><!--#/if-->
<!--#var standard_html_footer-->
<!--#var standard_html_header-->
<a href="tableNamed/<!--#var Name-->/manage_designInput">Design Input *</a>
<a href="tableNamed/<!--#var Name-->/manage_designUpdate">Design Update *</a>
<a href="tableNamed/<!--#var Name-->/manage_designDelete">Design Delete</a>
<!--#var standard_html_footer-->
<html>
<head><title><!--#var title_or_id--> tables</title></head>
<body bgcolor="#FFFFFF" link="#000099" vlink="#555555" alink="#77003B">
<!--#var manage_tabs-->
<h2><!--#var title_or_id--> tables</h2>
<form action="manage_wizard" method="POST">
<table cellspacing="2">
<tr>
<th align="LEFT" valign="TOP">Available tables</th>
<td align="LEFT" valign="TOP">
<select name="tables:list" size=9 multiple>
<!--#in table_info-->
<option value="<!--#var sequence-key-->">
<!--#var sequence-key--> <!--#var sequence-item-->
</option>
<!--#/in-->
</select>
</td>
</tr>
<tr>
<th align="LEFT" valign="TOP">Statement type</th>
<td align="LEFT" valign="TOP">
<select name="statement">
<option>SELECT</option>
<option>INSERT</option>
<option>UPDATE</option>
</select>
</td>
</tr>
<tr>
<td></td>
<td><br><input type="SUBMIT" value="Generate SQL"></td>
</tr>
</table>
</form>
</body>
</html>