Mailing List Archive

example checkers based on compiler package
There was some discussion on python-dev over the weekend about
generating warnings, and Moshe Zadka posted a selfnanny that warned
about methods that didn't have self as the first argument.

I think these kinds of warnings are useful, and I'd like to see a more
general framework for them built around the Python abstract syntax
originally from P2C. Ideally, they would be available as command line
tools and integrated into GUIs like IDLE in some useful way.

I've included a couple of quick examples I coded up last night based
on the compiler package (recently re-factored) that is resident in
python/nondist/src/Compiler. The analysis on the one that checks for
name errors is a bit of a mess, but the overall structure seems right.

I'm hoping to collect a few more examples of checkers and generalize
from them to develop a framework for checking for errors and reporting
them.

Jeremy

------------ checkself.py ------------
"""Check for methods that do not have self as the first argument"""

from compiler import parseFile, walk, ast, misc

class Warning:
    """A single diagnostic about one method of one class.

    An instance stores where the problem is (filename, lineno), which
    class and method it concerns, and a human-readable message.
    str(instance) renders those fields through _template.
    """

    # __str__ fills this in from the instance __dict__, so every
    # %(key)s used here must be an attribute set in __init__.
    _template = "%(filename)s:%(lineno)s %(klass)s.%(method)s: %(msg)s"

    def __init__(self, filename, klass, method, lineno, msg):
        self.filename = filename
        self.klass = klass
        self.method = method
        self.lineno = lineno
        self.msg = msg

    def __str__(self):
        return self._template % self.__dict__

class NoArgsWarning(Warning):
    """Warning for a method that is defined with no arguments at all."""

    # Alias for the base-class constructor so __init__ can delegate to it.
    super_init = Warning.__init__

    def __init__(self, filename, klass, method, lineno):
        self.super_init(filename, klass, method, lineno,
                        "no arguments")

class NotSelfWarning(Warning):
    """Warning for a method whose first argument is not named self."""

    # Alias for the base-class constructor so __init__ can delegate to it.
    super_init = Warning.__init__

    def __init__(self, filename, klass, method, lineno, argname):
        # argname is the name actually found in the first argument slot.
        self.super_init(filename, klass, method, lineno,
                        "self slot is named %s" % argname)

class CheckSelf:
    """AST visitor that flags methods whose first argument is not self.

    Warnings are collected in self.warnings.  self.scope is a stack of
    the class and function nodes currently being visited; a function is
    treated as a method only when the innermost enclosing node is a
    class.

    NOTE(review): self.visit is not defined in this class; presumably
    compiler.walk() installs it on the visitor -- confirm.
    """

    def __init__(self, filename):
        self.filename = filename
        self.warnings = []
        self.scope = misc.Stack()

    def inClass(self):
        """Return true if the innermost enclosing scope is a class."""
        if self.scope:
            return isinstance(self.scope.top(), ast.Class)
        return 0

    def visitClass(self, klass):
        """Visit the class body with klass as the current scope."""
        self.scope.push(klass)
        self.visit(klass.code)
        self.scope.pop()
        return 1

    def visitFunction(self, func):
        """Check func if it is a method, then visit its body."""
        if self.inClass():
            classname = self.scope.top().name
            if not func.argnames:
                w = NoArgsWarning(self.filename, classname, func.name,
                                  func.lineno)
                self.warnings.append(w)
            elif func.argnames[0] != "self":
                w = NotSelfWarning(self.filename, classname, func.name,
                                   func.lineno, func.argnames[0])
                self.warnings.append(w)
        # Push every function, method or not, so that functions nested
        # inside a method are not themselves mistaken for methods.
        self.scope.push(func)
        self.visit(func.code)
        self.scope.pop()
        return 1

def check(filename):
    """Parse filename, run CheckSelf over it and print every warning.

    The parse tree and the checker are left in the module globals p and
    checker so they can be inspected afterwards.  The checker used to be
    stored under the global name check, which replaced this function and
    made a second call fail.
    """
    global p, checker
    p = parseFile(filename)
    checker = CheckSelf(filename)
    walk(p, checker)
    for w in checker.warnings:
        print(w)

if __name__ == "__main__":
    import sys

    # XXX need to do real arg processing
    if len(sys.argv) < 2:
        # Report a missing argument instead of dying with IndexError.
        sys.stderr.write("usage: %s file.py\n" % sys.argv[0])
        sys.exit(2)
    check(sys.argv[1])

------------ badself.py ------------
# Checker input: a module-level function, not a method, so taking no
# arguments is fine here.
def foo():
    return 12

# Checker input: the methods below are deliberately wrong; do not "fix"
# them.
class Foo:
    # method with no arguments at all
    def __init__():
        pass

    # well-formed method
    def foo(self, foo):
        pass

    # first argument is not called self
    def bar(this, that):
        # nested function, not a method: its first argument may have
        # any name
        def baz(this=that):
            return this
        return baz

# Checker input: a class defined inside a function.
# NOTE(review): the nesting was reconstructed after the archive dropped
# all indentation; bar is assumed to be a module-level function.
def bar():
    class Quux:
        def __init__(self):
            self.sum = 1

        # deliberately missing self: the first argument is x
        def quam(x, y):
            self.sum = self.sum + (x * y)
    return Quux()

------------ checknames.py ------------
"""Check for NameErrors"""

from compiler import parseFile, walk
from compiler.misc import Stack, Set

import __builtin__
from UserDict import UserDict

class Warning:
    """Base class for diagnostics about a name used in a function.

    The base class does not define _template; __str__ relies on a
    subclass (or the instance) providing one.
    """

    def __init__(self, filename, funcname, lineno):
        self.filename = filename
        self.funcname = funcname
        self.lineno = lineno

    def __str__(self):
        # _template is filled in from the instance attributes.
        return self._template % self.__dict__

class UndefinedLocal(Warning):
    """Warning for a local name that is used before it is defined."""

    # Alias for the base-class constructor so __init__ can delegate to it.
    super_init = Warning.__init__

    _template = "%(filename)s:%(lineno)s %(funcname)s undefined local %(name)s"

    def __init__(self, filename, funcname, lineno, name):
        self.super_init(filename, funcname, lineno)
        self.name = name

class NameError(UndefinedLocal):
    """Warning for a name that is not defined in any known namespace.

    Only the message template differs from UndefinedLocal.  Within this
    module the class hides the builtin exception of the same name.
    """

    _template = "%(filename)s:%(lineno)s %(funcname)s undefined name %(name)s"

class NameSet(UserDict):
    """Track names and the line numbers where they are referenced"""

    def __init__(self):
        # data is the dictionary UserDict operates on; names is a second
        # handle on that same dictionary.
        self.data = self.names = {}

    def add(self, name, lineno):
        """Record one more occurrence of name at lineno."""
        lines = self.names.get(name, [])
        lines.append(lineno)
        self.names[name] = lines

class CheckNames:
    """AST visitor that records where names are defined and used.

    While the tree is walked, each function or class body gets its own
    NameSets for uses, definitions and names declared global.  When a
    namespace is left, its sets are queued on self.todo.  check() then
    analyses every queued namespace and appends UndefinedLocal /
    NameError warnings to self.warnings.

    NOTE(review): self.visit is not defined in this class; presumably
    compiler.walk() installs it on the visitor -- confirm.
    """

    def __init__(self, filename):
        self.filename = filename
        self.warnings = []
        self.scope = Stack()
        # uses and definitions recorded at module level
        self.gUse = NameSet()
        self.gDef = NameSet()
        # _locals is the stack of local namespaces
        # lUse/lDef/lGlobals mirror the top of the stack and are None
        # while no function or class is being visited
        self._locals = Stack()
        self.lUse = None
        self.lDef = None
        self.lGlobals = None # var declared global
        # holds (scope, def, use, global) tuples for later analysis
        self.todo = []

    def enterNamespace(self, node):
        """Start a fresh local namespace for the function or class node."""
        self.scope.push(node)
        self.lUse = use = NameSet()
        self.lDef = _def = NameSet()
        self.lGlobals = gbl = NameSet()
        self._locals.push((use, _def, gbl))

    def exitNamespace(self):
        """Queue the current namespace for analysis and restore the outer one."""
        self.todo.append((self.scope.top(), self.lDef, self.lUse,
                          self.lGlobals))
        self.scope.pop()
        self._locals.pop()
        if self._locals:
            # same (use, def, global) order as pushed by enterNamespace
            self.lUse, self.lDef, self.lGlobals = self._locals.top()
        else:
            self.lUse = self.lDef = self.lGlobals = None

    def warn(self, warning, funcname, lineno, *args):
        """Instantiate the warning class and add it to self.warnings.

        Any extra positional args are passed to the warning constructor
        after filename, funcname and lineno.
        """
        args = (self.filename, funcname, lineno) + args
        self.warnings.append(apply(warning, args))

    def defName(self, name, lineno, local=1):
        """Record a definition of name at lineno.

        At module level the definition goes to gDef.  Inside a namespace
        it goes to lDef, unless local is 0, in which case it is recorded
        as global (gDef) and as declared global here (lGlobals).
        """
        if self.lUse is None:
            self.gDef.add(name, lineno)
        elif local == 0:
            self.gDef.add(name, lineno)
            self.lGlobals.add(name, lineno)
        else:
            self.lDef.add(name, lineno)

    def useName(self, name, lineno, local=1):
        """Record a use of name at lineno.

        At module level the use goes to gUse.  Inside a namespace it
        goes to lUse, and additionally to gUse when local is 0.
        """
        if self.lUse is None:
            self.gUse.add(name, lineno)
        elif local == 0:
            self.gUse.add(name, lineno)
            self.lUse.add(name, lineno)
        else:
            self.lUse.add(name, lineno)

    def check(self):
        """Analyse every namespace queued on self.todo."""
        for s, d, u, g in self.todo:
            self._check(s, d, u, g, self.gDef)
        # XXX then check the globals

    def _check(self, scope, _def, use, gbl, globals):
        """Emit warnings for one namespace.

        scope is the function/class node, _def and use are its NameSets,
        and globals is the NameSet of module-level definitions.  gbl
        (names declared global in the scope) is accepted but not used.
        """
        # NOTE(review): lambda scopes appear to have no name attribute;
        # fall back to a placeholder instead of failing -- confirm
        # against the compiler.ast node definitions.
        funcname = getattr(scope, "name", "<lambda>")

        # check for NameError
        # a name is defined iff it is in defs.keys()
        # a name is global iff it is in gdefs.keys()
        gdefs = UserDict()
        gdefs.update(globals)
        gdefs.update(__builtin__.__dict__)
        defs = UserDict()
        defs.update(gdefs)
        defs.update(_def)
        errors = Set()
        for name in use.keys():
            if not defs.has_key(name):
                firstuse = use[name][0]
                self.warn(NameError, funcname, firstuse, name)
                errors.add(name)

        # check for UndefinedLocalNameError
        # order == use & def sorted by lineno
        # elements are lineno, flag, name
        # flag = 0 if use, flag = 1 if def
        # (so a use sorts before a definition on the same line)
        order = []
        for name, lines in use.items():
            if gdefs.has_key(name) and not _def.has_key(name):
                # this is a global ref, we can skip it
                continue
            for lineno in lines:
                # list.append takes exactly one argument: pass a tuple
                order.append((lineno, 0, name))
        for name, lines in _def.items():
            for lineno in lines:
                order.append((lineno, 1, name))
        order.sort()
        # ready contains names that have been defined or warned about
        ready = Set()
        for lineno, flag, name in order:
            if flag == 0: # use
                if not ready.has_elt(name) and not errors.has_elt(name):
                    self.warn(UndefinedLocal, funcname, lineno, name)
                    ready.add(name) # don't warn again
            else:
                ready.add(name)

    # below are visitor methods

    def visitFunction(self, node, noname=0):
        """Record a function definition and visit its body.

        With noname true (used for lambdas) no name is defined for the
        function itself.
        """
        # defaults are visited before the new namespace is entered
        for expr in node.defaults:
            self.visit(expr)
        if not noname:
            self.defName(node.name, node.lineno)
        self.enterNamespace(node)
        for name in node.argnames:
            self.defName(name, node.lineno)
        self.visit(node.code)
        self.exitNamespace()
        return 1

    def visitLambda(self, node):
        """Treat a lambda as a function that defines no name."""
        return self.visitFunction(node, noname=1)

    def visitClass(self, node):
        """Record a class definition and visit its body."""
        # bases are visited before the new namespace is entered
        for expr in node.bases:
            self.visit(expr)
        self.defName(node.name, node.lineno)
        self.enterNamespace(node)
        self.visit(node.code)
        self.exitNamespace()
        return 1

    def visitName(self, node):
        """A Name node is a use of that name."""
        self.useName(node.name, node.lineno)

    def visitGlobal(self, node):
        """Every name in a global statement is recorded as non-local."""
        for name in node.names:
            self.defName(name, node.lineno, local=0)

    def visitImport(self, node):
        """Every imported name is a definition in the current namespace."""
        for name in node.names:
            self.defName(name, node.lineno)

    visitFrom = visitImport

    def visitAssName(self, node):
        """An assignment target is a definition of that name."""
        self.defName(node.name, node.lineno)

def check(filename):
    """Parse filename, run CheckNames over it and print every warning.

    The parse tree and the checker are left in the module globals p and
    checker so they can be inspected afterwards.
    """
    global p, checker
    p = parseFile(filename)
    checker = CheckNames(filename)
    walk(p, checker)
    # walking only collects the names; the analysis happens in check()
    checker.check()
    for w in checker.warnings:
        print(w)

if __name__ == "__main__":
    import sys

    # XXX need to do real arg processing
    if len(sys.argv) < 2:
        # Report a missing argument instead of dying with IndexError.
        sys.stderr.write("usage: %s file.py\n" % sys.argv[0])
        sys.exit(2)
    check(sys.argv[1])

------------ badnames.py ------------
# XXX can we detect race conditions on accesses to global variables?
# probably can (conservatively) by noting variables _created_ by
# global decls in funcs
import string
import time

# Checker input: y is neither a local of foo nor bound at module level
# in this file.
def foo(x):
    return x + y

# Checker input: z is not a local of foo2; in this file the only
# binding of z is inside bar(), behind a global declaration.
def foo2(x):
    return x + z

a = 4

# Checker input: the assignment makes a local to foo3, so the a on the
# right-hand side is read before the local has a value, even though a
# module-level a exists.
def foo3(x):
    a, b = x, a

def bar(x):
z = x
global z

# Checker input: the import further down makes string a local of bar2,
# so the first line reads that local before the import has bound it.
def bar2(x):
    f = string.strip
    a = f(x)
    import string
    return string.lower(a)

# Checker input: x and y are arguments; z is not a local of baz.
def baz(x, y):
    return x + y + z

# Checker input: inner reads x, which is an argument of the enclosing
# function rather than a local of inner or a module-level name.
# NOTE(review): whether that lookup succeeds depends on the interpreter
# supporting nested scopes.
def outer(x):
    def inner(y):
        return x + y
    return inner
Re: example checkers based on compiler package [ In reply to ]
On Mon, 6 Mar 2000, Jeremy Hylton wrote:

> I think these kinds of warnings are useful, and I'd like to see a more
> general framework for them built are Python abstract syntax originally
> from P2C. Ideally, they would be available as command line tools and
> integrated into GUIs like IDLE in some useful way.

Yes! Guido already suggested we have a standard API to them. One thing
I suggested was that the abstract API include not only the input (one form
or another of an AST), but the output: so IDE's wouldn't have to parse
strings, but get a warning class. Something like a:

An output of a warning can be a subclass of GeneralWarning, and should
implement the following methods:

1. line-no() -- returns an integer
2. columns() -- returns either a pair of integers, or None
3. message() -- returns a string containing a message
4. __str__() -- comes for free if inheriting GeneralWarning,
and formats the warning message.

> I've included a couple of quick examples I coded up last night based
> on the compiler package (recently re-factored) that is resident in
> python/nondist/src/Compiler. The analysis on the one that checks for
> name errors is a bit of a mess, but the overall structure seems right.

One thing I had trouble with is that in my implementation of selfnanny,
I used Python's stack for recursion while you used an explicit stack.
It's probably because of the visitor pattern, which is just another
argument for co-routines and generators.

> I'm hoping to collect a few more examples of checkers and generalize
> from them to develop a framework for checking for errors and reporting
> them.

Cool!
Brainstorming: what kind of warnings would people find useful? In
selfnanny, I wanted to include checking for assignment to self, and
checking for "possible use before definition of local variables" sounds
good. Another check could be a CP4E "checking that no two identifiers
differ only by case". I might code up a few if I have the time...

What I'd really want (but it sounds really hard) is a framework for
partial ASTs: warning people as they write code.

--
Moshe Zadka <mzadka@geocities.com>.
http://www.oreilly.com/news/prescod_0300.html
Re: [Compiler-sig] Re: example checkers based on compiler package [ In reply to ]
Moshe Zadka <moshez@math.huji.ac.il> writes:

> On Mon, 6 Mar 2000, Jeremy Hylton wrote:
>
> > I think these kinds of warnings are useful, and I'd like to see a more
> > general framework for them built are Python abstract syntax originally
> > from P2C. Ideally, they would be available as command line tools and
> > integrated into GUIs like IDLE in some useful way.
>
> Yes! Guido already suggested we have a standard API to them. One thing
> I suggested was that the abstract API include not only the input (one form
> or another of an AST), but the output: so IDE's wouldn't have to parse
> strings, but get a warning class.

That would be seriously cool.

> Something like a:
>
> An output of a warning can be a subclass of GeneralWarning, and should
> implemented the following methods:
>
> 1. line-no() -- returns an integer
> 2. columns() -- returns either a pair of integers, or None
> 3. message() -- returns a string containing a message
> 4. __str__() -- comes for free if inheriting GeneralWarning,
> and formats the warning message.

Wouldn't it make sense to include function/class name here too? A
checker is likely to know, and it would save reparsing to find it out.

[little snip]

> > I'm hoping to collect a few more examples of checkers and generalize
> > from them to develop a framework for checking for errors and reporting
> > them.
>
> Cool!
> Brainstorming: what kind of warnings would people find useful? In
> selfnanny, I wanted to include checking for assigment to self, and
> checking for "possible use before definition of local variables" sounds
> good. Another check could be a CP4E "checking that no two identifiers
> differ only by case". I might code up a few if I have the time...

Is there stuff in the current Compiler code to do control flow
analysis? You'd need that to check for use before definition in
meaningful cases, and also if you ever want to do any optimisation...

> What I'd really want (but it sounds really hard) is a framework for
> partial ASTs: warning people as they write code.

I agree (on both points).

Cheers,
M.

--
very few people approach me in real life and insist on proving they are
drooling idiots. -- Erik Naggum, comp.lang.lisp