Mailing List Archive: Python-list digest, Vol 1 #455

* Matej Cepl
|
| That was exactly my question: how to make filter with htmllib.py
| doing nothing and just copying HTML input on output?

Here's a simple implementation. Note that you'll need to escape all
'<'s and '&'s in element content and in attribute values. And in
attribute values you'll also need to escape '"'s.

import sys,htmllib

class IDTransform(htmllib.HTMLParser):
    """Identity filter: write the parsed HTML back out as markup.

    Each parser callback re-serializes the event it receives and writes it
    to ``out``.  Text and attribute values are escaped on the way out so
    that characters which are significant in markup ('&', '<', '>' and,
    inside attribute values, '"') do not corrupt the output.

    Entity and character references are copied through verbatim instead of
    being decoded, so they need no re-escaping.

    A newline is written after every tag and processing instruction, so
    the output is not byte-identical to the input.

    NOTE(review): text inside <script>/<style> elements is escaped like any
    other text -- confirm that this is acceptable for the documents being
    filtered.
    """

    def __init__(self, out=None):
        """Create the filter.

        out -- object with a write(str) method that receives the output.
               Defaults to sys.stdout, looked up when the instance is
               created rather than when the class is defined, so a
               redirected sys.stdout is honoured.
        """
        # None is passed as the formatter, exactly as before; the handlers
        # below write straight to self.out instead of using a formatter.
        htmllib.HTMLParser.__init__(self, None)
        if out is None:
            out = sys.stdout
        self.out = out

    # -- helpers ---------------------------------------------------------

    def _escape_text(self, text):
        """Return text with '&', '<' and '>' replaced by entities."""
        # '&' must be handled first, otherwise the '&' of the entities
        # produced by the later replacements would itself be escaped.
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        return text.replace(">", "&gt;")

    def _escape_attr(self, value):
        """Return value escaped for use inside a double-quoted attribute."""
        return self._escape_text(value).replace('"', "&quot;")

    def _write_starttag(self, tag, attributes):
        """Write a start tag; attributes is a sequence of (name, value)."""
        parts = [tag]
        for name, value in attributes:
            parts.append('%s="%s"' % (name, self._escape_attr(value)))
        # Joining with a single space separates the attributes from each
        # other and avoids a trailing space when there are none.
        self.out.write("<%s>\n" % " ".join(parts))

    def _write_endtag(self, tag):
        """Write an end tag."""
        self.out.write("</%s>\n" % tag)

    # -- parser callbacks ------------------------------------------------

    def handle_pi(self, data):
        """Write a processing instruction."""
        self.out.write("<?%s>\n" % data)

    def handle_starttag(self, tag, method, attributes):
        """Write a start tag the parser has a handler for.

        method is the handler the parser found for the tag; it is
        deliberately not called, so the base class does no processing.
        """
        self._write_starttag(tag, attributes)

    def unknown_starttag(self, tag, attributes):
        """Write a start tag the parser has no handler for.

        The inherited implementation ignores such tags, which would drop
        them from the output.
        """
        self._write_starttag(tag, attributes)

    def handle_endtag(self, tag, method):
        """Write an end tag the parser has a handler for (method unused)."""
        self._write_endtag(tag)

    def unknown_endtag(self, tag):
        """Write an end tag the parser has no handler for."""
        self._write_endtag(tag)

    def handle_entityref(self, name):
        """Copy an entity reference through without decoding it."""
        self.out.write("&%s;" % name)

    def unknown_entityref(self, name):
        """Write an entity reference the parser does not know."""
        self.out.write("&%s;" % name)

    def handle_charref(self, name):
        """Copy a numeric character reference through without decoding it."""
        self.out.write("&#%s;" % name)

    def unknown_charref(self, no):
        """Write a character reference the parser could not decode."""
        self.out.write("&#%s;" % no)

    def handle_data(self, data):
        """Write character data, escaping markup-significant characters."""
        self.out.write(self._escape_text(data))

--Lars M.