Mailing List Archive

Please add to CVS: Improved HTMLParser.jj
I've added these features to the latest version of HTMLParser:

* Support for parsing meta tags - new method getMetaTags()
* Fix to ignore the contents of inline <style> tags

The "<style>" fix solves the problem where a document's summary would end
up consisting of nothing but CSS declarations - these are now ignored in the
same way as the contents of "<script>" tags.
Tested OK by parsing over 1000 HTML documents.

Thanks in advance
Mark


============CODE BEGINS ==============================

/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/

// HTMLParser.jj

// JavaCC generator options for this grammar.
options {
// Generate a non-static parser so that each HTMLParser instance keeps its own state.
STATIC = false;
OPTIMIZE_TOKEN_MANAGER = true;
// Debugging aids, disabled by default; uncomment to trace lookahead / tokenizing.
//DEBUG_LOOKAHEAD = true;
//DEBUG_TOKEN_MANAGER = true;
}

PARSER_BEGIN(HTMLParser)

package org.apache.lucene.demo.html;

import java.io.*;
import java.util.Properties;

/**
 * Demo HTML parser that extracts a document's title, a short summary, its
 * META tags and its plain text.
 *
 * <p>Parsing runs on a separate <code>ParserThread</code> (defined elsewhere)
 * that is started lazily by {@link #getReader()}. The parsed text is written
 * to a pipe whose reading end is handed to the caller, while
 * {@link #getTitle()}, {@link #getMetaTags()} and {@link #getSummary()} block
 * until the parser has made enough progress.
 *
 * <p>The package-private fields below are shared with the grammar actions
 * and with <code>ParserThread</code>, so their names and types are kept as is.
 */
public class HTMLParser {
  /** Number of characters the summary is limited to. */
  public static int SUMMARY_LENGTH = 200;

  /** Text collected while inside the TITLE element. */
  StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
  /** Leading non-title text, collected until SUMMARY_LENGTH is reached. */
  StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
  /** META values keyed by the lower-cased NAME / HTTP-EQUIV attribute. */
  Properties metaTags = new Properties();
  /** Key under which the next META content passed to addText() is stored. */
  String currentMetaTag = "";
  /** Number of characters written to the pipe so far. */
  int length = 0;
  /** Set once text following the title has been seen. */
  boolean titleComplete = false;
  boolean inTitle = false;
  boolean inMetaTag = false;
  boolean inStyle = false;
  boolean inScript = false;
  /** True when the previously parsed item was a tag, declaration or comment. */
  boolean afterTag = false;
  /** True when the last thing emitted was whitespace (collapses runs of spaces). */
  boolean afterSpace = false;
  String eol = System.getProperty("line.separator");
  PipedReader pipeIn = null;
  PipedWriter pipeOut;

  /**
   * Creates a parser that reads HTML from the given file.
   *
   * @param file the HTML file to parse
   * @throws FileNotFoundException if the file cannot be opened
   */
  public HTMLParser(File file) throws FileNotFoundException {
    // Delegates to the InputStream constructor generated by JavaCC.
    this(new FileInputStream(file));
  }

  /**
   * Returns the document title, blocking until it is available.
   *
   * @return the trimmed title text (empty if none was collected)
   * @throws IOException if the parsing pipe cannot be created
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  public String getTitle() throws IOException, InterruptedException {
    awaitTitle();
    return title.toString().trim();
  }

  /**
   * Returns the META tags collected so far, blocking on the same condition
   * as {@link #getTitle()}.
   *
   * @return the live Properties object holding the META name/content pairs
   * @throws IOException if the parsing pipe cannot be created
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  public Properties getMetaTags() throws IOException, InterruptedException {
    awaitTitle();
    return metaTags;
  }

  /**
   * Returns the document summary, blocking until the summary buffer holds at
   * least SUMMARY_LENGTH characters. A leading copy of the title is removed
   * from the result.
   *
   * NOTE(review): for documents shorter than SUMMARY_LENGTH this can only
   * return if code outside this class (presumably ParserThread) extends the
   * buffer at end of input -- confirm.
   *
   * @return the summary text, at most SUMMARY_LENGTH characters before trimming
   * @throws IOException if the parsing pipe cannot be created
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  public String getSummary() throws IOException, InterruptedException {
    if (pipeIn == null) {
      getReader(); // spawn parsing thread
    }
    synchronized (this) {
      while (summary.length() < SUMMARY_LENGTH) {
        wait(10);
      }
    }
    if (summary.length() > SUMMARY_LENGTH) {
      summary.setLength(SUMMARY_LENGTH);
    }

    String sum = summary.toString().trim();
    String tit = getTitle();
    if (sum.startsWith(tit)) {
      return sum.substring(tit.length());
    }
    return sum;
  }

  /**
   * Returns a reader over the document's plain text. The first call creates
   * the pipe and starts the parsing thread; later calls return the same reader.
   *
   * @return the reading end of the pipe the parser writes to
   * @throws IOException if the pipe cannot be connected
   */
  public Reader getReader() throws IOException {
    if (pipeIn == null) {
      pipeIn = new PipedReader();
      pipeOut = new PipedWriter(pipeIn);

      Thread thread = new ParserThread(this);
      thread.start(); // start parsing
    }

    return pipeIn;
  }

  /**
   * Blocks until the title is known to be complete or more than
   * SUMMARY_LENGTH characters of text have been produced, starting the
   * parsing thread first if necessary.
   */
  private void awaitTitle() throws IOException, InterruptedException {
    if (pipeIn == null) {
      getReader(); // spawn parsing thread
    }
    synchronized (this) {
      // Timed wait: 'length' is updated without a notification, so poll.
      while (!titleComplete && length <= SUMMARY_LENGTH) {
        wait(10);
      }
    }
  }

  /**
   * Appends text to the summary while it is still shorter than
   * SUMMARY_LENGTH, and wakes up waiting threads once the limit is reached.
   */
  void addToSummary(String text) {
    if (summary.length() < SUMMARY_LENGTH) {
      summary.append(text);
      if (summary.length() >= SUMMARY_LENGTH) {
        synchronized (this) {
          notifyAll();
        }
      }
    }
  }

  /**
   * Handles a piece of document text.
   *
   * <p>Text inside SCRIPT or STYLE is dropped. While inMetaTag is set the
   * text is stored as the value of currentMetaTag instead of being emitted.
   * Otherwise it goes to the title or the summary and is written to the pipe.
   *
   * @param text the text to add
   * @throws IOException if writing to the pipe fails
   */
  void addText(String text) throws IOException {
    if (inScript || inStyle) {
      return;
    }
    if (inMetaTag) {
      metaTags.setProperty(currentMetaTag, text);
      return;
    }
    if (inTitle) {
      title.append(text);
    } else {
      addToSummary(text);
      // The title is finished once body text follows a non-empty title.
      // StringBuffer does not override equals(), so the buffer's length must
      // be tested; comparing it with "" was always "not equal" and marked the
      // title complete on the first text seen, even before any title.
      // NOTE(review): documents without a title now rely on the length limit
      // or on code outside this class setting titleComplete -- confirm
      // ParserThread does so at end of input.
      if (!titleComplete && title.length() > 0) { // finished title
        synchronized (this) {
          titleComplete = true; // tell waiting threads
          notifyAll();
        }
      }
    }

    length += text.length();
    pipeOut.write(text);

    afterSpace = false;
  }

  /**
   * Handles a run of whitespace, emitting at most one separator between
   * pieces of text: a line break when the whitespace directly follows a tag,
   * a single blank otherwise. Whitespace inside SCRIPT or STYLE is dropped,
   * matching addText().
   *
   * @throws IOException if writing to the pipe fails
   */
  void addSpace() throws IOException {
    if (inScript || inStyle) {
      return;
    }
    if (!afterSpace) {
      if (inTitle) {
        title.append(" ");
      } else {
        addToSummary(" ");
      }

      String space = afterTag ? eol : " ";
      length += space.length();
      pipeOut.write(space);
      afterSpace = true;
    }
  }
}

PARSER_END(HTMLParser)


// Top-level production: consumes the whole input, up to <EOF>, as a sequence
// of tags, declarations, comments, words, entities, punctuation and
// whitespace. Text tokens are passed to addText() (entities are first run
// through Entities.decode, which is defined elsewhere) and whitespace to
// addSpace(). afterTag is set after markup and cleared after text or
// whitespace; addSpace() reads it to choose between a line break and a blank.
void HTMLDocument() throws IOException :
{
Token t;
}
{
// Disabled error recovery, kept for reference (see the matching catch below).
// try {
( Tag() { afterTag = true; }
| t=Decl() { afterTag = true; }
| CommentTag() { afterTag = true; }
| t=<Word> { addText(t.image); afterTag = false; }
| t=<Entity> { addText(Entities.decode(t.image)); afterTag = false; }
| t=<Punct> { addText(t.image); afterTag = false; }
| <Space> { addSpace(); afterTag = false; }
)* <EOF>
// } catch (ParseException e) {
// handleException(e);
// }
}

// Parses one start or end tag, from its <TagName> token to the closing
// <TagEnd>.
//
// The tag name updates the inTitle / inMetaTag / inStyle / inScript state
// that addText() and addSpace() consult. While scanning the attributes:
//   * the ALT text of an IMG tag is emitted as "[text]";
//   * for a META tag the NAME (or HTTP-EQUIV) and CONTENT values are
//     remembered and stored in metaTags once the tag is complete.
//
// META values are collected in locals and stored at <TagEnd> so that the
// result does not depend on attribute order, and inMetaTag is cleared there
// so that document text following the tag is not treated as META content.
void Tag() throws IOException :
{
  Token t1, t2;
  boolean inImg = false;
  String metaName = null;     // lower-cased NAME / HTTP-EQUIV value, if seen
  String metaContent = null;  // CONTENT value, if seen
}
{
  t1=<TagName> {
    inTitle = t1.image.equalsIgnoreCase("<title");  // keep track if in <TITLE>
    inMetaTag = t1.image.equalsIgnoreCase("<META"); // keep track if in <META>
    inStyle = t1.image.equalsIgnoreCase("<STYLE");  // keep track if in <STYLE>
    inImg = t1.image.equalsIgnoreCase("<img");      // keep track if in <IMG>
    if (inScript) {                                 // keep track if in <SCRIPT>
      inScript = !t1.image.equalsIgnoreCase("</script");
    } else {
      inScript = t1.image.equalsIgnoreCase("<script");
    }
  }
  (t1=<ArgName>
   (<ArgEquals>
    (t2=ArgValue()
     {
       // ArgValue() yields null for an empty quoted value; nothing to record then.
       if (t2 != null) {
         if (inImg && t1.image.equalsIgnoreCase("alt")) {
           addText("[" + t2.image + "]");           // save ALT text in IMG tag
         }
         if (inMetaTag) {
           if (t1.image.equalsIgnoreCase("name")
               || t1.image.equalsIgnoreCase("HTTP-EQUIV")) {
             metaName = t2.image.toLowerCase();
           } else if (t1.image.equalsIgnoreCase("content")) {
             metaContent = t2.image;
           }
         }
       }
     }
    )?
   )?
  )*
  <TagEnd>
  {
    if (inMetaTag) {
      // Only a META tag carrying both a key and a value is recorded.
      if (metaName != null && metaContent != null) {
        currentMetaTag = metaName;
        metaTags.setProperty(metaName, metaContent);
      }
      // The tag is closed: following text belongs to the document again.
      inMetaTag = false;
    }
  }
}

// Parses an attribute value: unquoted, single-quoted or double-quoted.
// Returns the token holding the value text (without the quotes), or null
// when the value is an empty pair of quotes ('' or "").
Token ArgValue() :
{
Token t = null;
}
{
t=<ArgValue> { return t; }
// Both quoted alternatives start with the same quote token, so two tokens of
// lookahead are needed to recognise the empty form; t is still null here.
| LOOKAHEAD(2)
<ArgQuote1> <CloseQuote1> { return t; }
| <ArgQuote1> t=<Quote1Text> <CloseQuote1> { return t; }
| LOOKAHEAD(2)
<ArgQuote2> <CloseQuote2> { return t; }
| <ArgQuote2> t=<Quote2Text> <CloseQuote2> { return t; }
}


// Parses a "<!name ...>" declaration: the <DeclName> token followed by any
// mix of names, values and "=" signs up to the closing <TagEnd>.
// Returns the <DeclName> token; the remaining tokens are discarded.
Token Decl() :
{
Token t;
}
{
t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
{ return t; }
}


// Parses a comment and discards its text. Two forms are accepted:
// "<!--" ... "-->" (Comment1) and "<!" ... ">" (Comment2).
void CommentTag() :
{}
{
(<Comment1> ( <CommentText1> )* <CommentEnd1>)
|
(<Comment2> ( <CommentText2> )* <CommentEnd2>)
}


// Lexical rules for the DEFAULT state, i.e. document text outside any tag.
TOKEN :
{
// "<", an optional "/", a letter and the rest of the name: start of an
// opening or closing tag. Switches to the WithinTag state.
< TagName: "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
// "<!" followed by a letter and the rest of the name: start of a declaration.
| < DeclName: "<" "!" ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag

// "<!--" opens a comment ended by "-->"; "<!" alone opens one ended by ">".
| < Comment1: "<!--" > : WithinComment1
| < Comment2: "<!" > : WithinComment2

// A word is one or more of: a letter or digit; a letter or digit followed by
// "+" or "/" characters; a digit followed by a double quote; two letters or
// digits joined by "-" or an apostrophe; two digits joined by "," or ".",
// optionally preceded by "$".
| < Word: ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
<LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM> )+ >
// Note that LET includes digits as well as letters.
| < #LET: ["A"-"Z","a"-"z","0"-"9"] >
| < #NUM: ["0"-"9"] >

// Named ("&name") or numeric ("&#digits") character entity; the ";" is optional.
| < Entity: ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+ (";")? ) >

// A run of blanks, tabs and line breaks.
| < Space: (<SP>)+ >
| < #SP: [" ","\t","\r","\n"] >

| < Punct: ~[] > // Keep this last. It is a catch-all.
}


// Lexical rules used between a tag name and the end of the tag.
<WithinTag> TOKEN:
{
// Attribute name: must not start with whitespace, "=", ">" or a quote, and
// runs until whitespace, "=" or ">".
< ArgName: (~[" ","\t","\r","\n","=",">","'","\""])
(~[" ","\t","\r","\n","=",">"])* >
// "=" announces an attribute value; switches to the AfterEquals state.
| < ArgEquals: "=" > : AfterEquals
// ">" (or "=>") ends the tag and returns to document text.
| < TagEnd: ">" | "=>" > : DEFAULT
}

// Unquoted attribute value: must not start with whitespace, "=", ">" or a
// quote, and runs until whitespace or ">". Returns to the WithinTag state.
<AfterEquals> TOKEN:
{
< ArgValue: (~[" ","\t","\r","\n","=",">","'","\""])
(~[" ","\t","\r","\n",">"])* > : WithinTag
}

// An opening quote, in either state, starts a quoted value.
<WithinTag, AfterEquals> TOKEN:
{
< ArgQuote1: "'" > : WithinQuote1
| < ArgQuote2: "\"" > : WithinQuote2
}

// Whitespace inside a tag is skipped rather than passed to the parser.
<WithinTag, AfterEquals> SKIP:
{
< <Space> >
}

// Inside a single-quoted attribute value: everything up to the next "'" is
// the value text; the closing quote returns to the WithinTag state.
<WithinQuote1> TOKEN:
{
< Quote1Text: (~["'"])+ >
| < CloseQuote1: <ArgQuote1> > : WithinTag
}

// Inside a double-quoted attribute value: everything up to the next '"' is
// the value text; the closing quote returns to the WithinTag state.
<WithinQuote2> TOKEN:
{
< Quote2Text: (~["\""])+ >
| < CloseQuote2: <ArgQuote2> > : WithinTag
}


// Inside a "<!--" comment: text is a run of non-dash characters or a single
// dash; "-->" ends the comment and returns to document text.
<WithinComment1> TOKEN :
{
< CommentText1: (~["-"])+ | "-" >
| < CommentEnd1: "-->" > : DEFAULT
}

// Inside a "<!" comment: everything up to the next ">" is comment text; the
// ">" ends the comment and returns to document text.
<WithinComment2> TOKEN :
{
< CommentText2: (~[">"])+ >
| < CommentEnd2: ">" > : DEFAULT
}


--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>
Re: Please add to CVS: Improved HTMLParser.jj [ In reply to ]
Thanks for the contribution.

I haven't tested it (I don't have any code ready to do so), but I've
made a diff (attached for those who prefer that), looked it over, and run
'ant demo' with no errors, so if anyone sees any problems, please
yell.

Otis


--- Mark Harwood <markharwood@totalise.co.uk> wrote:
> I've added these features to the latest version of HTMLParser:
>
> * Support for parsing metatags - new method getMetaTags()
> * Fix to ignore inline <Style></style> tags
>
> The "<Style></style>" fix sorts out the problem where a document's
summary
> would end
> up consisting of just CSS declarations - these are now ignored in the
> same way
> as "<script>" declarations.
> Tested out OK parsing over 1000 html docs
>
> Thanks in advance
> Mark
>
>
> ============CODE BEGINS ==============================
>
> /*
> ====================================================================
> * The Apache Software License, Version 1.1
> *
> * Copyright (c) 2001 The Apache Software Foundation. All rights
> * reserved.
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> * are met:
> *
> * 1. Redistributions of source code must retain the above copyright
> * notice, this list of conditions and the following disclaimer.
> *
> * 2. Redistributions in binary form must reproduce the above
> copyright
> * notice, this list of conditions and the following disclaimer in
> * the documentation and/or other materials provided with the
> * distribution.
> *
> * 3. The end-user documentation included with the redistribution,
> * if any, must include the following acknowledgment:
> * "This product includes software developed by the
> * Apache Software Foundation (http://www.apache.org/)."
> * Alternately, this acknowledgment may appear in the software
> itself,
> * if and wherever such third-party acknowledgments normally
> appear.
> *
> * 4. The names "Apache" and "Apache Software Foundation" and
> * "Apache Lucene" must not be used to endorse or promote products
> * derived from this software without prior written permission.
> For
> * written permission, please contact apache@apache.org.
> *
> * 5. Products derived from this software may not be called "Apache",
> * "Apache Lucene", nor may "Apache" appear in their name, without
> * prior written permission of the Apache Software Foundation.
> *
> * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
> * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
> * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND
> * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> * SUCH DAMAGE.
> *
> ====================================================================
> *
> * This software consists of voluntary contributions made by many
> * individuals on behalf of the Apache Software Foundation. For more
> * information on the Apache Software Foundation, please see
> * <http://www.apache.org/>.
> */
>
> // HTMLParser.jj
>
> options {
> STATIC = false;
> OPTIMIZE_TOKEN_MANAGER = true;
> //DEBUG_LOOKAHEAD = true;
> //DEBUG_TOKEN_MANAGER = true;
> }
>
> PARSER_BEGIN(HTMLParser)
>
> package org.apache.lucene.demo.html;
>
> import java.io.*;
> import java.util.Properties;
>
> public class HTMLParser {
> public static int SUMMARY_LENGTH = 200;
>
> StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
> StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
> Properties metaTags=new Properties();
> String currentMetaTag="";
> int length = 0;
> boolean titleComplete = false;
> boolean inTitle = false;
> boolean inMetaTag = false;
> boolean inStyle = false;
> boolean inScript = false;
> boolean afterTag = false;
> boolean afterSpace = false;
> String eol = System.getProperty("line.separator");
> PipedReader pipeIn = null;
> PipedWriter pipeOut;
>
> public HTMLParser(File file) throws FileNotFoundException {
> this(new FileInputStream(file));
> }
>
> public String getTitle() throws IOException, InterruptedException {
> if (pipeIn == null)
> getReader(); // spawn parsing thread
> while (true) {
> synchronized(this) {
> if (titleComplete || (length > SUMMARY_LENGTH))
> break;
> wait(10);
> }
> }
> return title.toString().trim();
> }
>
> public Properties getMetaTags() throws IOException,
> InterruptedException {
> if (pipeIn == null)
> getReader(); // spawn parsing thread
> while (true) {
> synchronized(this) {
> if (titleComplete || (length > SUMMARY_LENGTH))
> break;
> wait(10);
> }
> }
> return metaTags;
> }
>
>
> public String getSummary() throws IOException, InterruptedException
> {
> if (pipeIn == null)
> getReader(); // spawn parsing thread
> while (true) {
> synchronized(this) {
> if (summary.length() >= SUMMARY_LENGTH)
> break;
> wait(10);
> }
> }
> if (summary.length() > SUMMARY_LENGTH)
> summary.setLength(SUMMARY_LENGTH);
>
> String sum = summary.toString().trim();
> String tit = getTitle();
> if (sum.startsWith(tit))
> return sum.substring(tit.length());
> else
> return sum;
> }
>
> public Reader getReader() throws IOException {
> if (pipeIn == null) {
> pipeIn = new PipedReader();
> pipeOut = new PipedWriter(pipeIn);
>
> Thread thread = new ParserThread(this);
> thread.start(); // start parsing
> }
>
> return pipeIn;
> }
>
> void addToSummary(String text) {
> if (summary.length() < SUMMARY_LENGTH) {
> summary.append(text);
> if (summary.length() >= SUMMARY_LENGTH) {
> synchronized(this) {
> notifyAll();
> }
> }
> }
> }
>
> void addText(String text) throws IOException {
> if (inScript)
> return;
> if (inStyle)
> return;
> if (inMetaTag)
> {
> metaTags.setProperty(currentMetaTag, text);
> return;
> }
> if (inTitle)
> title.append(text);
> else {
> addToSummary(text);
> if (!titleComplete && !title.equals("")) { // finished title
> synchronized(this) {
> titleComplete = true; // tell waiting threads
> notifyAll();
> }
> }
> }
>
> length += text.length();
> pipeOut.write(text);
>
> afterSpace = false;
> }
>
> void addSpace() throws IOException {
> if (inScript)
> return;
> if (!afterSpace) {
> if (inTitle)
> title.append(" ");
> else
> addToSummary(" ");
>
> String space = afterTag ? eol : " ";
> length += space.length();
> pipeOut.write(space);
> afterSpace = true;
> }
> }
>
> // void handleException(Exception e) {
> // System.out.println(e.toString()); // print the error message
> // System.out.println("Skipping...");
> // Token t;
> // do {
> // t = getNextToken();
> // } while (t.kind != TagEnd);
> // }
> }
>
> PARSER_END(HTMLParser)
>
>
> void HTMLDocument() throws IOException :
> {
> Token t;
> }
> {
> // try {
> ( Tag() { afterTag = true; }
> | t=Decl() { afterTag = true; }
> | CommentTag() { afterTag = true; }
> | t=<Word> { addText(t.image); afterTag = false; }
> | t=<Entity> { addText(Entities.decode(t.image)); afterTag =
> false; }
> | t=<Punct> { addText(t.image); afterTag = false; }
> | <Space> { addSpace(); afterTag = false; }
> )* <EOF>
> // } catch (ParseException e) {
> // handleException(e);
> // }
> }
>
> void Tag() throws IOException :
> {
> Token t1, t2;
> boolean inImg = false;
> }
> {
> t1=<TagName> {
> inTitle = t1.image.equalsIgnoreCase("<title"); // keep track if
> in <TITLE>
> inMetaTag = t1.image.equalsIgnoreCase("<META"); // keep track if
> in <META>
> inStyle = t1.image.equalsIgnoreCase("<STYLE"); // keep track if
> in <STYLE>
> inImg = t1.image.equalsIgnoreCase("<img"); // keep track if in
> <IMG>
> if (inScript) { // keep track if in <SCRIPT>
> inScript = !t1.image.equalsIgnoreCase("</script");
> } else {
> inScript = t1.image.equalsIgnoreCase("<script");
> }
> }
> (t1=<ArgName>
> (<ArgEquals>
> (t2=ArgValue() // save ALT text in IMG tag
> {
> if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
> addText("[" + t2.image + "]");
>
> if(inMetaTag &&
> ( t1.image.equalsIgnoreCase("name") ||
> t1.image.equalsIgnoreCase("HTTP-EQUIV")
> )
> && t2 != null)
> {
> currentMetaTag=t2.image.toLowerCase();
> }
> if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
> null)
> {
> addText(t2.image);
> }
> }
> )?
> )?
> )*
> <TagEnd>
> }
>
> Token ArgValue() :
> {
> Token t = null;
> }
> {
> t=<ArgValue> { return t; }
> | LOOKAHEAD(2)
> <ArgQuote1> <CloseQuote1> { return t; }
> | <ArgQuote1> t=<Quote1Text> <CloseQuote1> { return t; }
> | LOOKAHEAD(2)
> <ArgQuote2> <CloseQuote2> { return t; }
> | <ArgQuote2> t=<Quote2Text> <CloseQuote2> { return t; }
> }
>
>
> Token Decl() :
> {
> Token t;
> }
> {
> t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
> { return t; }
> }
>
>
> void CommentTag() :
> {}
> {
> (<Comment1> ( <CommentText1> )* <CommentEnd1>)
> |
> (<Comment2> ( <CommentText2> )* <CommentEnd2>)
> }
>
>
> TOKEN :
> {
> < TagName: "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
> | < DeclName: "<" "!" ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
>
> | < Comment1: "<!--" > : WithinComment1
> | < Comment2: "<!" > : WithinComment2
>
> | < Word: ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
> <LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM>
> )+ >
> | < #LET: ["A"-"Z","a"-"z","0"-"9"] >
> | < #NUM: ["0"-"9"] >
>
> | < Entity: ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+
> (";")? ) >
>
> | < Space: (<SP>)+ >
> | < #SP: [" ","\t","\r","\n"] >
>
> | < Punct: ~[] > // Keep this last. It is a catch-all.
> }
>
>
> <WithinTag> TOKEN:
> {
> < ArgName: (~[" ","\t","\r","\n","=",">","'","\""])
> (~[" ","\t","\r","\n","=",">"])* >
> | < ArgEquals: "=" > : AfterEquals
> | < TagEnd: ">" | "=>" > : DEFAULT
> }
>
> <AfterEquals> TOKEN:
> {
> < ArgValue: (~[" ","\t","\r","\n","=",">","'","\""])
> (~[" ","\t","\r","\n",">"])* > : WithinTag
> }
>
> <WithinTag, AfterEquals> TOKEN:
> {
> < ArgQuote1: "'" > : WithinQuote1
> | < ArgQuote2: "\"" > : WithinQuote2
> }
>
> <WithinTag, AfterEquals> SKIP:
> {
> < <Space> >
> }
>
> <WithinQuote1> TOKEN:
> {
> < Quote1Text: (~["'"])+ >
> | < CloseQuote1: <ArgQuote1> > : WithinTag
> }
>
> <WithinQuote2> TOKEN:
> {
> < Quote2Text: (~["\""])+ >
> | < CloseQuote2: <ArgQuote2> > : WithinTag
> }
>
>
> <WithinComment1> TOKEN :
> {
> < CommentText1: (~["-"])+ | "-" >
> | < CommentEnd1: "-->" > : DEFAULT
> }
>
> <WithinComment2> TOKEN :
> {
> < CommentText2: (~[">"])+ >
> | < CommentEnd2: ">" > : DEFAULT
> }
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-dev-help@jakarta.apache.org>
>

</textarea></font>


</td></tr>
<tr>
<td><input type=checkbox name="SigAtt" value="1" id="SA">Use
my signature</td>
<td align=right>
<input type=checkbox name=Format value="" onclick="setFormat()">
Allow HTML tags [<a href="javascript:Preview()">Preview</a>]
</td>
</tr>
</table>
</td>
</tr>
<tr class=frmb>
<td align=right><b>Options:</b></td>
<td>
<table border=0 cellpadding=2 cellspacing=0>
<tr><td colspan=2><input type=checkbox name="SaveCopy" value="yes"
id="Save">Save a copy in your <b>Sent Items</b> folder</td></tr>

</table>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td>
<table cellpadding=4 cellspacing=0 border=0 width="100%">
<tr class=bbar bgcolor="#3f6c96">
<td nowrap>
<input type=submit name=SEND
value="&nbsp;&nbsp;Send&nbsp;&nbsp;" title="Send Message" class=abutton
>
&nbsp;
<input type=submit name=SD value="Save as a Draft" title="Save
Message in your Drafts folder" class=fbutton >
&nbsp;
<input type=submit name=SC value="Spell Check" title="Check
your message's spelling before sending" class=fbutton >
&nbsp;
<input type=submit name=CAN value="Cancel" title="Cancel"
class=fbutton>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
<table width="100%" cellpadding=0 cellspacing=0 border=0>
<tr><td class=bgd bgcolor="#9bbad6" height=4><img
src="http://us.i1.yimg.com/us.yimg.com/i/space.gif" width=2
height=3></td></tr>
</table>

<center>
<br>
<table cellpadding=4 cellspacing=0 border=0>
<tr>
<td><a href="/ym/Welcome?YY=99325">Mail</a> - <a
href="http://address.yahoo.com/yab/us">Address Book</a> - <a
href="http://calendar.yahoo.com">Calendar</a> - <a
href="http://notepad.yahoo.com">Notepad</a></td>
</tr>
</table>
</center>

<br>
<table cellpadding=0 cellspacing=0 border=0 width="100%"><tr><td
bgcolor=#a0b8c8>
<table cellpadding=1 cellspacing=1 border=0 width="100%">

<tr valign=top bgcolor=#ffffff><td align=center>
<font face="arial" size=-2>
<A
href="http://rd.yahoo.com/footer/?http://address.yahoo.com/">Address&nbsp;Book</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://auctions.yahoo.com/">Auctions</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://autos.yahoo.com/">Autos</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://briefcase.yahoo.com/">Briefcase</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://calendar.yahoo.com/">Calendar</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://careers.yahoo.com/">Careers</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://chat.yahoo.com/">Chat</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://classifieds.yahoo.com/">Classifieds</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://finance.yahoo.com/">Finance</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://games.yahoo.com/">Games</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://geocities.yahoo.com/">Geocities</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://greetings.yahoo.com/">Greetings</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://groups.yahoo.com/">Groups</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://www.yahooligans.com/">Kids</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://mail.yahoo.com/">Mail</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://maps.yahoo.com/">Maps</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://members.yahoo.com/">Member&nbsp;Directory</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://messenger.yahoo.com/">Messenger</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://mobile.yahoo.com/">Mobile</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://movies.yahoo.com/">Movies</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://music.yahoo.com/">Music</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://my.yahoo.com/">My&nbsp;Yahoo!</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://news.yahoo.com/">News</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://paydirect.yahoo.com/">PayDirect</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://people.yahoo.com/">People&nbsp;Search</A>
&#183; <A

href="http://rd.yahoo.com/O=1/footer/?http://personals.yahoo.com/">Personals</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://photos.yahoo.com/">Photos</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://radio.yahoo.com/">Radio</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://shopping.yahoo.com/">Shopping</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://sports.yahoo.com/">Sports</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://tv.yahoo.com/">TV</A> &#183;
<A

href="http://rd.yahoo.com/footer/?http://travel.yahoo.com/">Travel</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://warehouse.yahoo.com/">Warehouse</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://weather.yahoo.com/">Weather</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://yp.yahoo.com/">Yellow&nbsp;Pages</A>
&#183; <A

href="http://rd.yahoo.com/footer/?http://docs.yahoo.com/docs/family/more.html">more...</A>
</font>
</td></tr></table>
</td></tr></table>
<center><small><br>Copyright &copy; 1994-2002 <a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=0/*http://rd.yahoo.com/mail_us/tos/?http://www.yahoo.com"
target="_blank">Yahoo!</a> Inc. All rights reserved.<a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=1/*http://docs.yahoo.com/info/terms/">Terms
of Service</a> - <a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=2/*http://docs.yahoo.com/info/guidelines/mail.html">Guidelines</a><br>NOTICE:
We collect personal information on this site.<br>To learn more about
how we use your information, see our <a
href="http://rd.yahoo.com/M=224039.2020109.3495275.1958505/D=mail/P=m2a0au1411000200/S=150500005:FOOT2/A=1052425/R=3/*http://privacy.yahoo.com/privacy/us/mail/">Privacy
Policy</a></small></center><script>
var ypim_MA_Farm_URL = "http://us.f127.mail.yahoo.com";
var ypim_AB_URL = "http://address.yahoo.com/yab/us";
var ypim_CA_URL = "http://calendar.yahoo.com";
var ypim_NP_URL = "http://notepad.yahoo.com";
var ypim_MA_YY = "418009";
var ypim_IMG = "http://us.i1.yimg.com/us.yimg.com/i/us/pim";
var ypim_Loc = "us";
var ypim_IsCalendarView = false;
var ypim_IsNotepadView = false;
var ypim_i18n_CheckMail = "Check Mail";
var ypim_i18n_Compose = "Compose";
var ypim_i18n_Folders = "Folders";
var ypim_i18n_Search = "Search";
var ypim_i18n_Options = "Options";
var ypim_i18n_Help = "Help";
var ypim_i18n_AddContact = "Add Contact";
var ypim_i18n_AddCategory = "Add Category";
var ypim_i18n_AddList = "Add List";
var ypim_i18n_ViewContacts = "View Contacts";
var ypim_i18n_ViewLists = "View Lists";
var ypim_i18n_Quickbuilder = "Quickbuilder";
var ypim_i18n_ImportContacts = "Import Contacts";
var ypim_i18n_Synchronize = "Synchronize";
var ypim_i18n_AddressesOptions = "Addresses Options";
var ypim_i18n_AddressesHelp = "Addresses Help";
var ypim_i18n_AddEvent = "Add Event";
var ypim_i18n_AddTask = "Add Task";
var ypim_i18n_AddBirthday = "Add Birthday";
var ypim_i18n_Day = "Day";
var ypim_i18n_Week = "Week";
var ypim_i18n_Month = "Month";
var ypim_i18n_Year = "Year";
var ypim_i18n_EventList = "Event List";
var ypim_i18n_Reminders = "Reminders";
var ypim_i18n_Tasks = "Tasks";
var ypim_i18n_Sharing = "Sharing";
var ypim_i18n_Synchronize = "Synchronize";
var ypim_i18n_CalendarOptions = "Calendar Options";
var ypim_i18n_CalendarHelp = "Calendar Help";
var ypim_i18n_AddNote = "Add Note";
var ypim_i18n_AddFolder = "Add Folder";
var ypim_i18n_ViewNotes = "View Notes";
var ypim_i18n_NotepadOptions = "Notepad Options";
var ypim_i18n_NotepadHelp = "Notepad Help";
</script>
<script src="/lib_web/pulldowns.js"></script>
<div id="mail">
</div>
<div id="addr">
</div>
<div id="cal">
</div>
<div id="note" class=menubg>
</div>
</form>
</body>
</html>

<!-- v2.2.20 1025161582 -->
<!--0.31828-->
<!-- compressed -->

__________________________________________________
Do You Yahoo!?
Yahoo! - Official partner of 2002 FIFA World Cup
http://fifaworldcup.yahoo.com