Mailing List Archive

cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher URLLengthFilter.java
cmarschner 2002/10/22 08:17:06

Modified: contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher
URLLengthFilter.java
Log:
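added logging: the constructor now takes a SimpleLogger, and messages filtered out for exceeding maxLength are logged; also added a setMaxLength() setter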

Revision Changes Path
1.3 +17 -2 jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java

Index: URLLengthFilter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- URLLengthFilter.java 22 May 2002 23:09:17 -0000 1.2
+++ URLLengthFilter.java 22 Oct 2002 15:17:06 -0000 1.3
@@ -54,6 +54,8 @@

package de.lanlab.larm.fetcher;

+import de.lanlab.larm.util.*;
+
/**
* kills URLs longer than X characters. Used to prevent endless loops where
* the page contains the current URL + some extension
@@ -80,13 +82,24 @@

int maxLength;

+// URLLengthFilter()
+// {
+// maxLength = 0;
+// }
+ SimpleLogger log;

/**
* Constructor for the URLLengthFilter object
*
* @param maxLength max length of the _total_ URL (protocol+host+port+path)
*/
- public URLLengthFilter(int maxLength)
+ public URLLengthFilter(int maxLength, SimpleLogger log)
+ {
+ this.maxLength = maxLength;
+ this.log = log;
+ }
+
+ public void setMaxLength(int maxLength)
{
this.maxLength = maxLength;
}
@@ -105,6 +118,8 @@
if (file != null && file.length() > maxLength) // path + query
{
filtered++;
+ //log.log("URLLengthFilter: URL " + m.getUrl() + " exceeds maxLength " + this.maxLength);
+ log.log(message.toString());
return null;
}
return message;




--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>