Mailing List Archive

cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage LogStorage.java
cmarschner 2002/10/22 08:31:16

Modified: contributions/webcrawler-LARM/src/de/lanlab/larm/storage
LogStorage.java
Log:
Write raw bytes to page files instead of characters: store() now reads the document's "contentBytes" field and skips null or empty content.

Revision Changes Path
1.5 +21 -2 jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java

Index: LogStorage.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/storage/LogStorage.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- LogStorage.java 18 Jun 2002 00:44:22 -0000 1.4
+++ LogStorage.java 22 Oct 2002 15:31:16 -0000 1.5
@@ -73,6 +73,8 @@

File pageFile;
FileOutputStream out;
+ /*OutputStreamWriter outw;*/
+
int pageFileCount;
String filePrefix;
int offset;
@@ -122,6 +124,7 @@
{
this.offset = 0;
out = new FileOutputStream(fileName);
+ /*outw = new OutputStreamWriter(out);*/
isValid = true;
}
catch (IOException io)
@@ -178,6 +181,21 @@
return -1;
}

+/*
+ public synchronized int writeToPageFile(char[] chars)
+ {
+ try
+ {
+ getOutputStream();
+ int oldOffset = this.offset;
+ this.offset += outw.write(chars);
+ new java.io.BufferedWriter().
+
+ }
+
+
+ }
+*/

/**
* Sets the logger attribute of the LogStorage object
@@ -201,9 +219,10 @@
public WebDocument store(WebDocument doc)
{
String docInfo = doc.getInfo();
- if (logContents && isValid && doc.getField("content") != null)
+ byte[] content = (byte[])doc.getField("contentBytes");
+ if (logContents && isValid && content != null && content.length != 0)
{
- int offset = writeToPageFile((byte[])doc.getField("content"));
+ int offset = writeToPageFile(content);
docInfo = docInfo + "\t" + pageFileCount + "\t" + offset;
}
log.logThreadSafe(docInfo);




--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>