Mailing List Archive

Deleting documents from index question.
Good morning all,

I'm trying to delete a set of documents from an index,
and am running into a problem where not all of the documents
are deleted. My problem is either the way I am using the API
or a bug in Lucene...I'm not sure which one it is.

I've included a sample program here that shows the problem. Note
that you will have to change the path at the top to a valid set
of files on your machine. If anybody has any ideas on why I
am not removing the files correctly, please let me know.

Thanks,

-- Rick

/*
* Created by IntelliJ IDEA.
* User: rvestal
* Date: Jun 16, 2002
* Time: 10:23:51 PM
* To change template for new class use
* Code Style | Class Templates options (Tools | IDE Options).
*/
package org.intellij.plugins.docPlugin;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;

import java.io.*;
import java.util.Vector;

public class IndexTest {

// path to ant 1.4.1 docs
private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";

private static String INDEX_DIR = "indexTest";


static private void collectFiles( File dir, Vector files ) {
File[] children = dir.listFiles();
for ( int ix = 0; ix < children.length; ix++ ) {
File child = children[ix];
if ( child.isDirectory() ) {
collectFiles( child, files );
} else {
files.add( child );
}
}
}


public static void main( String[] args ) {
File indexDir = new File( INDEX_DIR );
if ( !indexDir.exists() ) {
indexDir.mkdirs();
}

Vector files = new Vector();
collectFiles( new File( mDirToIndex ), files );

try {
IndexWriter writer = new IndexWriter( INDEX_DIR, new
StandardAnalyzer(), true );

for ( int ix = 0; ix < files.size(); ix++ ) {
File file = ( File ) files.get( ix );
writer.addDocument( IndexTestDocument.createDocument(
file ) );
}
System.out.println( "Added: " + files.size() + " files." );

writer.optimize();
writer.close();
writer = null;

Searcher searcher = new IndexSearcher( INDEX_DIR );
Analyzer analyzer = new StandardAnalyzer();
Query query = QueryParser.parse( "Ant", "contents", analyzer
);

Hits hits = searcher.search( query );
System.out.println( "Hits after add: " + hits.length() );
searcher.close();

Directory directory = FSDirectory.getDirectory( INDEX_DIR,
false );
IndexReader reader = IndexReader.open( directory );

int count = 0;
for ( int ix = 0; ix < files.size(); ix++ ) {
String path = IndexTestDocument.normalizePath( ( ( File
)
files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );

int numDocs = reader.numDocs();
boolean bDeleted = false;
for ( int ndx = 0; ndx < numDocs; ndx++ ) {
if ( !reader.isDeleted( ndx ) ) {
String docPath = IndexTestDocument.getPath(
reader.document( ndx ) );
if ( docPath.equals( path ) ) {
count++;
reader.delete( ndx );
bDeleted = true;
break;
}
}
}
if ( !bDeleted ) {
System.out.println( " Not Deleted: " + path );
for( int ndx = 0; ndx < numDocs; ndx++ ) {
if ( !reader.isDeleted( ndx ) ) {
String docPath = IndexTestDocument.getPath(
reader.document( ndx ) );
System.out.println( " path " + ndx + ":
" +
docPath );
}
}
}
}
System.out.println( "Removed " + count + " documents of (" +

files.size() + ")" );
reader.close();

searcher = new IndexSearcher( INDEX_DIR );
analyzer = new StandardAnalyzer();
query = QueryParser.parse( "Ant", "contents", analyzer );

hits = searcher.search( query );
System.out.println( "Hits after remove: " + hits.length() );

} catch ( Exception ex ) {
ex.printStackTrace();
}
}


static class IndexTestDocument {

static public Document createDocument( File f )
throws FileNotFoundException {
Document doc = new Document();
doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
);
Reader reader = new BufferedReader( new InputStreamReader(
new
FileInputStream( f ) ) );
doc.add( Field.Text( "contents", reader ) );
return doc;
}


static public String getPath( Document doc ) {
return ( String ) doc.get( "path" );
}

static public String normalizePath( String path ) {
if ( path == null || path.length() == 0 ) {
return "";
}
path = path.replace( '\\', '/' );
File f = new File( path );
if ( f.isDirectory() ) {
if ( path.charAt( path.length() - 1 ) != '/' ) {
path = path + "/";
}
}
return path;
}
}
}



--
Center for Agile Technology phone: 512.232.4399
The University of Texas at Austin fax: 512.232.6413
3925 West Braker Lane email: rick@cat.utexas.edu
MCC Suite 3.11040 CAT http://cat.utexas.edu/
Austin, TX 78759-5316


--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
Re: Deleting documents from index question. [ In reply to ]
hi, i think you must run writer.optimize after deleting docs before it takes
effect, deleted documents are only marked as deleted until then...


mvh karl øie

On Monday 17 June 2002 15:33, Rick Vestal wrote:
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted. My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem. Note
> that you will have to change the path at the top to a valid set
> of files on your machine. If anybody has any ideas on why I
> am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
> * Created by IntelliJ IDEA.
> * User: rvestal
> * Date: Jun 16, 2002
> * Time: 10:23:51 PM
> * To change template for new class use
> * Code Style | Class Templates options (Tools | IDE Options).
> */
> package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
> // path to ant 1.4.1 docs
> private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";
>
> private static String INDEX_DIR = "indexTest";
>
>
> static private void collectFiles( File dir, Vector files ) {
> File[] children = dir.listFiles();
> for ( int ix = 0; ix < children.length; ix++ ) {
> File child = children[ix];
> if ( child.isDirectory() ) {
> collectFiles( child, files );
> } else {
> files.add( child );
> }
> }
> }
>
>
> public static void main( String[] args ) {
> File indexDir = new File( INDEX_DIR );
> if ( !indexDir.exists() ) {
> indexDir.mkdirs();
> }
>
> Vector files = new Vector();
> collectFiles( new File( mDirToIndex ), files );
>
> try {
> IndexWriter writer = new IndexWriter( INDEX_DIR, new
> StandardAnalyzer(), true );
>
> for ( int ix = 0; ix < files.size(); ix++ ) {
> File file = ( File ) files.get( ix );
> writer.addDocument( IndexTestDocument.createDocument(
> file ) );
> }
> System.out.println( "Added: " + files.size() + " files." );
>
> writer.optimize();
> writer.close();
> writer = null;
>
> Searcher searcher = new IndexSearcher( INDEX_DIR );
> Analyzer analyzer = new StandardAnalyzer();
> Query query = QueryParser.parse( "Ant", "contents", analyzer
> );
>
> Hits hits = searcher.search( query );
> System.out.println( "Hits after add: " + hits.length() );
> searcher.close();
>
> Directory directory = FSDirectory.getDirectory( INDEX_DIR,
> false );
> IndexReader reader = IndexReader.open( directory );
>
> int count = 0;
> for ( int ix = 0; ix < files.size(); ix++ ) {
> String path = IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
> int numDocs = reader.numDocs();
> boolean bDeleted = false;
> for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
> if ( docPath.equals( path ) ) {
> count++;
> reader.delete( ndx );
> bDeleted = true;
> break;
> }
> }
> }
> if ( !bDeleted ) {
> System.out.println( " Not Deleted: " + path );
> for( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
> System.out.println( " path " + ndx + ":
> " +
> docPath );
> }
> }
> }
> }
> System.out.println( "Removed " + count + " documents of (" +
>
> files.size() + ")" );
> reader.close();
>
> searcher = new IndexSearcher( INDEX_DIR );
> analyzer = new StandardAnalyzer();
> query = QueryParser.parse( "Ant", "contents", analyzer );
>
> hits = searcher.search( query );
> System.out.println( "Hits after remove: " + hits.length() );
>
> } catch ( Exception ex ) {
> ex.printStackTrace();
> }
> }
>
>
> static class IndexTestDocument {
>
> static public Document createDocument( File f )
> throws FileNotFoundException {
> Document doc = new Document();
> doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
> );
> Reader reader = new BufferedReader( new InputStreamReader(
> new
> FileInputStream( f ) ) );
> doc.add( Field.Text( "contents", reader ) );
> return doc;
> }
>
>
> static public String getPath( Document doc ) {
> return ( String ) doc.get( "path" );
> }
>
> static public String normalizePath( String path ) {
> if ( path == null || path.length() == 0 ) {
> return "";
> }
> path = path.replace( '\\', '/' );
> File f = new File( path );
> if ( f.isDirectory() ) {
> if ( path.charAt( path.length() - 1 ) != '/' ) {
> path = path + "/";
> }
> }
> return path;
> }
> }
> }


--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
RE: Deleting documents from index question. [ In reply to ]
I believe that did the trick!

Thanks for the info.

-- Rick

> -----Original Message-----
> From: Karl Øie [mailto:karl@gan.no]
> Sent: Monday, June 17, 2002 8:38 AM
> To: Lucene Users List
> Subject: Re: Deleting documents from index question.
>
>
> hi, i think you must run writer.optimize after deleting docs
> before it takes
> effect, deleted documents are only marked as deleted until then...
>
>
> mvh karl øie
>
> On Monday 17 June 2002 15:33, Rick Vestal wrote:
> > Good morning all,
> >
> > I'm trying to delete a set of documents from an index,
> > and am running into a problem where all the documents are
> > not deleted. My problem is either the way I am using the
> API or it is
> > a bug in lucene...I'm not sure which one it is.
> >
> > I've included a sample program here that shows the problem.
> Note that
> > you will have to change the path at the top to a valid set
> of files on
> > your machine. If anybody has any ideas on why I am not
> removing the
> > files correctly, please let me know.
> >
> > Thanks,
> >
> > -- Rick
> >
> > /*
> > * Created by IntelliJ IDEA.
> > * User: rvestal
> > * Date: Jun 16, 2002
> > * Time: 10:23:51 PM
> > * To change template for new class use
> > * Code Style | Class Templates options (Tools | IDE Options). */
> > package org.intellij.plugins.docPlugin;
> >
> > import org.apache.lucene.analysis.Analyzer;
> > import org.apache.lucene.analysis.standard.StandardAnalyzer;
> > import org.apache.lucene.document.*;
> > import org.apache.lucene.index.*;
> > import org.apache.lucene.queryParser.QueryParser;
> > import org.apache.lucene.search.*;
> > import org.apache.lucene.store.*;
> >
> > import java.io.*;
> > import java.util.Vector;
> >
> > public class IndexTest {
> >
> > // path to ant 1.4.1 docs
> > private static String mDirToIndex =
> > "c:/utils/ant/docs/manual/api/";
> >
> > private static String INDEX_DIR = "indexTest";
> >
> >
> > static private void collectFiles( File dir, Vector files ) {
> > File[] children = dir.listFiles();
> > for ( int ix = 0; ix < children.length; ix++ ) {
> > File child = children[ix];
> > if ( child.isDirectory() ) {
> > collectFiles( child, files );
> > } else {
> > files.add( child );
> > }
> > }
> > }
> >
> >
> > public static void main( String[] args ) {
> > File indexDir = new File( INDEX_DIR );
> > if ( !indexDir.exists() ) {
> > indexDir.mkdirs();
> > }
> >
> > Vector files = new Vector();
> > collectFiles( new File( mDirToIndex ), files );
> >
> > try {
> > IndexWriter writer = new IndexWriter( INDEX_DIR, new
> > StandardAnalyzer(), true );
> >
> > for ( int ix = 0; ix < files.size(); ix++ ) {
> > File file = ( File ) files.get( ix );
> > writer.addDocument(
> IndexTestDocument.createDocument(
> > file ) );
> > }
> > System.out.println( "Added: " + files.size() +
> " files."
> > );
> >
> > writer.optimize();
> > writer.close();
> > writer = null;
> >
> > Searcher searcher = new IndexSearcher( INDEX_DIR );
> > Analyzer analyzer = new StandardAnalyzer();
> > Query query = QueryParser.parse( "Ant", "contents",
> > analyzer );
> >
> > Hits hits = searcher.search( query );
> > System.out.println( "Hits after add: " +
> hits.length() );
> > searcher.close();
> >
> > Directory directory = FSDirectory.getDirectory(
> INDEX_DIR,
> > false );
> > IndexReader reader = IndexReader.open( directory );
> >
> > int count = 0;
> > for ( int ix = 0; ix < files.size(); ix++ ) {
> > String path = IndexTestDocument.normalizePath( ( (
> > File
> > )
> > files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
> >
> > int numDocs = reader.numDocs();
> > boolean bDeleted = false;
> > for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> > if ( !reader.isDeleted( ndx ) ) {
> > String docPath = IndexTestDocument.getPath(
> > reader.document( ndx ) );
> > if ( docPath.equals( path ) ) {
> > count++;
> > reader.delete( ndx );
> > bDeleted = true;
> > break;
> > }
> > }
> > }
> > if ( !bDeleted ) {
> > System.out.println( " Not Deleted: " + path );
> > for( int ndx = 0; ndx < numDocs; ndx++ ) {
> > if ( !reader.isDeleted( ndx ) ) {
> > String docPath =
> > IndexTestDocument.getPath( reader.document( ndx ) );
> > System.out.println( " path
> " + ndx + ":
> > " +
> > docPath );
> > }
> > }
> > }
> > }
> > System.out.println( "Removed " + count + "
> documents of ("
> > +
> >
> > files.size() + ")" );
> > reader.close();
> >
> > searcher = new IndexSearcher( INDEX_DIR );
> > analyzer = new StandardAnalyzer();
> > query = QueryParser.parse( "Ant", "contents",
> analyzer );
> >
> > hits = searcher.search( query );
> > System.out.println( "Hits after remove: " +
> hits.length()
> > );
> >
> > } catch ( Exception ex ) {
> > ex.printStackTrace();
> > }
> > }
> >
> >
> > static class IndexTestDocument {
> >
> > static public Document createDocument( File f )
> > throws FileNotFoundException {
> > Document doc = new Document();
> > doc.add( Field.Text( "path", normalizePath(
> f.getPath() )
> > ) );
> > Reader reader = new BufferedReader( new
> InputStreamReader(
> > new FileInputStream( f ) ) );
> > doc.add( Field.Text( "contents", reader ) );
> > return doc;
> > }
> >
> >
> > static public String getPath( Document doc ) {
> > return ( String ) doc.get( "path" );
> > }
> >
> > static public String normalizePath( String path ) {
> > if ( path == null || path.length() == 0 ) {
> > return "";
> > }
> > path = path.replace( '\\', '/' );
> > File f = new File( path );
> > if ( f.isDirectory() ) {
> > if ( path.charAt( path.length() - 1 ) != '/' ) {
> > path = path + "/";
> > }
> > }
> > return path;
> > }
> > }
> > }
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>


--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
RE: Deleting documents from index question. [ In reply to ]
I run my delete function without optimizing because it takes too long
and because it doesn't inflate the number of files in the index like an
insert does
(it just adds a file, I imagine for exclusion purposes until the next
optimize)
and it works fine.

here's how I delete :

/**
 * Deletes every document whose "id" field equals fieldValue from the index
 * opened at indexPath.
 *
 * An IOException is not propagated; its text is appended to errorText.
 *
 * NOTE(review): indexPath and errorText are declared outside this snippet,
 * and filePath is not used here -- confirm whether the index location was
 * meant to come from filePath.
 *
 * @param filePath   unused by this method; kept so existing callers still compile
 * @param fieldValue value of the "id" field identifying the documents to delete
 */
public static synchronized void deleteIndexEntry ( String filePath ,
        String fieldValue ) {
    final String fieldName = "id" ;
    IndexReader reader = null;
    try {
        reader = IndexReader.open(indexPath);
        Term targetTerm = new Term(fieldName,fieldValue) ;
        reader.delete(targetTerm) ;
    } catch (java.io.IOException e) {
        errorText = errorText.concat("DeleteIndex :"+e+"\n") ;
    } finally {
        // Close in finally so the reader is released even when delete() fails.
        if (reader != null) {
            try {
                reader.close();
            } catch (java.io.IOException e) {
                errorText = errorText.concat("DeleteIndex :"+e+"\n") ;
            }
        }
    }
}


-----Original Message-----
From: Karl Øie [mailto:karl@gan.no]
Sent: Monday, June 17, 2002 5:38 PM
To: Lucene Users List
Subject: Re: Deleting documents from index question.


hi, i think you must run writer.optimize after deleting docs before it takes
effect, deleted documents are only marked as deleted until then...


mvh karl øie

On Monday 17 June 2002 15:33, Rick Vestal wrote:
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted. My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem. Note
> that you will have to change the path at the top to a valid set
> of files on your machine. If anybody has any ideas on why I
> am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
> * Created by IntelliJ IDEA.
> * User: rvestal
> * Date: Jun 16, 2002
> * Time: 10:23:51 PM
> * To change template for new class use
> * Code Style | Class Templates options (Tools | IDE Options).
> */
> package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
> // path to ant 1.4.1 docs
> private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";
>
> private static String INDEX_DIR = "indexTest";
>
>
> static private void collectFiles( File dir, Vector files ) {
> File[] children = dir.listFiles();
> for ( int ix = 0; ix < children.length; ix++ ) {
> File child = children[ix];
> if ( child.isDirectory() ) {
> collectFiles( child, files );
> } else {
> files.add( child );
> }
> }
> }
>
>
> public static void main( String[] args ) {
> File indexDir = new File( INDEX_DIR );
> if ( !indexDir.exists() ) {
> indexDir.mkdirs();
> }
>
> Vector files = new Vector();
> collectFiles( new File( mDirToIndex ), files );
>
> try {
> IndexWriter writer = new IndexWriter( INDEX_DIR, new
> StandardAnalyzer(), true );
>
> for ( int ix = 0; ix < files.size(); ix++ ) {
> File file = ( File ) files.get( ix );
> writer.addDocument( IndexTestDocument.createDocument(
> file ) );
> }
> System.out.println( "Added: " + files.size() + " files." );
>
> writer.optimize();
> writer.close();
> writer = null;
>
> Searcher searcher = new IndexSearcher( INDEX_DIR );
> Analyzer analyzer = new StandardAnalyzer();
> Query query = QueryParser.parse( "Ant", "contents", analyzer
> );
>
> Hits hits = searcher.search( query );
> System.out.println( "Hits after add: " + hits.length() );
> searcher.close();
>
> Directory directory = FSDirectory.getDirectory( INDEX_DIR,
> false );
> IndexReader reader = IndexReader.open( directory );
>
> int count = 0;
> for ( int ix = 0; ix < files.size(); ix++ ) {
> String path = IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
> int numDocs = reader.numDocs();
> boolean bDeleted = false;
> for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
> if ( docPath.equals( path ) ) {
> count++;
> reader.delete( ndx );
> bDeleted = true;
> break;
> }
> }
> }
> if ( !bDeleted ) {
> System.out.println( " Not Deleted: " + path );
> for( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
> System.out.println( " path " + ndx + ":
> " +
> docPath );
> }
> }
> }
> }
> System.out.println( "Removed " + count + " documents of (" +
>
> files.size() + ")" );
> reader.close();
>
> searcher = new IndexSearcher( INDEX_DIR );
> analyzer = new StandardAnalyzer();
> query = QueryParser.parse( "Ant", "contents", analyzer );
>
> hits = searcher.search( query );
> System.out.println( "Hits after remove: " + hits.length() );
>
> } catch ( Exception ex ) {
> ex.printStackTrace();
> }
> }
>
>
> static class IndexTestDocument {
>
> static public Document createDocument( File f )
> throws FileNotFoundException {
> Document doc = new Document();
> doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
> );
> Reader reader = new BufferedReader( new InputStreamReader(
> new
> FileInputStream( f ) ) );
> doc.add( Field.Text( "contents", reader ) );
> return doc;
> }
>
>
> static public String getPath( Document doc ) {
> return ( String ) doc.get( "path" );
> }
>
> static public String normalizePath( String path ) {
> if ( path == null || path.length() == 0 ) {
> return "";
> }
> path = path.replace( '\\', '/' );
> File f = new File( path );
> if ( f.isDirectory() ) {
> if ( path.charAt( path.length() - 1 ) != '/' ) {
> path = path + "/";
> }
> }
> return path;
> }
> }
> }


--
To unsubscribe, e-mail:
<mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail:
<mailto:lucene-user-help@jakarta.apache.org>



--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
RE: Deleting documents from index question. [ In reply to ]
PS: try closing the reader after you're done deleting and open a new one
for the search,
kind of like committing a transaction to a normal DB

-----Original Message-----
From: Rick Vestal [mailto:rick@cat.utexas.edu]
Sent: Monday, June 17, 2002 5:34 PM
To: lucene-user@jakarta.apache.org
Subject: Deleting documents from index question.


Good morning all,

I'm trying to delete a set of documents from an index,
and am running into a problem where all the documents are
not deleted. My problem is either the way I am using the API
or it is a bug in lucene...I'm not sure which one it is.

I've included a sample program here that shows the problem. Note
that you will have to change the path at the top to a valid set
of files on your machine. If anybody has any ideas on why I
am not removing the files correctly, please let me know.

Thanks,

-- Rick

/*
* Created by IntelliJ IDEA.
* User: rvestal
* Date: Jun 16, 2002
* Time: 10:23:51 PM
* To change template for new class use
* Code Style | Class Templates options (Tools | IDE Options).
*/
package org.intellij.plugins.docPlugin;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;

import java.io.*;
import java.util.Vector;

public class IndexTest {

// path to ant 1.4.1 docs
private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";

private static String INDEX_DIR = "indexTest";


static private void collectFiles( File dir, Vector files ) {
File[] children = dir.listFiles();
for ( int ix = 0; ix < children.length; ix++ ) {
File child = children[ix];
if ( child.isDirectory() ) {
collectFiles( child, files );
} else {
files.add( child );
}
}
}


public static void main( String[] args ) {
File indexDir = new File( INDEX_DIR );
if ( !indexDir.exists() ) {
indexDir.mkdirs();
}

Vector files = new Vector();
collectFiles( new File( mDirToIndex ), files );

try {
IndexWriter writer = new IndexWriter( INDEX_DIR, new
StandardAnalyzer(), true );

for ( int ix = 0; ix < files.size(); ix++ ) {
File file = ( File ) files.get( ix );
writer.addDocument( IndexTestDocument.createDocument(
file ) );
}
System.out.println( "Added: " + files.size() + " files." );

writer.optimize();
writer.close();
writer = null;

Searcher searcher = new IndexSearcher( INDEX_DIR );
Analyzer analyzer = new StandardAnalyzer();
Query query = QueryParser.parse( "Ant", "contents", analyzer
);

Hits hits = searcher.search( query );
System.out.println( "Hits after add: " + hits.length() );
searcher.close();

Directory directory = FSDirectory.getDirectory( INDEX_DIR,
false );
IndexReader reader = IndexReader.open( directory );

int count = 0;
for ( int ix = 0; ix < files.size(); ix++ ) {
String path = IndexTestDocument.normalizePath( ( ( File
)
files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );

int numDocs = reader.numDocs();
boolean bDeleted = false;
for ( int ndx = 0; ndx < numDocs; ndx++ ) {
if ( !reader.isDeleted( ndx ) ) {
String docPath = IndexTestDocument.getPath(
reader.document( ndx ) );
if ( docPath.equals( path ) ) {
count++;
reader.delete( ndx );
bDeleted = true;
break;
}
}
}
if ( !bDeleted ) {
System.out.println( " Not Deleted: " + path );
for( int ndx = 0; ndx < numDocs; ndx++ ) {
if ( !reader.isDeleted( ndx ) ) {
String docPath = IndexTestDocument.getPath(
reader.document( ndx ) );
System.out.println( " path " + ndx + ":
" +
docPath );
}
}
}
}
System.out.println( "Removed " + count + " documents of (" +

files.size() + ")" );
reader.close();

searcher = new IndexSearcher( INDEX_DIR );
analyzer = new StandardAnalyzer();
query = QueryParser.parse( "Ant", "contents", analyzer );

hits = searcher.search( query );
System.out.println( "Hits after remove: " + hits.length() );

} catch ( Exception ex ) {
ex.printStackTrace();
}
}


/**
 * Helpers that build and read the Lucene documents used by this test
 * program. Each document carries two fields: "path" and "contents".
 */
static class IndexTestDocument {

    /**
     * Builds the document that represents a file in the index.
     *
     * The "path" field holds the normalized ABSOLUTE path of the file.
     * main() looks documents up with
     * normalizePath( file.getAbsolutePath() ), so the stored value has to be
     * derived from the absolute path as well; using f.getPath() only matched
     * when the directory being indexed happened to be given as an absolute
     * path, and otherwise no document was ever found for deletion.
     *
     * @param f the file to index
     * @return a new document with a "path" and a "contents" field
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    static public Document createDocument( File f )
            throws FileNotFoundException {
        Document doc = new Document();
        doc.add( Field.Text( "path", normalizePath( f.getAbsolutePath() ) ) );

        // The reader is handed to Lucene unread and left open here.
        // NOTE(review): presumably it is consumed when the document is added
        // to the index - confirm which side is responsible for closing it.
        Reader reader = new BufferedReader(
                new InputStreamReader( new FileInputStream( f ) ) );
        doc.add( Field.Text( "contents", reader ) );
        return doc;
    }


    /**
     * Returns the value of the "path" field of a document.
     *
     * @param doc the document to read
     * @return whatever doc.get( "path" ) yields for this document
     */
    static public String getPath( Document doc ) {
        return ( String ) doc.get( "path" );
    }

    /**
     * Converts a path to forward-slash form and, when it names an existing
     * directory, makes sure it ends with a slash.
     *
     * @param path the path to normalize; may be null or empty
     * @return "" for a null or empty path, otherwise the normalized path
     */
    static public String normalizePath( String path ) {
        if ( path == null || path.length() == 0 ) {
            return "";
        }
        String normalized = path.replace( '\\', '/' );
        // Only paths that exist as directories on disk get the trailing slash.
        if ( new File( normalized ).isDirectory() && !normalized.endsWith( "/" ) ) {
            normalized = normalized + "/";
        }
        return normalized;
    }
}
}



--
Center for Agile Technology phone: 512.232.4399
The University of Texas at Austin fax: 512.232.6413
3925 West Braker Lane email: rick@cat.utexas.edu
MCC Suite 3.11040 CAT http://cat.utexas.edu/
Austin, TX 78759-5316


--
To unsubscribe, e-mail:
<mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail:
<mailto:lucene-user-help@jakarta.apache.org>



--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
RE: Deleting documents from index question. [ In reply to ]
I had tried this and it had the same incorrect result.

Thanks,

-- Rick

> -----Original Message-----
> From: Nader S. Henein [mailto:nsh@bayt.net]
> Sent: Monday, June 17, 2002 9:09 AM
> To: Lucene Users List
> Subject: RE: Deleting documents from index question.
>
>
> PS: try closing the reader after you're done deleting and
> open a new one for the search, kind of like committing a
> transaction to a normal DB
>
> -----Original Message-----
> From: Rick Vestal [mailto:rick@cat.utexas.edu]
> Sent: Monday, June 17, 2002 5:34 PM
> To: lucene-user@jakarta.apache.org
> Subject: Deleting documents from index question.
>
>
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted. My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem.
> Note that you will have to change the path at the top to a
> valid set of files on your machine. If anybody has any ideas
> on why I am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
> * Created by IntelliJ IDEA.
> * User: rvestal
> * Date: Jun 16, 2002
> * Time: 10:23:51 PM
> * To change template for new class use
> * Code Style | Class Templates options (Tools | IDE
> Options). */ package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
> // path to ant 1.4.1 docs
> private static String mDirToIndex =
> "c:/utils/ant/docs/manual/api/";
>
> private static String INDEX_DIR = "indexTest";
>
>
> static private void collectFiles( File dir, Vector files ) {
> File[] children = dir.listFiles();
> for ( int ix = 0; ix < children.length; ix++ ) {
> File child = children[ix];
> if ( child.isDirectory() ) {
> collectFiles( child, files );
> } else {
> files.add( child );
> }
> }
> }
>
>
> public static void main( String[] args ) {
> File indexDir = new File( INDEX_DIR );
> if ( !indexDir.exists() ) {
> indexDir.mkdirs();
> }
>
> Vector files = new Vector();
> collectFiles( new File( mDirToIndex ), files );
>
> try {
> IndexWriter writer = new IndexWriter( INDEX_DIR,
> new StandardAnalyzer(), true );
>
> for ( int ix = 0; ix < files.size(); ix++ ) {
> File file = ( File ) files.get( ix );
> writer.addDocument(
> IndexTestDocument.createDocument( file ) );
> }
> System.out.println( "Added: " + files.size() + "
> files." );
>
> writer.optimize();
> writer.close();
> writer = null;
>
> Searcher searcher = new IndexSearcher( INDEX_DIR );
> Analyzer analyzer = new StandardAnalyzer();
> Query query = QueryParser.parse( "Ant",
> "contents", analyzer );
>
> Hits hits = searcher.search( query );
> System.out.println( "Hits after add: " + hits.length() );
> searcher.close();
>
> Directory directory = FSDirectory.getDirectory(
> INDEX_DIR, false );
> IndexReader reader = IndexReader.open( directory );
>
> int count = 0;
> for ( int ix = 0; ix < files.size(); ix++ ) {
> String path =
> IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
> int numDocs = reader.numDocs();
> boolean bDeleted = false;
> for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath =
> IndexTestDocument.getPath( reader.document( ndx ) );
> if ( docPath.equals( path ) ) {
> count++;
> reader.delete( ndx );
> bDeleted = true;
> break;
> }
> }
> }
> if ( !bDeleted ) {
> System.out.println( " Not Deleted: " + path );
> for( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath =
> IndexTestDocument.getPath( reader.document( ndx ) );
> System.out.println( " path "
> + ndx + ":
> " +
> docPath );
> }
> }
> }
> }
> System.out.println( "Removed " + count + "
> documents of (" +
>
> files.size() + ")" );
> reader.close();
>
> searcher = new IndexSearcher( INDEX_DIR );
> analyzer = new StandardAnalyzer();
> query = QueryParser.parse( "Ant", "contents", analyzer );
>
> hits = searcher.search( query );
> System.out.println( "Hits after remove: " +
> hits.length() );
>
> } catch ( Exception ex ) {
> ex.printStackTrace();
> }
> }
>
>
> static class IndexTestDocument {
>
> static public Document createDocument( File f )
> throws FileNotFoundException {
> Document doc = new Document();
> doc.add( Field.Text( "path", normalizePath(
> f.getPath() ) ) );
> Reader reader = new BufferedReader( new
> InputStreamReader( new FileInputStream( f ) ) );
> doc.add( Field.Text( "contents", reader ) );
> return doc;
> }
>
>
> static public String getPath( Document doc ) {
> return ( String ) doc.get( "path" );
> }
>
> static public String normalizePath( String path ) {
> if ( path == null || path.length() == 0 ) {
> return "";
> }
> path = path.replace( '\\', '/' );
> File f = new File( path );
> if ( f.isDirectory() ) {
> if ( path.charAt( path.length() - 1 ) != '/' ) {
> path = path + "/";
> }
> }
> return path;
> }
> }
> }
>
>
>
> --
> Center for Agile Technology phone: 512.232.4399
> The University of Texas at Austin fax: 512.232.6413
> 3925 West Braker Lane email: rick@cat.utexas.edu
> MCC Suite 3.11040 CAT http://cat.utexas.edu/
> Austin, TX 78759-5316
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>


--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
RE: Deleting documents from index question. [ In reply to ]
Define "incorrect": a crash, or undesired results?

-----Original Message-----
From: Rick Vestal [mailto:rick@cat.utexas.edu]
Sent: Monday, June 17, 2002 6:10 PM
To: 'Lucene Users List'; nsh@bayt.net
Subject: RE: Deleting documents from index question.


I had tried this and it had the same incorrect result.

Thanks,

-- Rick

> -----Original Message-----
> From: Nader S. Henein [mailto:nsh@bayt.net]
> Sent: Monday, June 17, 2002 9:09 AM
> To: Lucene Users List
> Subject: RE: Deleting documents from index question.
>
>
> PS: try closing the reader after you're done deleting and
> open a new one for the search, kind of like committing a
> transaction to a normal DB
>
> -----Original Message-----
> From: Rick Vestal [mailto:rick@cat.utexas.edu]
> Sent: Monday, June 17, 2002 5:34 PM
> To: lucene-user@jakarta.apache.org
> Subject: Deleting documents from index question.
>
>
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted. My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem.
> Note that you will have to change the path at the top to a
> valid set of files on your machine. If anybody has any ideas
> on why I am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
> * Created by IntelliJ IDEA.
> * User: rvestal
> * Date: Jun 16, 2002
> * Time: 10:23:51 PM
> * To change template for new class use
> * Code Style | Class Templates options (Tools | IDE
> Options). */ package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
> // path to ant 1.4.1 docs
> private static String mDirToIndex =
> "c:/utils/ant/docs/manual/api/";
>
> private static String INDEX_DIR = "indexTest";
>
>
> static private void collectFiles( File dir, Vector files ) {
> File[] children = dir.listFiles();
> for ( int ix = 0; ix < children.length; ix++ ) {
> File child = children[ix];
> if ( child.isDirectory() ) {
> collectFiles( child, files );
> } else {
> files.add( child );
> }
> }
> }
>
>
> public static void main( String[] args ) {
> File indexDir = new File( INDEX_DIR );
> if ( !indexDir.exists() ) {
> indexDir.mkdirs();
> }
>
> Vector files = new Vector();
> collectFiles( new File( mDirToIndex ), files );
>
> try {
> IndexWriter writer = new IndexWriter( INDEX_DIR,
> new StandardAnalyzer(), true );
>
> for ( int ix = 0; ix < files.size(); ix++ ) {
> File file = ( File ) files.get( ix );
> writer.addDocument(
> IndexTestDocument.createDocument( file ) );
> }
> System.out.println( "Added: " + files.size() + "
> files." );
>
> writer.optimize();
> writer.close();
> writer = null;
>
> Searcher searcher = new IndexSearcher( INDEX_DIR );
> Analyzer analyzer = new StandardAnalyzer();
> Query query = QueryParser.parse( "Ant",
> "contents", analyzer );
>
> Hits hits = searcher.search( query );
> System.out.println( "Hits after add: " + hits.length() );
> searcher.close();
>
> Directory directory = FSDirectory.getDirectory(
> INDEX_DIR, false );
> IndexReader reader = IndexReader.open( directory );
>
> int count = 0;
> for ( int ix = 0; ix < files.size(); ix++ ) {
> String path =
> IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
> int numDocs = reader.numDocs();
> boolean bDeleted = false;
> for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath =
> IndexTestDocument.getPath( reader.document( ndx ) );
> if ( docPath.equals( path ) ) {
> count++;
> reader.delete( ndx );
> bDeleted = true;
> break;
> }
> }
> }
> if ( !bDeleted ) {
> System.out.println( " Not Deleted: " + path );
> for( int ndx = 0; ndx < numDocs; ndx++ ) {
> if ( !reader.isDeleted( ndx ) ) {
> String docPath =
> IndexTestDocument.getPath( reader.document( ndx ) );
> System.out.println( " path "
> + ndx + ":
> " +
> docPath );
> }
> }
> }
> }
> System.out.println( "Removed " + count + "
> documents of (" +
>
> files.size() + ")" );
> reader.close();
>
> searcher = new IndexSearcher( INDEX_DIR );
> analyzer = new StandardAnalyzer();
> query = QueryParser.parse( "Ant", "contents", analyzer );
>
> hits = searcher.search( query );
> System.out.println( "Hits after remove: " +
> hits.length() );
>
> } catch ( Exception ex ) {
> ex.printStackTrace();
> }
> }
>
>
> static class IndexTestDocument {
>
> static public Document createDocument( File f )
> throws FileNotFoundException {
> Document doc = new Document();
> doc.add( Field.Text( "path", normalizePath(
> f.getPath() ) ) );
> Reader reader = new BufferedReader( new
> InputStreamReader( new FileInputStream( f ) ) );
> doc.add( Field.Text( "contents", reader ) );
> return doc;
> }
>
>
> static public String getPath( Document doc ) {
> return ( String ) doc.get( "path" );
> }
>
> static public String normalizePath( String path ) {
> if ( path == null || path.length() == 0 ) {
> return "";
> }
> path = path.replace( '\\', '/' );
> File f = new File( path );
> if ( f.isDirectory() ) {
> if ( path.charAt( path.length() - 1 ) != '/' ) {
> path = path + "/";
> }
> }
> return path;
> }
> }
> }
>
>
>
> --
> Center for Agile Technology phone: 512.232.4399
> The University of Texas at Austin fax: 512.232.6413
> 3925 West Braker Lane email: rick@cat.utexas.edu
> MCC Suite 3.11040 CAT http://cat.utexas.edu/
> Austin, TX 78759-5316
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>


--
To unsubscribe, e-mail:
<mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail:
<mailto:lucene-user-help@jakarta.apache.org>



--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>
RE: Deleting documents from index question. [ In reply to ]
Undesired results.

> -----Original Message-----
> From: Nader S. Henein [mailto:nsh@bayt.net]
> Sent: Monday, June 17, 2002 9:16 AM
> To: Lucene Users List
> Subject: RE: Deleting documents from index question.
>
>
> Define "incorrect": a crash, or undesired results?
>
> -----Original Message-----
> From: Rick Vestal [mailto:rick@cat.utexas.edu]
> Sent: Monday, June 17, 2002 6:10 PM
> To: 'Lucene Users List'; nsh@bayt.net
> Subject: RE: Deleting documents from index question.
>
>
> I had tried this and it had the same incorrect result.
>
> Thanks,
>
> -- Rick
>
> > -----Original Message-----
> > From: Nader S. Henein [mailto:nsh@bayt.net]
> > Sent: Monday, June 17, 2002 9:09 AM
> > To: Lucene Users List
> > Subject: RE: Deleting documents from index question.
> >
> >
> > PS: try closing the reader after you're done deleting and open a new
> > one for the search, kind of like committing a transaction to a normal DB
> >
> > -----Original Message-----
> > From: Rick Vestal [mailto:rick@cat.utexas.edu]
> > Sent: Monday, June 17, 2002 5:34 PM
> > To: lucene-user@jakarta.apache.org
> > Subject: Deleting documents from index question.
> >
> >
> > Good morning all,
> >
> > I'm trying to delete a set of documents from an index,
> > and am running into a problem where all the documents are
> > not deleted. My problem is either the way I am using the
> API or it is
> > a bug in lucene...I'm not sure which one it is.
> >
> > I've included a sample program here that shows the problem.
> Note that
> > you will have to change the path at the top to a valid set
> of files on
> > your machine. If anybody has any ideas on why I am not
> removing the
> > files correctly, please let me know.
> >
> > Thanks,
> >
> > -- Rick
> >
> > /*
> > * Created by IntelliJ IDEA.
> > * User: rvestal
> > * Date: Jun 16, 2002
> > * Time: 10:23:51 PM
> > * To change template for new class use
> > * Code Style | Class Templates options (Tools | IDE Options). */
> > package org.intellij.plugins.docPlugin;
> >
> > import org.apache.lucene.analysis.Analyzer;
> > import org.apache.lucene.analysis.standard.StandardAnalyzer;
> > import org.apache.lucene.document.*;
> > import org.apache.lucene.index.*;
> > import org.apache.lucene.queryParser.QueryParser;
> > import org.apache.lucene.search.*;
> > import org.apache.lucene.store.*;
> >
> > import java.io.*;
> > import java.util.Vector;
> >
> > public class IndexTest {
> >
> > // path to ant 1.4.1 docs
> > private static String mDirToIndex =
> > "c:/utils/ant/docs/manual/api/";
> >
> > private static String INDEX_DIR = "indexTest";
> >
> >
> > static private void collectFiles( File dir, Vector files ) {
> > File[] children = dir.listFiles();
> > for ( int ix = 0; ix < children.length; ix++ ) {
> > File child = children[ix];
> > if ( child.isDirectory() ) {
> > collectFiles( child, files );
> > } else {
> > files.add( child );
> > }
> > }
> > }
> >
> >
> > public static void main( String[] args ) {
> > File indexDir = new File( INDEX_DIR );
> > if ( !indexDir.exists() ) {
> > indexDir.mkdirs();
> > }
> >
> > Vector files = new Vector();
> > collectFiles( new File( mDirToIndex ), files );
> >
> > try {
> > IndexWriter writer = new IndexWriter( INDEX_DIR, new
> > StandardAnalyzer(), true );
> >
> > for ( int ix = 0; ix < files.size(); ix++ ) {
> > File file = ( File ) files.get( ix );
> > writer.addDocument(
> IndexTestDocument.createDocument(
> > file ) );
> > }
> > System.out.println( "Added: " + files.size() +
> " files."
> > );
> >
> > writer.optimize();
> > writer.close();
> > writer = null;
> >
> > Searcher searcher = new IndexSearcher( INDEX_DIR );
> > Analyzer analyzer = new StandardAnalyzer();
> > Query query = QueryParser.parse( "Ant", "contents",
> > analyzer );
> >
> > Hits hits = searcher.search( query );
> > System.out.println( "Hits after add: " +
> hits.length() );
> > searcher.close();
> >
> > Directory directory = FSDirectory.getDirectory(
> INDEX_DIR,
> > false );
> > IndexReader reader = IndexReader.open( directory );
> >
> > int count = 0;
> > for ( int ix = 0; ix < files.size(); ix++ ) {
> > String path = IndexTestDocument.normalizePath( ( (
> > File
> > )
> > files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
> >
> > int numDocs = reader.numDocs();
> > boolean bDeleted = false;
> > for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> > if ( !reader.isDeleted( ndx ) ) {
> > String docPath = IndexTestDocument.getPath(
> > reader.document( ndx ) );
> > if ( docPath.equals( path ) ) {
> > count++;
> > reader.delete( ndx );
> > bDeleted = true;
> > break;
> > }
> > }
> > }
> > if ( !bDeleted ) {
> > System.out.println( " Not Deleted: " + path );
> > for( int ndx = 0; ndx < numDocs; ndx++ ) {
> > if ( !reader.isDeleted( ndx ) ) {
> > String docPath =
> > IndexTestDocument.getPath( reader.document( ndx ) );
> > System.out.println( " path "
> > + ndx + ":
> > " +
> > docPath );
> > }
> > }
> > }
> > }
> > System.out.println( "Removed " + count + "
> documents of ("
> > +
> >
> > files.size() + ")" );
> > reader.close();
> >
> > searcher = new IndexSearcher( INDEX_DIR );
> > analyzer = new StandardAnalyzer();
> > query = QueryParser.parse( "Ant", "contents",
> analyzer );
> >
> > hits = searcher.search( query );
> > System.out.println( "Hits after remove: " +
> > hits.length() );
> >
> > } catch ( Exception ex ) {
> > ex.printStackTrace();
> > }
> > }
> >
> >
> > static class IndexTestDocument {
> >
> > static public Document createDocument( File f )
> > throws FileNotFoundException {
> > Document doc = new Document();
> > doc.add( Field.Text( "path", normalizePath(
> > f.getPath() ) ) );
> > Reader reader = new BufferedReader( new
> InputStreamReader(
> > new FileInputStream( f ) ) );
> > doc.add( Field.Text( "contents", reader ) );
> > return doc;
> > }
> >
> >
> > static public String getPath( Document doc ) {
> > return ( String ) doc.get( "path" );
> > }
> >
> > static public String normalizePath( String path ) {
> > if ( path == null || path.length() == 0 ) {
> > return "";
> > }
> > path = path.replace( '\\', '/' );
> > File f = new File( path );
> > if ( f.isDirectory() ) {
> > if ( path.charAt( path.length() - 1 ) != '/' ) {
> > path = path + "/";
> > }
> > }
> > return path;
> > }
> > }
> > }
> >
> >
> >
> > --
> > Center for Agile Technology phone: 512.232.4399
> > The University of Texas at Austin fax: 512.232.6413
> > 3925 West Braker Lane email: rick@cat.utexas.edu
> > MCC Suite 3.11040 CAT http://cat.utexas.edu/
> > Austin, TX 78759-5316
> >
> >
> > --
> > To unsubscribe, e-mail:
> > <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> > For additional commands, e-mail:
> > <mailto:lucene-user-help@jakarta.apache.org>
> >
> >
> >
> > --
> > To unsubscribe, e-mail:
> > <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> > For additional commands, e-mail:
> > <mailto:lucene-user-help@jakarta.apache.org>
> >
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-unsubscribe@jakarta.apache.org>
> For additional commands, e-mail:
> <mailto:lucene-user-help@jakarta.apache.org>
>


--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>