Hi,
I am using Apache Lucene version 8.5.0.
I have written a simple program that indexes words containing special
characters.
For example, I have indexed the word *temp/hello*.
Now I want to search for that word with the wildcard query 'te*/hello'.
I get the error: Exception in thread "main"
org.apache.lucene.queryparser.classic.ParseException: Cannot parse
'te*/hello': Lexical error at line 1, column 10. Encountered: <EOF> after
: "/hello"
When I escape the query with the QueryParser.escape method, the search
doesn’t return any results when special characters are part of the query.
*Can someone suggest the right way to index and search words that contain
special characters?*
Here’s my simple program
*import* java.io.BufferedReader;
*import* java.io.IOException;
*import* java.io.InputStreamReader;
*import* java.nio.file.Paths;
*import* org.apache.lucene.analysis.Analyzer;
*import* org.apache.lucene.analysis.custom.CustomAnalyzer;
*import** org.apache.lucene.analysis.standard.StandardAnalyzer;*
*import* org.apache.lucene.document.Document;
*import* org.apache.lucene.document.Field;
*import* org.apache.lucene.document.TextField;
*import* org.apache.lucene.index.DirectoryReader;
*import* org.apache.lucene.index.IndexReader;
*import* org.apache.lucene.index.IndexWriter;
*import* org.apache.lucene.index.IndexWriterConfig;
*import* org.apache.lucene.index.IndexWriterConfig.OpenMode;
*import* org.apache.lucene.queryparser.classic.ParseException;
*import* org.apache.lucene.queryparser.classic.QueryParser;
*import* org.apache.lucene.search.IndexSearcher;
*import* org.apache.lucene.search.Query;
*import* org.apache.lucene.search.ScoreDoc;
*import* org.apache.lucene.search.TopDocs;
*import* org.apache.lucene.store.Directory;
*import* org.apache.lucene.store.MMapDirectory;
*public* *class* HelloLucene {
*private* *static* Analyzer buildAnalyzer() *throws* IOException {
*return* CustomAnalyzer.*builder*()
.withTokenizer("keyWord")
.addTokenFilter("lowercase")
.build();
}
*public* *static* *void* main(String[] args) *throws* IOException,
ParseException {
Analyzer analyzer = *buildAnalyzer*();
// 1. create the index
Directory index = *new* MMapDirectory(Paths.*get*("c:\\temp\\index"
));
IndexWriterConfig config = *new* IndexWriterConfig(analyzer);
String indexType = "create";
*if* ("create".equals(indexType)) {
config.setOpenMode(OpenMode.*CREATE*);
} *else* {
config.setOpenMode(OpenMode.*CREATE_OR_APPEND*);
}
IndexWriter w = *new* IndexWriter(index, config);
*long* start = System.*currentTimeMillis*();
*addDoc*(w, "Temp/Hello", "Artifact");
*long* end = System.*currentTimeMillis*();
w.close();
*for* (*int* i = 0; i < 100; i++) {
// 2. query
BufferedReader input = *new* BufferedReader(*new*
InputStreamReader(System.*in*));
String query = input.readLine();
//Prefix Search
QueryParser queryParser = *new* QueryParser("Name",analyzer);
queryParser.setAllowLeadingWildcard(*true*);
Query q = queryParser.parse(QueryParser.*escape*(query));
// 3. search
*int* hitsPerPage = 10;
IndexReader reader = DirectoryReader.*open*(index);
IndexSearcher searcher = *new* IndexSearcher(reader);
TopDocs docs = searcher.search(q, hitsPerPage);
ScoreDoc[] hits = docs.scoreDocs;
// 4. display results
*System.**out*.println("Found " + hits.length + " hits.");
*for* (*int* j = 0; j < hits.length; ++j) {
*int* docId = hits[j].doc;
Document d = searcher.doc(docId);
*System.**out*.println((j + 1) + ". " + d.get("Name") + "\t"
+ d.get("Type"));
}
reader.close();
}
}
*private* *static* *void* addDoc(IndexWriter w, String name, String type)
*throws* IOException {
Document doc = *new* Document();
doc.add(*new* TextField("Name", name, Field.Store.*YES*));
// use a string field for *isbn* because we don't want it
*tokenized*
doc.add(*new* TextField("Type", type, Field.Store.*YES*));
w.addDocument(doc);
}
}
I am using Apache Lucene version 8.5.0.
I have written a simple program that indexes words containing special
characters.
For example, I have indexed the word *temp/hello*.
Now I want to search for that word with the wildcard query 'te*/hello'.
I get the error: Exception in thread "main"
org.apache.lucene.queryparser.classic.ParseException: Cannot parse
'te*/hello': Lexical error at line 1, column 10. Encountered: <EOF> after
: "/hello"
When I escape the query with the QueryParser.escape method, the search
doesn’t return any results when special characters are part of the query.
*Can someone suggest the right way to index and search words that contain
special characters?*
Here’s my simple program
*import* java.io.BufferedReader;
*import* java.io.IOException;
*import* java.io.InputStreamReader;
*import* java.nio.file.Paths;
*import* org.apache.lucene.analysis.Analyzer;
*import* org.apache.lucene.analysis.custom.CustomAnalyzer;
*import** org.apache.lucene.analysis.standard.StandardAnalyzer;*
*import* org.apache.lucene.document.Document;
*import* org.apache.lucene.document.Field;
*import* org.apache.lucene.document.TextField;
*import* org.apache.lucene.index.DirectoryReader;
*import* org.apache.lucene.index.IndexReader;
*import* org.apache.lucene.index.IndexWriter;
*import* org.apache.lucene.index.IndexWriterConfig;
*import* org.apache.lucene.index.IndexWriterConfig.OpenMode;
*import* org.apache.lucene.queryparser.classic.ParseException;
*import* org.apache.lucene.queryparser.classic.QueryParser;
*import* org.apache.lucene.search.IndexSearcher;
*import* org.apache.lucene.search.Query;
*import* org.apache.lucene.search.ScoreDoc;
*import* org.apache.lucene.search.TopDocs;
*import* org.apache.lucene.store.Directory;
*import* org.apache.lucene.store.MMapDirectory;
*public* *class* HelloLucene {
*private* *static* Analyzer buildAnalyzer() *throws* IOException {
*return* CustomAnalyzer.*builder*()
.withTokenizer("keyWord")
.addTokenFilter("lowercase")
.build();
}
*public* *static* *void* main(String[] args) *throws* IOException,
ParseException {
Analyzer analyzer = *buildAnalyzer*();
// 1. create the index
Directory index = *new* MMapDirectory(Paths.*get*("c:\\temp\\index"
));
IndexWriterConfig config = *new* IndexWriterConfig(analyzer);
String indexType = "create";
*if* ("create".equals(indexType)) {
config.setOpenMode(OpenMode.*CREATE*);
} *else* {
config.setOpenMode(OpenMode.*CREATE_OR_APPEND*);
}
IndexWriter w = *new* IndexWriter(index, config);
*long* start = System.*currentTimeMillis*();
*addDoc*(w, "Temp/Hello", "Artifact");
*long* end = System.*currentTimeMillis*();
w.close();
*for* (*int* i = 0; i < 100; i++) {
// 2. query
BufferedReader input = *new* BufferedReader(*new*
InputStreamReader(System.*in*));
String query = input.readLine();
//Prefix Search
QueryParser queryParser = *new* QueryParser("Name",analyzer);
queryParser.setAllowLeadingWildcard(*true*);
Query q = queryParser.parse(QueryParser.*escape*(query));
// 3. search
*int* hitsPerPage = 10;
IndexReader reader = DirectoryReader.*open*(index);
IndexSearcher searcher = *new* IndexSearcher(reader);
TopDocs docs = searcher.search(q, hitsPerPage);
ScoreDoc[] hits = docs.scoreDocs;
// 4. display results
*System.**out*.println("Found " + hits.length + " hits.");
*for* (*int* j = 0; j < hits.length; ++j) {
*int* docId = hits[j].doc;
Document d = searcher.doc(docId);
*System.**out*.println((j + 1) + ". " + d.get("Name") + "\t"
+ d.get("Type"));
}
reader.close();
}
}
*private* *static* *void* addDoc(IndexWriter w, String name, String type)
*throws* IOException {
Document doc = *new* Document();
doc.add(*new* TextField("Name", name, Field.Store.*YES*));
// use a string field for *isbn* because we don't want it
*tokenized*
doc.add(*new* TextField("Type", type, Field.Store.*YES*));
w.addDocument(doc);
}
}