Mailing List Archive

cvs commit: jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler FileContentHandler.java FileContentHandlerAdapter.java FileContentHandlerFactory.java GZipHandler.java NestedFileContentHandlerAdapter.java NullHandler.java TARHandler.java TextHandler.java ZIPHandler.java
kelvint 02/05/08 08:52:38

Modified: projects/appex/src/java/search/contenthandler
FileContentHandler.java
FileContentHandlerAdapter.java
FileContentHandlerFactory.java GZipHandler.java
NestedFileContentHandlerAdapter.java
NullHandler.java TARHandler.java TextHandler.java
ZIPHandler.java
Log:
Importing the classes seems to have warped the whitespace. Here's my attempt to get things back to normal.

Introduced a new datasource and contenthandler mechanism. It's too major an alteration for individual changes to be enumerated.

Revision Changes Path
1.2 +88 -86 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandler.java

Index: FileContentHandler.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- FileContentHandler.java 4 May 2002 15:43:46 -0000 1.1
+++ FileContentHandler.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,86 +1,88 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.lucene.document.Document;
-
-import java.io.File;
-import java.util.List;
-
-/**
- * A content handler determines how to index a file's contents.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public interface FileContentHandler
-{
- /**
- * Perform filetype-specific actions to index the file's contents and
- * add it to the {@link org.apache.lucene.document.Document} object.
- */
- public void parse(Document doc, File f);
-
- /**
- * Is this a collection of files?
- */
- public boolean isNested();
-
- /**
- * Return the collection of files contained within the parent file.
- */
- public List getNestedData();
-
- public Object clone();
-}
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+import java.util.List;
+
+/**
+ * A content handler determines how to index a file's contents.
+ */
+public interface FileContentHandler
+{
+ /**
+ * Do the file contents of this file have any meaning? Should
+ * its contents be indexed?
+ */
+ public boolean fileContentIsReadable();
+
+ /**
+ * Returns a reader for this file's contents.
+ */
+ public Reader getReader();
+
+ /**
+ * Does this file have nested data within?
+ */
+ public boolean containsNestedData();
+
+ /**
+ * Return the datasources contained within the parent file.
+ * This can be URLs contained within a HTML file, files
+ * within a ZIP file, basically anything represented by a
+ * DataSource.
+ */
+ public List getNestedDataSource();
+}



1.2 +88 -81 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerAdapter.java

Index: FileContentHandlerAdapter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerAdapter.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- FileContentHandlerAdapter.java 4 May 2002 15:43:46 -0000 1.1
+++ FileContentHandlerAdapter.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,81 +1,88 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.lucene.document.Document;
-
-import java.io.File;
-import java.util.List;
-
-/**
- * A no-op implementation to make FileContentHandler creation easier.
- * <p>
- * Classes which need to implement the FileContentHandler interface should
- * extend this class or {@link NestedFileContentHandlerAdapter}.
- * </p>
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public abstract class FileContentHandlerAdapter implements FileContentHandler
-{
- public void parse(Document doc, File f)
- {
- }
- public List getNestedData()
- {
- return null;
- }
- public abstract Object clone();
-}
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.File;
+import java.io.Reader;
+import java.util.List;
+
+/**
+ * A no-op implementation to make FileContentHandler creation easier.
+ * <p>
+ * Classes which need to implement the FileContentHandler interface should
+ * extend this class or {@link NestedFileContentHandlerAdapter}.
+ * </p>
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public abstract class FileContentHandlerAdapter implements FileContentHandler
+{
+ protected File file;
+
+ protected FileContentHandlerAdapter(File file)
+ {
+ this.file = file;
+ }
+
+ public Reader getReader()
+ {
+ return null;
+ }
+
+ public List getNestedDataSource()
+ {
+ return null;
+ }
+}



1.2 +179 -85 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerFactory.java

Index: FileContentHandlerFactory.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/FileContentHandlerFactory.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- FileContentHandlerFactory.java 4 May 2002 15:43:46 -0000 1.1
+++ FileContentHandlerFactory.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,85 +1,179 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.log4j.Category;
-
-import java.util.Map;
-
-/**
- * Factory responsible for obtaining ContentHandlers.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public abstract class ContentHandlerFactory
-{
- public static final String DEFAULT_HANDLER_KEY = "DEFAULT";
- static Category cat = Category.getInstance(ContentHandlerFactory.class.getName());
- private static Map handlerCache = null;
- public static FileContentHandler getContentHandler(String extension)
- {
- if (handlerCache.containsKey(extension))
- return (FileContentHandler) ((FileContentHandler) handlerCache.get(extension)).clone();
- else if (handlerCache.containsKey(DEFAULT_HANDLER_KEY))
- return (FileContentHandler) ((FileContentHandler) handlerCache.get(DEFAULT_HANDLER_KEY)).clone();
- else
- return NullHandler.getInstance();
- }
-
- public static void setContentHandlers(Map contentHandlers)
- {
- handlerCache = contentHandlers;
- }
-}
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.log4j.Category;
+
+import java.util.Map;
+import java.io.File;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Constructor;
+
+import search.util.IOUtils;
+
+/**
+ * Factory responsible for obtaining ContentHandlers.
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public abstract class FileContentHandlerFactory
+{
+ public static final String DEFAULT_HANDLER_KEY = "DEFAULT";
+ static Category cat = Category.getInstance(FileContentHandlerFactory.class.getName());
+ private static Map handlerRegistry;
+
+ public static FileContentHandler getContentHandler(File f)
+ {
+ String extension = IOUtils.getFileExtension(f);
+ if (handlerRegistry.containsKey(extension))
+ {
+ String handlerClassname = (String) handlerRegistry.get(extension);
+ return (FileContentHandler) generateObject(handlerClassname,
+ new Class[]{File.class},
+ new Object[]{f});
+ }
+ else if (handlerRegistry.containsKey(DEFAULT_HANDLER_KEY))
+ {
+ String handlerClassname = (String) handlerRegistry.get(DEFAULT_HANDLER_KEY);
+ return (FileContentHandler) generateObject(handlerClassname);
+ }
+ else
+ {
+ return NullHandler.getInstance();
+ }
+ }
+
+ public static void setHandlerRegistry(Map handlerRegistry)
+ {
+ FileContentHandlerFactory.handlerRegistry = handlerRegistry;
+ }
+
+ /**
+ * Utility method to return an object based on its class name.
+ * The object needs to have a constructor which accepts no parameters.
+ *
+ * @param className Class name of object to be generated
+ * @return Object
+ */
+ private static Object generateObject(String className)
+ {
+ Object o = null;
+ try
+ {
+ Class c = Class.forName(className);
+ o = c.newInstance();
+ }
+ catch (ClassNotFoundException cnfe)
+ {
+ cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe);
+ }
+ catch (InstantiationException ie)
+ {
+ cat.error(ie.getMessage() + " Class named '" + className + "' could not be instantiated.", ie);
+ }
+ catch (IllegalAccessException iae)
+ {
+ cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae);
+ }
+ return o;
+ }
+
+ /**
+ * Utility method to return an object based on its class name.
+ *
+ * @param className Class name of object to be generated
+ * @param clazz Class array of parameters.
+ * @param args Object array of arguments.
+ * @return Object
+ */
+ private static Object generateObject(String className,
+ Class[] clazz,
+ Object[] args)
+ {
+ Object o = null;
+ try
+ {
+ Class c = Class.forName(className);
+ Constructor con = c.getConstructor(clazz);
+ if (con != null)
+ {
+ o = con.newInstance(args);
+ }
+ else
+ throw new InstantiationException("Constructor with arguments:" + clazz.toString() + " non-existent.");
+ }
+ catch (ClassNotFoundException cnfe)
+ {
+ cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe);
+ }
+ catch (InstantiationException ie)
+ {
+ cat.error(ie.getMessage() + " Class named '" + className + "' could not be instantiated.", ie);
+ }
+ catch (IllegalAccessException iae)
+ {
+ cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae);
+ }
+ catch (NoSuchMethodException nsme)
+ {
+ cat.error(nsme.getMessage() + " No method in class named '" + className + "'.", nsme);
+ }
+ catch (InvocationTargetException ite)
+ {
+ cat.error(ite.getMessage() + " in class named '" + className + "'.", ite);
+ }
+ return o;
+ }
+}



1.2 +129 -123 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/GZipHandler.java

Index: GZipHandler.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/GZipHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- GZipHandler.java 4 May 2002 15:43:46 -0000 1.1
+++ GZipHandler.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,124 +1,130 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.log4j.Category;
-import org.apache.lucene.document.DateField;
-import org.apache.lucene.document.Document;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import search.util.IOUtils;
-
-/**
- * Handles GZip content.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public class GZipHandler extends NestedFileContentHandlerAdapter
-{
- static Category cat = Category.getInstance(GZipHandler.class.getName());
-
- public void parse(Document doc, File f)
- {
- if (!f.exists())
- return;
- try
- {
- File tempDir = new File(TEMP_FOLDER);
- tempDir.mkdirs();
- tempDir.deleteOnExit();
- String filename = f.getName();
- File tempFile = new File(tempDir, filename.substring(0, filename.lastIndexOf(".")));
- tempFile.deleteOnExit();
- IOUtils.extractGZip(f, tempFile);
- indexGZipDirectory(tempDir, dataMapList);
- }
- catch (IOException ioe)
- {
- cat.error("IOException ungzipping " + f.toString(), ioe);
- }
- }
-
- // only one file, but let's just treat it like a directory anyway
- private void indexGZipDirectory(File dir, List dataMapList)
- {
- if (dir.isDirectory())
- {
- File[] dirContents = dir.listFiles();
- for (int i = 0; i < dirContents.length; i++)
- {
- indexGZipDirectory(dirContents[i], dataMapList);
- }
- }
- else if (dir.isFile())
- {
- // here create new DataMap for the gzip entry
- Map dataMap = new HashMap();
- dataMap.put("filePath", dir.toString());
- dataMapList.add(dataMap);
- }
- }
-
- public Object clone()
- {
- return new GZipHandler();
- }
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.log4j.Category;
+import search.DataSource;
+import search.FSDataSource;
+import search.util.IOUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.List;
+
+/**
+ * Handles GZip content.
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public class GZipHandler extends NestedFileContentHandlerAdapter
+{
+ private static Category cat = Category.getInstance(GZipHandler.class.getName());
+
+ public GZipHandler(File file)
+ {
+ super(file);
+ }
+
+ public Reader getReader()
+ {
+ return null;
+ }
+
+ public List getNestedDataSource()
+ {
+ if (!file.exists())
+ return null;
+ try
+ {
+ File tempDir = new File(TEMP_FOLDER);
+ tempDir.mkdirs();
+ tempDir.deleteOnExit();
+ String filename = file.getName();
+ File tempFile = new File(tempDir, filename.substring(0, filename.lastIndexOf(".")));
+ tempFile.deleteOnExit();
+ IOUtils.extractGZip(file, tempFile);
+ indexGZipDirectory(tempDir);
+ }
+ catch (IOException ioe)
+ {
+ cat.error("IOException ungzipping " + file.toString(), ioe);
+ }
+ return nestedDataSource;
+ }
+
+ public boolean fileContentIsReadable()
+ {
+ return false;
+ }
+
+ // only one file, but let's just treat it like a directory anyway
+ private void indexGZipDirectory(File dir)
+ {
+ if (dir.isDirectory())
+ {
+ File[] dirContents = dir.listFiles();
+ for (int i = 0; i < dirContents.length; i++)
+ {
+ indexGZipDirectory(dirContents[i]);
+ }
+ }
+ else if (dir.isFile())
+ {
+ DataSource ds = new FSDataSource(dir);
+ nestedDataSource.add(nestedDataSource);
+ }
+ }
}



1.2 +90 -92 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NestedFileContentHandlerAdapter.java

Index: NestedFileContentHandlerAdapter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NestedFileContentHandlerAdapter.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- NestedFileContentHandlerAdapter.java 4 May 2002 15:43:46 -0000 1.1
+++ NestedFileContentHandlerAdapter.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,92 +1,90 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.lucene.document.Document;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A no-op implementation to make FileContentHandler creation easier.
- * <p>
- * Classes which need to implement the FileContentHandler interface
- * and need to handle nested content (example: zip, tar, rar, etc) should
- * extend this class.
- * </p>
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public abstract class NestedFileContentHandlerAdapter
- extends FileContentHandlerAdapter
-{
- protected final String TEMP_FOLDER = "/usr/temp" + '/'
- + Math.random() + '/';
-
- protected List dataMapList = new ArrayList();
-
- public abstract void parse(Document doc, File f);
-
- public boolean isNested()
- {
- return true;
- }
-
- public List getNestedData()
- {
- return this.dataMapList;
- }
-}
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.document.Document;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A no-op implementation to make FileContentHandler creation easier.
+ * <p>
+ * Classes which need to implement the FileContentHandler interface
+ * and need to handle nested content (example: zip, tar, rar, etc) should
+ * extend this class.
+ * </p>
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public abstract class NestedFileContentHandlerAdapter
+    extends FileContentHandlerAdapter
+{
+    /**
+     * Scratch folder for extracted content, computed once per handler
+     * instance: a randomly named sub-folder of the directory named by the
+     * <code>java.io.tmpdir</code> system property. The folder is not created
+     * here; the path always ends with a file separator.
+     */
+    protected final String TEMP_FOLDER = System.getProperty("java.io.tmpdir")
+        + File.separator + Math.random() + File.separator;
+
+    /**
+     * Data sources found inside the handled file. Left <code>null</code>
+     * here, so a subclass must create the list before adding to it.
+     */
+    protected List nestedDataSource;
+
+    /**
+     * @param file the file with nested content to handle
+     */
+    public NestedFileContentHandlerAdapter(File file)
+    {
+        super(file);
+    }
+
+    /**
+     * @return always <code>true</code>
+     */
+    public boolean containsNestedData()
+    {
+        return true;
+    }
+}



1.2 +93 -80 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NullHandler.java

Index: NullHandler.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/NullHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- NullHandler.java 4 May 2002 15:43:46 -0000 1.1
+++ NullHandler.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,80 +1,93 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-/**
- * Do-nothing content handler.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public class NullHandler extends FileContentHandlerAdapter
-{
- static NullHandler singleton = new NullHandler();
-
- public static FileContentHandler getInstance()
- {
- return singleton;
- }
-
- public Object clone()
- {
- return this;
- }
-
- public boolean isNested()
- {
- return false;
- }
-}
+package search.contenthandler;
+
+import java.io.File;
+import java.io.Reader;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/**
+ * Content handler that does nothing: it offers no readable content and no
+ * nested data. A single shared instance is available through
+ * {@link #getInstance()}.
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public class NullHandler extends FileContentHandlerAdapter
+{
+    /** The one shared instance; it is constructed with a null file. */
+    private static NullHandler singleton = new NullHandler(null);
+
+    /**
+     * Private so that instances are only obtained via {@link #getInstance()}.
+     *
+     * @param file passed straight to the superclass constructor
+     */
+    private NullHandler(File file)
+    {
+        super(file);
+    }
+
+    /**
+     * @return the shared NullHandler instance
+     */
+    public static FileContentHandler getInstance()
+    {
+        return singleton;
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public Reader getReader()
+    {
+        return null;
+    }
+
+    /**
+     * @return always <code>false</code>
+     */
+    public boolean fileContentIsReadable()
+    {
+        return false;
+    }
+
+    /**
+     * @return always <code>false</code>
+     */
+    public boolean containsNestedData()
+    {
+        return false;
+    }
+}



1.2 +130 -117 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TARHandler.java

Index: TARHandler.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TARHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TARHandler.java 4 May 2002 15:43:46 -0000 1.1
+++ TARHandler.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,118 +1,131 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import search.util.IOUtils;
-import org.apache.log4j.Category;
-import org.apache.lucene.document.DateField;
-import org.apache.lucene.document.Document;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Handles Tar files.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public class TARHandler extends NestedFileContentHandlerAdapter
-{
- static Category cat = Category.getInstance(TARHandler.class.getName());
-
- public void parse(Document doc, File f)
- {
- if (!f.exists())
- return;
- try
- {
- File tempDir = new File(TEMP_FOLDER);
- tempDir.deleteOnExit();
- IOUtils.extractTar(f, tempDir);
- indexTarDirectory(tempDir, dataMapList);
- }
- catch (IOException ioe)
- {
- cat.error(ioe.getMessage(), ioe);
- }
- }
-
- private void indexTarDirectory(File dir, List dataMapList)
- {
- if (dir.isDirectory())
- {
- File[] dirContents = dir.listFiles();
- for (int i = 0; i < dirContents.length; i++)
- {
- indexTarDirectory(dirContents[i], dataMapList);
- }
- }
- else if (dir.isFile())
- {
- // here create new DataMap for the tarred file
- Map dataMap = new HashMap();
- dataMap.put("filePath", dir.toString());
- dataMapList.add(dataMap);
- }
- }
-
- public Object clone()
- {
- return new TARHandler();
- }
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.log4j.Category;
+import search.DataSource;
+import search.FSDataSource;
+import search.util.IOUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Content handler for Tar files.
+ * <p>
+ * The archive itself offers no readable content. Instead it is extracted
+ * into this handler's temporary folder and every extracted file is exposed
+ * as a nested <code>FSDataSource</code>.
+ * </p>
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public class TARHandler extends NestedFileContentHandlerAdapter
+{
+    static Category cat = Category.getInstance(TARHandler.class.getName());
+
+    /**
+     * @param file the Tar file to handle
+     */
+    public TARHandler(File file)
+    {
+        super(file);
+    }
+
+    /**
+     * @return always <code>null</code>; see {@link #fileContentIsReadable()}
+     */
+    public Reader getReader()
+    {
+        return null;
+    }
+
+    /**
+     * @return always <code>false</code>
+     */
+    public boolean fileContentIsReadable()
+    {
+        return false;
+    }
+
+    /**
+     * Extracts the Tar file into the temporary folder and collects the
+     * extracted files as nested data sources. An IOException raised during
+     * extraction is logged, not propagated; whatever has been collected so
+     * far is still returned.
+     *
+     * @return the list of nested data sources, or <code>null</code> if the
+     *         handled file does not exist
+     */
+    public List getNestedDataSource()
+    {
+        if (!file.exists())
+            return null;
+        if (nestedDataSource == null)
+        {
+            nestedDataSource = new ArrayList();
+        }
+        try
+        {
+            File tempDir = new File(TEMP_FOLDER);
+            tempDir.deleteOnExit();
+            IOUtils.extractTar(file, tempDir);
+            indexTarDirectory(tempDir);
+        }
+        catch (IOException ioe)
+        {
+            cat.error(ioe.getMessage(), ioe);
+        }
+        return nestedDataSource;
+    }
+
+    /**
+     * Recursively walks <code>dir</code> and adds a data source for every
+     * regular file found.
+     *
+     * @param dir the directory (or file) to index
+     */
+    private void indexTarDirectory(File dir)
+    {
+        if (dir.isDirectory())
+        {
+            File[] dirContents = dir.listFiles();
+            // listFiles() returns null when the directory cannot be read
+            if (dirContents == null)
+                return;
+            for (int i = 0; i < dirContents.length; i++)
+            {
+                indexTarDirectory(dirContents[i]);
+            }
+        }
+        else if (dir.isFile())
+        {
+            // create a data source for the tarred file and add it to the
+            // list (the data source itself, not the list)
+            DataSource ds = new FSDataSource(dir);
+            nestedDataSource.add(ds);
+        }
+    }
 }



1.2 +116 -121 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TextHandler.java

Index: TextHandler.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/TextHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TextHandler.java 4 May 2002 15:43:46 -0000 1.1
+++ TextHandler.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,121 +1,116 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import org.apache.log4j.Category;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import java.io.*;
-
-import search.util.StringUtils;
-
-/**
- * Handles text-based content.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public class TextHandler extends FileContentHandlerAdapter
-{
- static Category cat = Category.getInstance(TextHandler.class.getName());
-
- public void parse(Document doc, File f)
- {
- if (!f.exists())
- {
- cat.error(f.toString() + " doesn't exist! Failing silently...");
- return;
- }
- doc.add(Field.Text("fileContents", getReader(f)));
- }
-
- public boolean isNested()
- {
- return false;
- }
-
- private Reader getReader(File f)
- {
- Reader reader = null;
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(f));
- String s = null;
- StringBuffer strbf = new StringBuffer();
- while ((s = br.readLine()) != null)
- {
- if (s.trim().length() > 0)
- {
- strbf.append(StringUtils.removeUnreadableCharacters(s));
- }
- }
- reader = new StringReader(strbf.toString());
- }
- catch (FileNotFoundException nfe)
- {
- cat.error("File Not Found Exception:" + f.toString(), nfe);
- }
- catch (IOException ioe)
- {
- cat.error(ioe.getMessage(), ioe);
- }
- return reader;
- }
-
- public Object clone()
- {
- return new TextHandler();
- }
-}
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.log4j.Category;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.io.*;
+
+import search.util.StringUtils;
+
+/**
+ * Content handler for text-based files: the file content is made available
+ * through a plain <code>FileReader</code>.
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public class TextHandler extends FileContentHandlerAdapter
+{
+    static Category cat = Category.getInstance(TextHandler.class.getName());
+
+    /**
+     * @param file the text file to handle
+     */
+    public TextHandler(File file)
+    {
+        super(file);
+    }
+
+    /**
+     * @return always <code>true</code>
+     */
+    public boolean fileContentIsReadable()
+    {
+        return true;
+    }
+
+    /**
+     * @return always <code>false</code>
+     */
+    public boolean containsNestedData()
+    {
+        return false;
+    }
+
+    /**
+     * Opens the handled file for reading. Failures are logged, never thrown.
+     *
+     * @return a reader over the file, or <code>null</code> if the file does
+     *         not exist or cannot be opened
+     */
+    public Reader getReader()
+    {
+        if (file.exists())
+        {
+            return openReader(file);
+        }
+        cat.error(file.toString() + " doesn't exist! Failing silently...");
+        return null;
+    }
+
+    /**
+     * Creates a <code>FileReader</code> for the given file, logging any
+     * failure.
+     *
+     * @param source the file to open
+     * @return the reader, or <code>null</code> if opening failed
+     */
+    private Reader openReader(File source)
+    {
+        try
+        {
+            return new FileReader(source);
+        }
+        catch (FileNotFoundException nfe)
+        {
+            cat.error("File Not Found Exception:" + source.toString(), nfe);
+        }
+        catch (IOException ioe)
+        {
+            cat.error(ioe.getMessage(), ioe);
+        }
+        return null;
+    }
+}



1.2 +131 -115 jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/ZIPHandler.java

Index: ZIPHandler.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/contenthandler/ZIPHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- ZIPHandler.java 4 May 2002 15:43:46 -0000 1.1
+++ ZIPHandler.java 8 May 2002 15:52:37 -0000 1.2
@@ -1,116 +1,132 @@
-package search.contenthandler;
-
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Turbine" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Turbine", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-import search.util.IOUtils;
-import org.apache.log4j.Category;
-import org.apache.lucene.document.Document;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipException;
-import java.util.zip.ZipFile;
-
-/**
- * Handles Zip files.
- *
- * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
- */
-public class ZIPHandler extends NestedFileContentHandlerAdapter
-{
- static Category cat = Category.getInstance(ZIPHandler.class.getName());
-
- public void parse(Document doc, File f)
- {
- if (!f.exists())
- return;
- try
- {
- ZipFile zFile = new ZipFile(f);
- for (Enumeration e = zFile.entries(); e.hasMoreElements();)
- {
- ZipEntry entry = (ZipEntry) e.nextElement();
- String entryName = entry.getName();
- IOUtils.writeToTempFile(zFile.getInputStream(entry),
- TEMP_FOLDER + entryName);
- if (!entry.isDirectory())
- {
- // create a new DataMap for each zip entry
- Map dataMap = new HashMap();
- dataMap.put("filePath", TEMP_FOLDER + entryName);
- dataMapList.add(dataMap);
- }
- }
- zFile.close();
- }
- catch (ZipException ze)
- {
- cat.error("ZipException parsing zip:" + ze.getMessage(), ze);
- }
- catch (IOException ioe)
- {
- cat.error("IOException parsing zip:" + ioe.getMessage(), ioe);
- }
- }
-
- public Object clone()
- {
- return new ZIPHandler();
- }
+package search.contenthandler;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.log4j.Category;
+import search.DataSource;
+import search.FSDataSource;
+import search.util.IOUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipFile;
+
+/**
+ * Handles Zip files.
+ *
+ * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
+ */
+public class ZIPHandler extends NestedFileContentHandlerAdapter
+{
+ private static Category cat = Category.getInstance(ZIPHandler.class);
+
+ public ZIPHandler(File file)
+ {
+ super(file);
+ }
+
+ public boolean fileContentIsReadable()
+ {
+ return false;
+ }
+
+ public Reader getReader()
+ {
+ return null;
+ }
+
+ public List getNestedDataSource()
+ {
+ if (!file.exists())
+ return null;
+ if (nestedDataSource == null)
+ {
+ nestedDataSource = new ArrayList();
+ }
+ try
+ {
+ ZipFile zFile = new ZipFile(file);
+ for (Enumeration e = zFile.entries(); e.hasMoreElements();)
+ {
+ ZipEntry entry = (ZipEntry) e.nextElement();
+ String entryName = entry.getName();
+ IOUtils.writeToTempFile(zFile.getInputStream(entry),
+ TEMP_FOLDER + entryName);
+ if (!entry.isDirectory())
+ {
+ // create a new DataSource for each zip entry
+ DataSource ds = new FSDataSource(TEMP_FOLDER + entryName);
+ nestedDataSource.add(ds);
+ }
+ }
+ zFile.close();
+ }
+ catch (ZipException ze)
+ {
+ cat.error("ZipException parsing zip:" + ze.getMessage(), ze);
+ }
+ catch (IOException ioe)
+ {
+ cat.error("IOException parsing zip:" + ioe.getMessage(), ioe);
+ }
+ return nestedDataSource;
+ }
}




--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>