Mailing List Archive

cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM run.sh
otis 2002/09/14 11:51:49

Modified: contributions/webcrawler-LARM run.sh
Log:
- Modified to make it usable. This way we don't have to use Ant to run LARM.

Revision Changes Path
1.3 +40 -3 jakarta-lucene-sandbox/contributions/webcrawler-LARM/run.sh

Index: run.sh
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/run.sh,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- run.sh 22 May 2002 23:09:16 -0000 1.2
+++ run.sh 14 Sep 2002 18:51:49 -0000 1.3
@@ -1,4 +1,41 @@
#!/bin/sh
-rm -r logs
-mkdir logs
-java -server -Xmx400mb -classpath classes:libs/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://your.server.here/ -restrictto http://[^/]*\.your\.server\.here.* -threads 15
+
+#
+# $Id$
+#
+
+BASE_DIR=./runtime
+LOG_DIR=$BASE_DIR/logs
+CACHE_DIR=$BASE_DIR/cachingqueue
+CLASSPATH=build/classes:libs/jakarta-oro-2.0.5.jar:libs/HTTPClient.zip:/usr/local/jakarta-lucene/lucene.jar
+SLEEP_TIME=2
+
+if [ $# -lt 4 ]
+then
+ echo "Usage: `basename $0` <start url> <score regex> <# threads> <max mem>" >&2
+ exit 1
+fi
+
+START_URL=$1
+SCOPE_REGEX=$2
+THREAD_COUNT=$3
+MAX_MEM=$4
+
+
+echo Removing $LOG_DIR...
+sleep $SLEEP_TIME
+rm -r $LOG_DIR
+echo Removing $CACHE_DIR...
+sleep $SLEEP_TIME
+rm -r $CACHE_DIR
+echo Creating $LOG_DIR
+sleep $SLEEP_TIME
+mkdir -p $LOG_DIR
+echo Creating $CACHE_DIR
+sleep $SLEEP_TIME
+mkdir -p $CACHE_DIR
+
+CMD="java -server -Xmx$MAX_MEM -classpath $CLASSPATH de.lanlab.larm.fetcher.FetcherMain -start $START_URL -restrictto $SCOPE_REGEX -threads $THREAD_COUNT"
+echo Starting LARM with: $CMD
+
+$CMD




--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>