注意:以下启动脚本需放在heritrix工程目录下面(脚本中的类路径使用了相对路径"."):
1、heritrix的Windows启动脚本heritrix.bat,其内容如下:
rem Windows launcher for Heritrix. The classpath entries are relative, so start it from the Heritrix project directory.
rem -Xmx1024m caps the JVM heap at 1024 MB and -b hands 192.168.1.28 to Heritrix as its bind address.
java -Xmx1024m -cp ".;lib/*;" org.archive.crawler.Heritrix -b 192.168.1.28
rem Wait for a key press once java has exited.
pause
2、heritrix的Linux启动脚本heritrix.sh,其内容如下:
# Linux launcher for Heritrix: builds the classpath from the jars under
# $BASE/lib and then starts the crawler.
export BASE=/opt/heritrix
export LIB_BASE=$BASE/lib

# Jars are appended to CP one at a time, in exactly this order. Any CP value
# already present in the environment stays at the front of the list.
for jar_name in \
  ant-1.6.2.jar \
  bsh-2.0b4.jar \
  commons-cli-1.0.jar \
  commons-codec-1.3.jar \
  commons-collections-3.1.jar \
  commons-httpclient-3.1.jar \
  commons-io-1.3.1.jar \
  commons-lang-2.3.jar \
  commons-logging-1.0.4.jar \
  commons-logging.jar \
  commons-net-1.4.1.jar \
  commons-pool-1.3.jar \
  dnsjava-2.0.3.jar \
  fastutil-5.0.3-heritrix-subset-1.0.jar \
  htmlparser.jar \
  itext-1.2.0.jar \
  jasper-compiler-tomcat-4.1.30.jar \
  jasper-runtime-tomcat-4.1.30.jar \
  javaswf-CVS-SNAPSHOT-1.jar \
  jdom.jar \
  je-3.2.74.jar \
  jericho-html-2.3.jar \
  jericho-html-2.5.jar \
  jets3t-0.5.0.jar \
  jetty-4.2.23.jar \
  junit-3.8.2.jar \
  libidn-0.5.9.jar \
  log4j-1.2.8.jar \
  lucene-1104.jar \
  mg4j-1.0.1.jar \
  mysql-connector-java-3.0.17-ga-bin.jar \
  mysql-connector-java-5.0.0-beta-bin.jar \
  poi-2.0-RC1-20031102.jar \
  poi-scratchpad-2.0-RC1-20031102.jar \
  servlet-tomcat-4.1.30.jar \
  swt.jar
do
  CP=$CP:$LIB_BASE/$jar_name
done
export CP

# The current directory is searched first, then the jars collected in CP.
# -b passes 192.168.1.28 to Heritrix as the address its UI is reachable on.
java -Xms128m -Xmx1024m -classpath ".:$CP" org.archive.crawler.Heritrix -b 192.168.1.28
(注意:-b 192.168.1.28 的作用是设置heritrix的访问IP;如果不设置,默认只能以localhost的方式访问heritrix的UI界面。此处的IP为示例,请替换为运行heritrix的机器的实际IP。)