@echo off
rem Nutch 0.9 launcher for Windows (cmd.exe).
rem Usage: nutch COMMAND [args...]
rem With no COMMAND the usage text below is printed and control jumps to the
rem :end label; otherwise control passes to the :INIT label.

rem JVM heap option handed to java when the selected class is launched.
set JAVA_HEAP_MAX="-Xmx512M"

rem %~1 strips surrounding quotes, so a quoted first argument cannot break the
rem comparison. When no argument is given execution falls through to the usage
rem text.
if not "%~1"=="" goto INIT

:echoMSG
echo Title: Nutch 运行脚本
echo Author:wolf mail:wangxuliangboy@163.com QQ:39278621
echo Nutch Version: 0.9
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo crawl one-step crawler for intranets
echo inject inject new urls into the database
echo generate generate new segments to fetch
echo fetchlist print the fetchlist of a segment
echo fetch fetch a segment's pages
echo parse parse a segment's pages
echo index run the indexer on a segment's fetcher output
echo merge merge several segment indexes
echo dedup remove duplicates from a set of segment indexes
echo updatedb update db from segments after fetching
echo updatesegs update segments with link data from the db
echo mergesegs merge multiple segments into a single segment
echo analyze adjust database link-analysis scoring
echo segread read, fix and dump segment data
echo segslice append, join and slice segment data
echo server run a search server
echo namenode run the NDFS namenode
echo datanode run an NDFS datanode
echo ndfs run an NDFS admin client
echo jobtracker run the MapReduce job Tracker node
echo tasktracker run a MapReduce task Tracker node
echo or
echo CLASSNAME run the class named CLASSNAME
echo Most commands print help when invoked w/o parameters.
goto end
:INIT
rem Build the classpath used to launch Nutch.
rem Reads:  NUTCH_HOME
rem Writes: CLASSPATH, plus a helper script setclasspath.bat in the current
rem         directory which, when called, appends every nutch-*.jar and
rem         lib\*.jar under NUTCH_HOME to CLASSPATH.

rem Without NUTCH_HOME every path below would be wrong, so stop immediately
rem with a non-zero exit code.
if not defined NUTCH_HOME (
    >&2 echo NUTCH_HOME IN PATH NOT FOUND
    exit /b 1
)

set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%\conf;%NUTCH_HOME%\plugin;%NUTCH_HOME%\lib

rem The jar entries are written as individual SET lines into a helper script
rem rather than appended here: %CLASSPATH% inside a FOR body is expanded once
rem when the line is parsed, not once per iteration.
> setclasspath.bat echo @echo off

rem Each FOR stands on its own line; chaining them with ^& would make the second
rem loop part of the first loop's body. The globs are quoted so a NUTCH_HOME
rem containing spaces is handled.
for %%i in ("%NUTCH_HOME%\nutch-*.jar") do >> setclasspath.bat echo set CLASSPATH=%%CLASSPATH%%;%%i
for %%i in ("%NUTCH_HOME%\lib\*.jar") do >> setclasspath.bat echo set CLASSPATH=%%CLASSPATH%%;%%i
goto EXEC
:EXEC
rem Map the COMMAND given as the first argument to a Java class and run it.
rem Reads:  JAVA_HOME, JAVA_HEAP_MAX, CLASSPATH, and setclasspath.bat from the
rem         current directory.
rem Writes: CLASS, NUTCH_ARGS, and CLASSPATH (extended by setclasspath.bat).
call setclasspath

rem Default: treat COMMAND as a class name, as the usage text promises
rem ("CLASSNAME run the class named CLASSNAME"). Assigning it unconditionally
rem also discards any CLASS value left in the environment by an earlier run.
set CLASS=%~1
if "%~1"=="crawl" set CLASS=org.apache.nutch.crawl.Crawl
if "%~1"=="inject" set CLASS=org.apache.nutch.crawl.Injector
if "%~1"=="generate" set CLASS=org.apache.nutch.crawl.Generator
if "%~1"=="fetchlist" set CLASS=org.apache.nutch.pagedb.FetchListEntry
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="fetch2" set CLASS=org.apache.nutch.fetcher.Fetcher2
if "%~1"=="convdb" set CLASS=org.apache.nutch.tools.compat.CrawlDbConverter
if "%~1"=="parse" set CLASS=org.apache.nutch.parse.ParseSegment
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.Indexer
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="updatedb" set CLASS=org.apache.nutch.crawl.CrawlDb
if "%~1"=="mergesegs" set CLASS=org.apache.nutch.segment.SegmentMerger
if "%~1"=="readdb" set CLASS=org.apache.nutch.crawl.CrawlDbReader
rem The warning and the assignment are separate statements inside one block;
rem on a single line ECHO would print the SET text instead of executing it.
if "%~1"=="segread" (
    echo "[DEPRECATED] Command 'segread' is deprecated, use 'readseg' instead."
    set CLASS=org.apache.nutch.segment.SegmentReader
)
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server

if not defined JAVA_HOME (
    >&2 echo JAVA_HOME is not set
    exit /b 1
)

rem Collect every argument after COMMAND. Only %%1..%%9 are addressable
rem directly, so SHIFT is used to walk the whole list instead of stopping at
rem the ninth parameter.
set NUTCH_ARGS=
shift
:COLLECT_ARGS
rem The second test is only reached when %%~1 is empty; it distinguishes
rem "no more arguments" from an explicit empty-string argument ("").
if "%~1"=="" if [%1]==[] goto RUN
set NUTCH_ARGS=%NUTCH_ARGS% %1
shift
goto COLLECT_ARGS

:RUN
echo %CLASSPATH%
"%JAVA_HOME%\bin\java" %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS%%NUTCH_ARGS%
:end
rem End the script here so that nothing following this label is executed as
rem a command.
goto :eof
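rem NOTE: If an error is reported, check that the path to the Nutch configuration files is correct. Nutch reads them via Configuration.class.getResource("") (i.e. relative to the current class path).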