从这里转载的免cygwin命令行:http://hi.baidu.com/haojielyb/blog/item/778318306ecb809da8018ecb.html
多谢这位大牛了。写得很好。但在nutch 1.0中测试无效,错不在这些bat,在Hadoop。
nutch window 版 bat 命令 免 cygwin 中文
0.7.2
=============
@echo off
rem Nutch 0.7.2 launcher for Windows cmd.exe (no Cygwin required).
rem
rem Usage: nutch COMMAND [ARGS...]
rem   COMMAND is one of the aliases printed by the usage text below, or the
rem   fully qualified name of a Java class to run directly.
rem
rem Environment:
rem   NUTCH_HOME  Nutch installation directory. When it is not set, the
rem               default assigned under the INIT label is used.
rem
rem setlocal keeps NUTCH_HOME, CLASSPATH, CLASS and the other variables set
rem here from leaking into the calling console, so a value left over from a
rem previous run can never be picked up by the next one.
setlocal
set JAVA_HEAP_MAX=-Xmx512M
if not "%~1"=="" goto INIT

:echoMSG
echo 李永波制作
echo Nutch Version: 0.7.2
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo crawl one-step crawler for intranets 一步式抓取企业内部网
echo admin database administration, including creation 数据库管理(包括建立)
echo inject inject new urls into the database 添加新的 网站url 到数据库
echo generate generate new segments to fetch 生成新的获取数据
echo fetchlist print the fetchlist of a segment 打印数据的存取列表
echo fetch fetch a segment's pages 存取数据段的页
echo parse parse a segment's pages 解析数据段的页
echo index run the indexer on a segment's fetcher output 在段的存取输出上运行索引
echo merge merge several segment indexes 合并几个数据段的索引
echo dedup remove duplicates from a set of segment indexes 从数据段集删除重复的索引
echo updatedb update db from segments after fetching 在获取后从数据段中更新数据库
echo updatesegs update segments with link data from the db 从数据库中更新数据段和链接数据
echo mergesegs merge multiple segments into a single segment 合并多重数据段成一个单一的部分
echo readdb examine arbitrary fields of the database 审查数据库任意字段
echo analyze adjust database link-analysis scoring 调整数据库连接,分析得分
echo prune prune segment index(es) of unwanted content 修剪部分索引不想要的内容
echo segread read, fix and dump segment data 阅读,修理和丢弃数据段
echo segslice append, join and slice segment data 附录,加入和切片部分数据
echo server run a search server 运行搜索的服务器
echo namenode run the NDFS namenode 运行 NDFS 名称 节点
echo datanode run an NDFS datanode 运行ndfs 数据节点
echo ndfs run an NDFS admin client 运行NDFS 管理节点
echo jobtracker run the MapReduce job Tracker node 运行mapreduce作业跟踪节点
echo tasktracker run a MapReduce task Tracker node 运行mapreduce任务跟踪节点
echo or
echo CLASSNAME run the class named CLASSNAME 运行指定类名的类
echo Most commands print help when invoked w/o parameters. 当参数错误时命令会打印帮助
goto end

:INIT
rem Default installation directory; edit it, or set NUTCH_HOME before calling.
if not defined NUTCH_HOME set NUTCH_HOME=C:/work/nutch-0.7.2
rem NOTE(review): FOR reportedly returns matches without their directory when
rem the wildcard path uses forward slashes; converting to backslashes avoids
rem relying on that. Confirm on the target Windows version.
set NUTCH_HOME=%NUTCH_HOME:/=\%
set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%\conf;%NUTCH_HOME%\plugin
rem Append every Nutch jar and library jar. The call re-expands CLASSPATH on
rem each iteration, so no helper batch file has to be generated on disk.
for %%i in ("%NUTCH_HOME%\nutch-*.jar") do call set "CLASSPATH=%%CLASSPATH%%;%%i"
for %%i in ("%NUTCH_HOME%\lib\*.jar") do call set "CLASSPATH=%%CLASSPATH%%;%%i"

:EXEC
rem Anything that is not a known alias is treated as a class name, which is
rem what the CLASSNAME entry of the usage text promises.
set CLASS=%~1
if "%~1"=="crawl" set CLASS=org.apache.nutch.tools.CrawlTool
if "%~1"=="admin" set CLASS=org.apache.nutch.tools.WebDBAdminTool
if "%~1"=="inject" set CLASS=org.apache.nutch.db.WebDBInjector
if "%~1"=="generate" set CLASS=org.apache.nutch.tools.FetchListTool
if "%~1"=="fetchlist" set CLASS=org.apache.nutch.pagedb.FetchListEntry
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="parse" set CLASS=org.apache.nutch.tools.ParseSegment
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.IndexSegment
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="updatedb" set CLASS=org.apache.nutch.tools.UpdateDatabaseTool
if "%~1"=="updatesegs" set CLASS=org.apache.nutch.tools.UpdateSegmentsFromDb
if "%~1"=="mergesegs" set CLASS=org.apache.nutch.tools.SegmentMergeTool
if "%~1"=="readdb" set CLASS=org.apache.nutch.db.WebDBReader
if "%~1"=="prune" set CLASS=org.apache.nutch.tools.PruneIndexTool
if "%~1"=="segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="segslice" set CLASS=org.apache.nutch.segment.SegmentSlicer
if "%~1"=="analyze" set CLASS=org.apache.nutch.tools.LinkAnalysisTool
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server
if "%~1"=="namenode" set CLASS=org.apache.nutch.ndfs.NDFS$NameNode
if "%~1"=="datanode" set CLASS=org.apache.nutch.ndfs.NDFS$DataNode
if "%~1"=="ndfs" set CLASS=org.apache.nutch.fs.TestClient
if "%~1"=="jobtracker" set CLASS=org.apache.nutch.mapReduce.JobTracker
if "%~1"=="tasktracker" set CLASS=org.apache.nutch.mapReduce.TaskTracker

rem Gather every argument after COMMAND. Forwarding a fixed list of eight
rem positional parameters would silently drop the ninth and later ones.
rem The loop stops at the first empty argument.
set NUTCH_ARGS=
:collectArgs
shift
if "%~1"=="" goto RUN
set NUTCH_ARGS=%NUTCH_ARGS% %1
goto collectArgs

:RUN
rem The classpath is quoted so installation paths containing spaces work.
java %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS%%NUTCH_ARGS%
:end
0.8.1 or 0.9
=============
@echo off
rem Nutch 0.8.1 / 0.9 launcher for Windows cmd.exe (no Cygwin required).
rem
rem Usage: nutch COMMAND [ARGS...]
rem   COMMAND is one of the aliases printed by the usage text below, or the
rem   fully qualified name of a Java class to run directly.
rem
rem Environment:
rem   NUTCH_HOME  Nutch installation directory. When it is not set, the
rem               default assigned under the INIT label is used.
rem
rem setlocal keeps NUTCH_HOME, CLASSPATH, CLASS and the other variables set
rem here from leaking into the calling console, so a value left over from a
rem previous run can never be picked up by the next one.
setlocal
set JAVA_HEAP_MAX=-Xmx512M
if not "%~1"=="" goto INIT

:echoMSG
echo Title:欢迎使用北京线点科技 Nutch 运行脚本
echo Author:jaddy0302 mail:jaddy0302@126.com QQ:5622928
echo Site:http://www.xd-tech.com.cn 线点科技 专业垂直搜索引擎产品
echo Nutch Version: 0.8.1 / 0.9
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo crawl one-step crawler for intranets 一步式抓取企业内部网
echo inject inject new urls into the database 添加新的 网站url 到数据库
echo generate generate new segments to fetch 生成新的获取数据
echo fetch fetch a segment's pages 存取数据段的页
echo parse parse a segment's pages 解析数据段的页
echo mergedb merge crawldb-s, with optional filtering 合并多个 crawldb,可选过滤
echo dedup remove duplicates from a set of segment indexes 从数据段集删除重复的索引
echo updatedb update db from segments after fetching 在获取后从数据段中更新数据库
echo updatesegs update segments with link data from the db 从数据库中更新数据段和链接数据
echo mergesegs merge multiple segments into a single segment 合并多重数据段成一个单一的部分
echo readdb examine arbitrary fields of the database 审查数据库任意字段
echo segread read, fix and dump segment data 阅读,修理和丢弃数据段
echo readlinkdb read / dump link db 读取或导出链接数据库
echo readseg read / dump segment data
echo invertlinks create a linkdb from parsed segments
echo mergelinkdb merge( 合并) linkdb-s, with optional filtering
echo index run the indexer on parsed segments and linkdb
echo merge merge several segment indexes 合并几个数据段的索引
echo segslice append, join and slice segment data 附录,加入和切片部分数据
echo plugin load a plugin and run one of its classes main() 加载 插件
echo server run a search server 运行搜索的服务器
echo or
echo CLASSNAME run the class named CLASSNAME 运行指定类名的类
echo Most commands print help when invoked w/o parameters. 当参数错误时命令会打印帮助
goto end

:INIT
ECHO 请注意修改此处 set NUTCH_HOME=e:/cyg/Nutch
rem Default installation directory; edit it, or set NUTCH_HOME before calling.
if not defined NUTCH_HOME set NUTCH_HOME=e:/cyg/Nutch
rem NOTE(review): FOR reportedly returns matches without their directory when
rem the wildcard path uses forward slashes; converting to backslashes avoids
rem relying on that. Confirm on the target Windows version.
set NUTCH_HOME=%NUTCH_HOME:/=\%
set CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%\conf;%NUTCH_HOME%\plugin
rem Append every Nutch jar and library jar. The call re-expands CLASSPATH on
rem each iteration, so no helper batch file has to be generated on disk.
for %%i in ("%NUTCH_HOME%\nutch-*.jar") do call set "CLASSPATH=%%CLASSPATH%%;%%i"
for %%i in ("%NUTCH_HOME%\lib\*.jar") do call set "CLASSPATH=%%CLASSPATH%%;%%i"

:EXEC
rem Anything that is not a known alias is treated as a class name, which is
rem what the CLASSNAME entry of the usage text promises.
set CLASS=%~1
if "%~1"=="crawl" set CLASS=org.apache.nutch.crawl.Crawl
rem NOTE(review): the admin alias is not listed in the usage text and looks
rem carried over from the 0.7.2 script; confirm the class exists in 0.8/0.9.
if "%~1"=="admin" set CLASS=org.apache.nutch.tools.WebDBAdminTool
if "%~1"=="inject" set CLASS=org.apache.nutch.crawl.Injector
if "%~1"=="generate" set CLASS=org.apache.nutch.crawl.Generator
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="parse" set CLASS=org.apache.nutch.parse.ParseSegment
if "%~1"=="readdb" set CLASS=org.apache.nutch.crawl.CrawlDbReader
if "%~1"=="mergedb" set CLASS=org.apache.nutch.crawl.CrawlDbMerger
if "%~1"=="readlinkdb" set CLASS=org.apache.nutch.crawl.LinkDbReader
if "%~1"=="readseg" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="segread" echo [DEPRECATED] Command [segread] is deprecated, use [readseg] instead.
if "%~1"=="mergesegs" set CLASS=org.apache.nutch.segment.SegmentMerger
if "%~1"=="updatedb" set CLASS=org.apache.nutch.crawl.CrawlDb
if "%~1"=="invertlinks" set CLASS=org.apache.nutch.crawl.LinkDb
if "%~1"=="mergelinkdb" set CLASS=org.apache.nutch.crawl.LinkDbMerger
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.Indexer
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="plugin" set CLASS=org.apache.nutch.plugin.PluginRepository
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server

rem Gather every argument after COMMAND. Forwarding a fixed list of eight
rem positional parameters would silently drop the ninth and later ones.
rem The loop stops at the first empty argument.
set NUTCH_ARGS=
:collectArgs
shift
if "%~1"=="" goto RUN
set NUTCH_ARGS=%NUTCH_ARGS% %1
goto collectArgs

:RUN
rem Show the command about to run; the classpath is left unexpanded in this
rem echo so the line stays readable.
ECHO java %JAVA_HEAP_MAX% -classpath %%CLASSPATH%% %CLASS%%NUTCH_ARGS%
rem The classpath is quoted so installation paths containing spaces work.
java %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS%%NUTCH_ARGS%
:end
将以上内容分别拷贝并保存为 .bat 文件即可。注意:这是两个不同版本的脚本,请按所用的 Nutch 版本选择。