nutch + hadoop + zookeeper + hbase, linux脚本


vi /etc/sysconfig/network-scripts/ifcfg-eth0

service network restart


bin/hadoop-daemon.sh start datanode


sudo ufw disable

chmod 777 bin/*

hadoop fs -put urls urls

cd /home/nutch_mysql/runtime/deploy/
cd /home/nutch_mysql/runtime/local/
cd /home/nutch_hbase/runtime/local/
cd /home/nutch_hbase/runtime/deploy/

bin/hadoop jar /home/nutch_hbase/runtime/deploy/Nutch-2.0-dev.job org.apache.nutch.crawl.Crawl crawl urls -depth 1

hadoop jar /home/nutch_hbase/runtime/deploy/Nutch-2.0-dev.job org.apache.nutch.crawl.Crawl crawl urls -depth 1


bin/nutch crawl urls -depth 5 -topN 5 -threads 4
bin/nutch generate
bin/nutch inject urls

bin/hadoop dfsadmin -report



/usr/java/jdk1.7.0_03


http://10.10.10.17:60010/master.jsp


bin/hadoop namenode -format

cd /home/hadoop*
bin/start-all.sh
bin/stop-all.sh

关闭安全模式
bin/hadoop dfsadmin -safemode leave

cd /home/zook*
bin/zkServer.sh start
bin/zkServer.sh stop

cd /home/hbase*
bin/start-hbase.sh

bin/hbase shell

bin/hbase rest start -p 8000

bin/stop-hbase.sh


reboot


Run the following inside `bin/hbase shell` (a table must be disabled before it can be dropped):
disable 'webpage_tianya'
drop 'webpage_tianya'

disable 'webpage'
drop 'webpage'

bin/hadoop fs -put urls urls

bin/hadoop jar nutch-2.0.job org.apache.nutch.crawl.Crawl urls -dir crawl -depth 3 -topN 50


http://10.10.10.17:50030/jobtracker.jsp
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值