vi /etc/sysconfig/network-scripts/ifcfg-eth0
service network restart
bin/hadoop-daemon.sh start datanode
sudo ufw disable
chmod 777 bin/*
hadoop fs -put urls urls
cd /home/nutch_mysql/runtime/deploy/
cd /home/nutch_mysql/runtime/local/
cd /home/nutch_hbase/runtime/local/
cd /home/nutch_hbase/runtime/deploy/
bin/hadoop jar /home/nutch_hbase/runtime/deploy/Nutch-2.0-dev.job org.apache.nutch.crawl.Crawl crawl urls -depth 1
hadoop jar /home/nutch_hbase/runtime/deploy/Nutch-2.0-dev.job org.apache.nutch.crawl.Crawl crawl urls -depth 1
bin/nutch crawl urls -depth 5 -topN 5 -threads 4
bin/nutch generate
bin/nutch inject urls
bin/hadoop dfsadmin -report
/usr/java/jdk1.7.0_03
http://10.10.10.17:60010/master.jsp
bin/hadoop namenode -format
cd /home/hadoop*
bin/start-all.sh
bin/stop-all.sh
关闭安全模式 (leave HDFS safe mode)
bin/hadoop dfsadmin -safemode leave
cd /home/zook*
bin/zkServer.sh start
bin/zkServer.sh stop
cd /home/hbase*
bin/start-hbase.sh
bin/hbase shell
bin/hbase rest start -p 8000
bin/stop-hbase.sh
reboot
disable 'webpage_tianya'
drop 'webpage_tianya'
disable 'webpage'
drop 'webpage'
bin/hadoop fs -put urls urls
bin/hadoop jar nutch-2.0.job org.apache.nutch.crawl.Crawl urls -dir crawl -depth 3 -topN 50
http://10.10.10.17:50030/jobtracker.jsp