1)安装Hadoop环境
2)写MapReduce任务,将项目打包 OperateAyx-1.3.1.jar 入口如下:HBaseMR main() 方法的编写
3)进入 /var/spool/cron 路径(cd /var/spool/cron),用 hdfs 用户将 OperateAyx-1.3.1.jar 上传到服务器,并执行 chown hdfs:hdfs OperateAyx-1.3.1.jar
4)启用 Linux 定时任务 crontab -e
0 17 * * * /var/lib/hadoop-hdfs/operateAyx_processor_info.sh
5)operateAyx_processor_info.sh具体如下:
#!/bin/bash
# operateAyx_processor_info.sh — launch the HBaseMR MapReduce job (cron entry point).
# Must be executed as the hdfs user (the jar is owned by hdfs:hdfs).
#
# Known argument sets (index  type  hbase_table [...]), from the original notes:
#   test-index raw_fin_info raw_fin_info_source info
#   cia_index  raw_fin_info raw_fin_info
set -u

# Timestamp gives each run its own log file instead of being discarded unused.
DATE_N=$(date "+%Y-%m-%d_%H:%M:%S")
readonly JAR_FILE="OperateAyx-1.3.1.jar"
readonly EXE_USER="hdfs"
readonly MAIN_CLASS="com.yonyou.spider.content.processor.hbase.HBaseMR"
readonly LOG_FILE="hbase_mr_${DATE_N}.log"

CUR_USER=$(whoami)
if [ "$EXE_USER" != "$CUR_USER" ]; then
  # Diagnostics go to stderr so cron mail / redirects separate them from output.
  echo "ERROR: Must be executed under the hdfs user" >&2
  exit 1
fi

# Job arguments: index, document type, HBase table. Cron passes no arguments,
# so the production values from the notes above are used as defaults.
# TODO(review): confirm these defaults match the intended production job.
INDEX=${1:-cia_index}
TYPE=${2:-raw_fin_info}
HBASE_TBL=${3:-raw_fin_info}

# test
echo "starting......"
# test invocation (kept for reference; $TEST/$TEST_PREFIX only used in test mode):
#nohup hadoop jar "$JAR_FILE" "$MAIN_CLASS" "$INDEX" "$TYPE" "$HBASE_TBL" "$TEST" "$TEST_PREFIX" >> "$LOG_FILE" 2>&1 &
# product — original line was truncated: it lacked the job arguments, the log
# redirection, and the trailing '&' required for nohup backgrounding.
nohup hadoop jar "$JAR_FILE" "$MAIN_CLASS" "$INDEX" "$TYPE" "$HBASE_TBL" >> "$LOG_FILE" 2>&1 &