Hadoop 实例

#!/bin/bash
#
# Launches a Hadoop Streaming job that runs a Python mapper and reducer over
# ${hdp_input} and writes the job output to ${hdp_output}.
#
# Reads HADOOP_HOME, HADOOP_STREAMING_HOME, STREAMING_JAR and PYTHON_LIB.
# NOTE(review): presumably these come from the environment or from
# yew_functions.sh (not visible here) -- confirm.

source "../yew_functions.sh"

# Fail fast with a clear message instead of building broken paths such as
# "/bin/hadoop" when a required variable is missing or empty.
: "${HADOOP_HOME:?HADOOP_HOME must be set}"
: "${HADOOP_STREAMING_HOME:?HADOOP_STREAMING_HOME must be set}"
: "${STREAMING_JAR:?STREAMING_JAR must be set}"
: "${PYTHON_LIB:?PYTHON_LIB must be set}"

hdp_input="/file/stat.bz2"
hdp_output="/user/out"
hadoop="${HADOOP_HOME}/bin/hadoop"

# Remove any previous output directory before launching the job. A failure
# here (e.g. the directory does not exist yet) is not fatal.
"${hadoop}" fs -rmr "${hdp_output}"

# Each command is derived from the file that is shipped with -file, so the
# script a task runs is always the script that was uploaded. The original
# paired "mapper.py" with parseuv_mapper.py and "parseuv_reducer.py" with
# reducer.py, i.e. the tasks would have invoked files that were never shipped.
# NOTE(review): the parseuv_* names are assumed -- confirm against the files
# on disk.
mapper_file="parseuv_mapper.py"
mapper_cmd="python27/bin/python ${mapper_file}"
reducer_file="parseuv_reducer.py"
reducer_cmd="python27/bin/python ${reducer_file}"

# The jar path is intentionally left unquoted, as in the original, in case
# STREAMING_JAR holds a glob pattern -- TODO confirm and quote if it does not.
# shellcheck disable=SC2086
"${hadoop}" jar $HADOOP_STREAMING_HOME/$STREAMING_JAR \
   -D mapred.job.name="[test]" \
   -D mapred.reduce.tasks="1" \
   -cacheArchive "${PYTHON_LIB}/python27.tar.gz#python27" \
   -mapper "${mapper_cmd}" \
   -reducer "${reducer_cmd}" \
   -input "${hdp_input}" \
   -output "${hdp_output}" \
   -file "${mapper_file}" \
   -file "${reducer_file}"

# coding:utf8
"""Streaming mapper: emits one uid per input line that carries an id field.

Reads lines from stdin and writes each extracted uid on its own line to
stdout. Works on both Python 2.7 and Python 3 (the original used the
Python 2 print statement).
"""

import sys, re


def extract_uid(line):
  """Return the uid found in `line`, or None when the line has none.

  The line is split on whitespace and only the FIRST token starting with
  "id" is considered. That token must have exactly the form "<key>:<value>";
  the value (which may be an empty string) is returned. If the token does not
  split into exactly two parts on ":", the whole line is rejected.
  """
  for item in line.strip().split():
    if item.startswith("id"):
      key_value = item.split(":")
      if len(key_value) != 2:
        # Malformed id token: give up on this line, do not look further.
        return None
      return key_value[1]
  return None


def main():
  """Map every stdin line to its uid and print the uids that were found."""
  for line in sys.stdin:
    uid = extract_uid(line)
    if uid is not None:
      sys.stdout.write(uid + "\n")


if __name__ == "__main__":
  main()

# coding:utf8
"""Streaming reducer: counts runs of identical consecutive uids on stdin.

Prints a single summary line to stdout. Works on both Python 2.7 and
Python 3 (the original used the Python 2 print statement).
"""

import sys


def count_uid_runs(lines):
  """Return how many times the stripped line value changes across `lines`.

  Each line is compared with the previous one, so the result equals the
  number of distinct uids only when equal uids arrive adjacent to each other.
  NOTE(review): presumably the input is the sorted mapper output -- confirm.

  The comparison starts from the empty string, so leading blank lines are
  not counted as a uid.
  """
  cnt = 0
  current_uid = ""
  for line in lines:
    line = line.strip()
    if current_uid != line:
      cnt += 1
      current_uid = line
  return cnt


def main():
  """Count uid runs on stdin and print the summary line."""
  sys.stdout.write("Number of records:%s\n" % count_uid_runs(sys.stdin))


if __name__ == "__main__":
  main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值