MapReduce演练DBCountPageView

1、DBCount类

使用Oracle数据库,注意在classpath中添加对应的Oracle JDBC驱动jar包。

/**
 * Driver for a MapReduce job that counts page views per URL.
 *
 * <p>The job reads rows from the {@code HADOOP_ACCESS} table through
 * {@code DBInputFormat}, sums the number of accesses per URL, and writes the
 * totals to the {@code HADOOP_PAGEVIEW} table through {@code DBOutputFormat}.
 */
public class DBCount extends Configured implements Tool {
	//input table HADOOP_ACCESS("url", "referrer", "atime")
	private static final String[] AccessFieldNames = { "url", "referrer", "atime" };
	//output table HADOOP_PAGEVIEW("url", "pageview")
	private static final String[] PageviewFieldNames = { "url", "pageview" };
	// JDBC handles for direct (non-MapReduce) database access.
	Connection connection = null;
	PreparedStatement statement = null;
	// JDBC driver class and connection URL handed to DBConfiguration.
	String DRIVER_CLASS = "oracle.jdbc.driver.OracleDriver";
	String DB_URL = "jdbc:oracle:thin:@10.xx.xx.43:1521:xe";

	/**
	 * Configures and runs the page-view counting job, blocking until it finishes.
	 *
	 * @param args command-line arguments (not used by this job)
	 * @return 0 if the job completed successfully, 1 if it failed or an
	 *         exception was raised while setting it up or running it
	 */
	@Override
	public int run(String[] args) {
		boolean result = false;
		try {
			Configuration conf = getConf();
			DBConfiguration.configureDB(conf, DRIVER_CLASS, DB_URL, "orcl", "orcl");

			Job job = Job.getInstance(conf, "Count Pageviews of URLs");
			job.setJarByClass(DBCount.class);
			job.setMapperClass(PageviewMapper.class);
			// Partial sums are computed map-side before the shuffle.
			job.setCombinerClass(LongSumReducer.class);
			job.setReducerClass(PageviewReducer.class);

			// Input: all rows of HADOOP_ACCESS (no WHERE condition), ordered by url.
			DBInputFormat.setInput(job, AccessRecord.class, "HADOOP_ACCESS",
					null, "url", AccessFieldNames);
			DBOutputFormat
					.setOutput(job, "HADOOP_PAGEVIEW", PageviewFieldNames);

			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(LongWritable.class);

			// The reducer emits the whole row as the key; the value carries nothing.
			job.setOutputKeyClass(PageviewRecord.class);
			job.setOutputValueClass(NullWritable.class);

			result = job.waitForCompletion(true);
			if (result) {
				System.out.println("waitForCompletion is true");
			}
		} catch (Exception e) {
			// Any failure leaves result == false, which maps to exit code 1 below.
			e.printStackTrace();
		}

		return result ? 0 : 1;
	}

	/**
	 * Command-line entry point: runs the job through {@code ToolRunner} and
	 * exits with the job's status code.
	 *
	 * @param args command-line arguments forwarded to {@code ToolRunner}
	 */
	public static void main(String[] args) {
		Configuration conf = new Configuration();
		// Default to failure so that an exception escaping ToolRunner.run is
		// not reported to the shell as a successful run.
		int ret = 1;
		try {
			ret = ToolRunner.run(conf, new DBCount(), args);
		} catch (Exception e) {
			e.printStackTrace();
		}
		System.exit(ret);
	}
}

初始化数据可以使用populateAccess方法(该方法需要放在DBCount类中,因为它使用了该类的connection、statement、DRIVER_CLASS和DB_URL字段)

/**
 * Fills the {@code HADOOP_ACCESS} table with a randomly generated browsing
 * session of 50 to 99 page accesses.
 *
 * <p>All rows are inserted in a single transaction: the transaction is
 * committed once every row has been inserted and rolled back if any step
 * fails. Errors are printed to standard error rather than propagated. The
 * statement and the connection are always closed before returning.
 */
public void populateAccess() {
	try {
		Class.forName(DRIVER_CLASS);
		connection = DriverManager.getConnection(DB_URL, "orcl", "orcl");
		connection.setAutoCommit(false);

		statement = connection
				.prepareStatement("INSERT INTO HADOOP_ACCESS(url, referrer, atime)"
						+ " VALUES (?, ?, ?)");

		Random random = new Random();
		// Number of page accesses to generate: 50..99.
		int time = random.nextInt(50) + 50;

		final int PROBABILITY_PRECISION = 100; // 1 / 100
		final int NEW_PAGE_PROBABILITY = 15; // 15 / 100

		// Pages in the site :
		String[] pages = { "/a", "/b", "/c", "/d", "/e", "/f", "/g", "/h",
				"/i", "/j" };
		// linkMatrix[i] is the array of pages(indexes) that page_i links
		// to.
		int[][] linkMatrix = { { 1, 5, 7 }, { 0, 7, 4, 6, },
				{ 0, 1, 7, 8 }, { 0, 2, 4, 6, 7, 9 }, { 0, 1 },
				{ 0, 3, 5, 9 }, { 0 }, { 0, 1, 3 }, { 0, 2, 6 },
				{ 0, 2, 6 } };

		// a mini model of user browsing a la pagerank
		int currentPage = random.nextInt(pages.length);
		// null means the page was entered directly, not via a link.
		String referrer = null;

		for (int i = 0; i < time; i++) {
			statement.setString(1, pages[currentPage]);
			statement.setString(2, referrer);
			// The loop counter doubles as the access time.
			statement.setLong(3, i);
			statement.execute();

			int action = random.nextInt(PROBABILITY_PRECISION);

			// go to a new page with probability
			// NEW_PAGE_PROBABILITY / PROBABILITY_PRECISION
			if (action < NEW_PAGE_PROBABILITY) {
				currentPage = random.nextInt(pages.length); // a random page
				referrer = null;
			} else {
				// Otherwise follow one of the current page's outgoing links.
				referrer = pages[currentPage];
				action = random.nextInt(linkMatrix[currentPage].length);
				currentPage = linkMatrix[currentPage][action];
			}
		}

		connection.commit();

	} catch (SQLException | ClassNotFoundException ex) {
		// The connection is still null when loading the driver or connecting
		// failed; rolling back then would throw a NullPointerException that
		// hides the original error.
		if (connection != null) {
			try {
				connection.rollback();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
		ex.printStackTrace();
	} finally {
		if (statement != null) {
			try {
				statement.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
		// Release the database connection as well; it is not reused.
		if (connection != null) {
			try {
				connection.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
		// Do not leave closed handles behind for a later call to trip over.
		statement = null;
		connection = null;
	}
}

2、PageviewMapper类

/**
 * Mapper that turns every access record into a {@code (url, 1)} pair so that
 * the accesses of each URL can be summed downstream.
 */
public class PageviewMapper extends Mapper<LongWritable, AccessRecord, Text, LongWritable>{
	// Shared count of one emitted for every access record.
	LongWritable ONE = new LongWritable(1L);

	/**
	 * Emits the URL of the given access record together with a count of one.
	 *
	 * @param key     input key supplied by the input format (not used)
	 * @param value   access record whose URL becomes the output key
	 * @param context sink for the emitted pair
	 */
	@Override
	public void map(LongWritable key, AccessRecord value, Context context)
			throws IOException, InterruptedException {
		context.write(new Text(value.urls), ONE);
	}
}

3、PageviewReducer类

/**
 * Reducer that adds up the counts received for a URL and emits the total as
 * a {@link PageviewRecord} key paired with a {@code NullWritable} value.
 */
public class PageviewReducer extends Reducer<Text, LongWritable, PageviewRecord, NullWritable>{
	// Placeholder value written next to every output record.
	NullWritable n = NullWritable.get();

	/**
	 * Sums all counts for one URL and writes a single page-view record.
	 *
	 * @param key     the URL
	 * @param values  the counts collected for that URL
	 * @param context sink for the resulting record
	 */
	@Override
	public void reduce(Text key, Iterable<LongWritable> values,
			Context context) throws IOException, InterruptedException {
		long total = 0L;
		for (LongWritable count : values) {
			total = total + count.get();
		}

		PageviewRecord record = new PageviewRecord(key.toString(), total);
		context.write(record, n);
	}
}

4、AccessRecord类(DBInputFormat)

/**
 * Holds a {@code <url, referrer, time>} tuple.
 *
 * <p>Objects that are read from or written to a database should implement
 * {@code DBWritable}: the implementation writes the object's fields to a
 * {@code PreparedStatement} and reads them back from a {@code ResultSet}.
 * The class also implements {@code Writable} for binary serialization.
 *
 * @author cent
 */
public class AccessRecord implements Writable, DBWritable {
    String urls;
    // May be null: a row can be stored without a referrer.
    String referrer;
    long time;

	/**
	 * Binds the fields to statement parameters 1..3 in the order
	 * url, referrer, time.
	 */
	@Override
	public void write(PreparedStatement statement) throws SQLException {
		statement.setString(1, urls);
		statement.setString(2, referrer);
		statement.setLong(3, time);
	}

	/**
	 * Loads the fields from result-set columns 1..3 in the order
	 * url, referrer, time.
	 */
	@Override
	public void readFields(ResultSet resultSet) throws SQLException {
		this.urls = resultSet.getString(1);
		this.referrer = resultSet.getString(2);
		this.time = resultSet.getLong(3);
	}

	/**
	 * Serializes the record to a binary stream: both strings first, then the
	 * time as a long.
	 *
	 * <p>A null string is written as the empty string, because
	 * {@code Text.writeString} cannot encode null. After a round trip through
	 * {@link #readFields(DataInput)} such a field is therefore empty rather
	 * than null.
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		// Text.writeString stores the string UTF-8 encoded with a length prefix.
		Text.writeString(out, urls == null ? "" : urls);
		Text.writeString(out, referrer == null ? "" : referrer);
		// Writes a long value, which is comprised of eight bytes, to the output stream
		out.writeLong(time);
	}

	/**
	 * Restores the record from a binary stream, reading the fields in the
	 * same order in which {@link #write(DataOutput)} wrote them.
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.urls = Text.readString(in);
		this.referrer = Text.readString(in);
		//Reads eight input bytes and returns a long value
		this.time = in.readLong();
	}

}

5、PageviewRecord类(OutputKeyClass)

/**
 * Holds a {@code <url, pageview>} tuple: a URL and the number of times it
 * was accessed.
 *
 * <p>Implements {@code DBWritable} so it can be written to and read from a
 * database row, and {@code Writable} for binary serialization.
 *
 * @author cent
 */
public class PageviewRecord implements Writable, DBWritable{
	String url;
    long pageview;

    /**
     * Creates an empty record. Needed so that the framework can instantiate
     * the class reflectively before filling it through {@code readFields}.
     */
    public PageviewRecord() {
    }

    /**
     * Creates a record for the given URL and page-view count.
     *
     * @param url      the page URL
     * @param pageview the number of views counted for that URL
     */
    public PageviewRecord(String url, long pageview) {
        this.url = url;
        this.pageview = pageview;
    }

    /** Returns the URL and the count separated by a single space. */
    @Override
    public String toString() {
      return url + " " + pageview;
    }

	/** Binds the fields to statement parameters 1..2 in the order url, pageview. */
	@Override
	public void write(PreparedStatement statement) throws SQLException {
		statement.setString(1, url);
		statement.setLong(2, pageview);
	}

	/** Loads the fields from result-set columns 1..2 in the order url, pageview. */
	@Override
	public void readFields(ResultSet resultSet) throws SQLException {
		this.url = resultSet.getString(1);
		this.pageview = resultSet.getLong(2);
	}

	/** Serializes the record to a binary stream: the URL first, then the count as a long. */
	@Override
	public void write(DataOutput out) throws IOException {
		Text.writeString(out, url);
		out.writeLong(pageview);
	}

	/** Restores the record from a binary stream, in the order written by {@link #write(DataOutput)}. */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.url = Text.readString(in);
		this.pageview = in.readLong();
	}

}

6、查看结果

select url, pageview from HADOOP_PAGEVIEW;

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值