1、DBCount类
使用 Oracle 数据库,运行前注意将对应的 JDBC 驱动 jar 包加入 classpath。
/**
 * MapReduce driver that counts page views per URL.
 *
 * <p>Rows are read from the {@code HADOOP_ACCESS} table through
 * {@code DBInputFormat}, mapped/reduced into per-URL totals, and written to
 * the {@code HADOOP_PAGEVIEW} table through {@code DBOutputFormat}.
 */
public class DBCount extends Configured implements Tool {
    // Columns of the input table HADOOP_ACCESS("url", "referrer", "atime").
    private static final String[] AccessFieldNames = { "url", "referrer", "atime" };
    // Columns of the output table HADOOP_PAGEVIEW("url", "pageview").
    private static final String[] PageviewFieldNames = { "url", "pageview" };

    // JDBC handles; not used by run()/main() — presumably used by other
    // members of this class (verify before removing).
    Connection connection = null;
    PreparedStatement statement = null;

    // JDBC driver class and connection URL of the Oracle instance.
    static final String DRIVER_CLASS = "oracle.jdbc.driver.OracleDriver";
    static final String DB_URL = "jdbc:oracle:thin:@10.xx.xx.43:1521:xe";

    /**
     * Configures and runs the page-view counting job.
     *
     * @param args command-line arguments (unused by this job)
     * @return 0 if the job completed successfully, 1 otherwise (including
     *         when any exception was raised while configuring or running it)
     */
    @Override
    public int run(String[] args) {
        boolean result = false;
        try {
            Configuration conf = getConf();
            // NOTE(review): credentials are hard-coded; consider passing them in via configuration.
            DBConfiguration.configureDB(conf, DRIVER_CLASS, DB_URL, "orcl", "orcl");

            Job job = Job.getInstance(conf, "Count Pageviews of URLs");
            job.setJarByClass(DBCount.class);
            job.setMapperClass(PageviewMapper.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setReducerClass(PageviewReducer.class);

            // Read HADOOP_ACCESS with no filter condition (null), ordered by "url".
            DBInputFormat.setInput(job, AccessRecord.class, "HADOOP_ACCESS",
                    null, "url", AccessFieldNames);
            DBOutputFormat.setOutput(job, "HADOOP_PAGEVIEW", PageviewFieldNames);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            job.setOutputKeyClass(PageviewRecord.class);
            job.setOutputValueClass(NullWritable.class);

            result = job.waitForCompletion(true);
            if (result) {
                System.out.println("waitForCompletion is true");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result ? 0 : 1;
    }

    /**
     * Command-line entry point: runs the job through {@code ToolRunner} and
     * exits with the job's status code.
     *
     * @param args command-line arguments forwarded to {@code ToolRunner}
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        int ret;
        try {
            ret = ToolRunner.run(conf, new DBCount(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // A launch failure must not be reported as success.
            ret = 1;
        }
        System.exit(ret);
    }
}
初始化数据可以使用 DBCount 类中的 populateAccess 方法(该方法使用 DBCount 的 connection、statement 等成员变量):
/**
 * Fills the HADOOP_ACCESS table with a randomly generated browsing session.
 *
 * <p>Between 50 and 99 rows are inserted in a single transaction. Each row
 * records the visited page, the referring page (or NULL when the visit did
 * not come from a link) and a sequence number used as the access time.
 * On failure the transaction is rolled back and the error is printed; the
 * statement and the connection are always closed before returning.
 */
public void populateAccess() {
    try {
        Class.forName(DRIVER_CLASS);
        connection = DriverManager.getConnection(DB_URL, "orcl", "orcl");
        connection.setAutoCommit(false);
        statement = connection
                .prepareStatement("INSERT INTO HADOOP_ACCESS(url, referrer, atime)"
                        + " VALUES (?, ?, ?)");

        Random random = new Random();
        // Number of access rows to generate: 50..99.
        int accessCount = random.nextInt(50) + 50;
        final int PROBABILITY_PRECISION = 100; // 1 / 100
        final int NEW_PAGE_PROBABILITY = 15; // 15 / 100

        // Pages in the site :
        String[] pages = { "/a", "/b", "/c", "/d", "/e", "/f", "/g", "/h",
                "/i", "/j" };
        // linkMatrix[i] is the array of pages(indexes) that page_i links
        // to.
        int[][] linkMatrix = { { 1, 5, 7 }, { 0, 7, 4, 6, },
                { 0, 1, 7, 8 }, { 0, 2, 4, 6, 7, 9 }, { 0, 1 },
                { 0, 3, 5, 9 }, { 0 }, { 0, 1, 3 }, { 0, 2, 6 },
                { 0, 2, 6 } };

        // a mini model of user browsing a la pagerank
        int currentPage = random.nextInt(pages.length);
        String referrer = null;
        for (int i = 0; i < accessCount; i++) {
            statement.setString(1, pages[currentPage]);
            statement.setString(2, referrer);
            statement.setLong(3, i);
            statement.execute();

            int action = random.nextInt(PROBABILITY_PRECISION);
            // go to a new page with probability
            // NEW_PAGE_PROBABILITY / PROBABILITY_PRECISION
            if (action < NEW_PAGE_PROBABILITY) {
                currentPage = random.nextInt(pages.length); // a random page
                referrer = null;
            } else {
                // follow one of the links on the current page
                referrer = pages[currentPage];
                action = random.nextInt(linkMatrix[currentPage].length);
                currentPage = linkMatrix[currentPage][action];
            }
        }
        connection.commit();
    } catch (SQLException | ClassNotFoundException ex) {
        // The connection is still null when the driver could not be loaded
        // or the connection attempt itself failed; nothing to roll back then.
        if (connection != null) {
            try {
                connection.rollback();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        ex.printStackTrace();
    } finally {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            statement = null;
        }
        // Close the connection as well so it is not leaked.
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
            connection = null;
        }
    }
}
2、PageviewMapper类
/**
 * Emits {@code (url, 1)} for every access record read from the database.
 */
public class PageviewMapper extends Mapper<LongWritable, AccessRecord, Text, LongWritable>{
    // Constant count emitted for every access; never mutated.
    private static final LongWritable ONE = new LongWritable(1L);

    // Reused output key; avoids allocating a new Text for every record.
    private final Text outKey = new Text();

    /**
     * Writes the record's URL as key with a count of one.
     *
     * @param key     input key supplied by the input format (unused)
     * @param value   access record whose {@code urls} field becomes the output key
     * @param context job context used to emit the pair
     * @throws IOException          if emitting the pair fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    public void map(LongWritable key, AccessRecord value, Context context)
            throws IOException, InterruptedException {
        outKey.set(value.urls);
        context.write(outKey, ONE);
    }
}
3、PageviewReducer类
/**
 * Adds up the counts emitted for one URL and writes a single
 * {@code PageviewRecord} holding the URL and its total.
 */
public class PageviewReducer extends Reducer<Text, LongWritable, PageviewRecord, NullWritable>{
    // Placeholder value written alongside every output record.
    NullWritable n = NullWritable.get();

    /**
     * Sums all counts for {@code key} and emits the resulting record.
     *
     * @param key     the URL being aggregated
     * @param values  the partial counts for that URL
     * @param context job context used to emit the record
     * @throws IOException          if emitting the record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    public void reduce(Text key, Iterable<LongWritable> values,
            Context context) throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable count : values) {
            total = total + count.get();
        }
        String url = key.toString();
        PageviewRecord record = new PageviewRecord(url, total);
        context.write(record, n);
    }
}
4、AccessRecord类(DBInputFormat)
/**
 * Holds a {@code <url, referrer, time>} tuple.
 *
 * <p>Objects that are read from/written to a database should implement
 * DBWritable. Implementations are responsible for writing the fields of the
 * object to a PreparedStatement and reading them back from a ResultSet.
 *
 * @author cent
 */
public class AccessRecord implements Writable, DBWritable {
    String urls;
    String referrer;
    long time;

    /**
     * Binds the fields to statement parameters 1 (url), 2 (referrer) and 3 (time).
     *
     * @param statement the statement to populate
     * @throws SQLException if a parameter cannot be set
     */
    @Override
    public void write(PreparedStatement statement) throws SQLException {
        statement.setString(1, urls);
        statement.setString(2, referrer);
        statement.setLong(3, time);
    }

    /**
     * Loads the fields from columns 1 (url), 2 (referrer) and 3 (time) of the
     * current row.
     *
     * @param resultSet the result set positioned on the row to read
     * @throws SQLException if a column cannot be read
     */
    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        this.urls = resultSet.getString(1);
        this.referrer = resultSet.getString(2);
        this.time = resultSet.getLong(3);
    }

    /**
     * Serializes the record to a binary stream: url, referrer, then time.
     *
     * <p>A null string (e.g. the referrer of an entry page) is written as the
     * empty string, because {@code Text.writeString} cannot encode null; such a
     * field is therefore read back as {@code ""} rather than null.
     *
     * @param out the stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // Text.writeString emits the UTF-8 encoded bytes preceded by their length.
        Text.writeString(out, nullToEmpty(urls));
        Text.writeString(out, nullToEmpty(referrer));
        // Writes a long value, which is comprised of eight bytes, to the output stream
        out.writeLong(time);
    }

    /**
     * Restores the record from a binary stream, in the order used by
     * {@link #write(DataOutput)}.
     *
     * @param in the stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.urls = Text.readString(in);
        this.referrer = Text.readString(in);
        // Reads eight input bytes and returns a long value
        this.time = in.readLong();
    }

    // Maps null to "" so the value can be passed to Text.writeString.
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
5、PageviewRecord类(OutputKeyClass)
/**
 * Holds a {@code <url, pageview>} tuple.
 *
 * @author cent
 */
public class PageviewRecord implements Writable, DBWritable{
    String url;
    long pageview;

    /**
     * Creates an empty record. Needed so the framework can instantiate the
     * class reflectively before calling one of the {@code readFields} methods.
     */
    public PageviewRecord() {
    }

    /**
     * Creates a record for the given URL and its page-view count.
     *
     * @param url      the page URL
     * @param pageview the number of views counted for that URL
     */
    public PageviewRecord(String url, long pageview) {
        this.url = url;
        this.pageview = pageview;
    }

    /** Returns the URL and the count separated by a single space. */
    @Override
    public String toString() {
        return url + " " + pageview;
    }

    /**
     * Binds the fields to statement parameters 1 (url) and 2 (pageview).
     *
     * @param statement the statement to populate
     * @throws SQLException if a parameter cannot be set
     */
    @Override
    public void write(PreparedStatement statement) throws SQLException {
        statement.setString(1, url);
        statement.setLong(2, pageview);
    }

    /**
     * Loads the fields from columns 1 (url) and 2 (pageview) of the current row.
     *
     * @param resultSet the result set positioned on the row to read
     * @throws SQLException if a column cannot be read
     */
    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        this.url = resultSet.getString(1);
        this.pageview = resultSet.getLong(2);
    }

    /**
     * Serializes the record to a binary stream: url, then pageview.
     *
     * <p>A null url is written as the empty string, because
     * {@code Text.writeString} cannot encode null.
     *
     * @param out the stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        Text.writeString(out, url == null ? "" : url);
        out.writeLong(pageview);
    }

    /**
     * Restores the record from a binary stream, in the order used by
     * {@link #write(DataOutput)}.
     *
     * @param in the stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.url = Text.readString(in);
        this.pageview = in.readLong();
    }
}