Reading/Writing a Database with the Hadoop API

Use the Hadoop MapReduce API to read records from a database, do some simple processing, and write the results back to the database.

package dbio;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class DBIO extends Configured {
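	// Connection settings consumed by DBConfiguration.configureDB() in main().
	// The Oracle JDBC driver jar (e.g. ojdbc6.jar) must be on the classpath of
	// both the job client and the map/reduce tasks, for example by bundling it
	// in the job jar's lib/ directory.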
	static String driverClassName = "oracle.jdbc.driver.OracleDriver";
	static String url = "jdbc:oracle:thin:@10.10.31.81:1521/oradev"; 
	static String username = "scott";
	static String password = "test001";

	/**
	 * User-defined record type: implements WritableComparable for the
	 * shuffle and DBWritable for JDBC reads and writes.
	 * 
	 * @author Administrator
	 * 
	 */
	public static class AccessRecord implements
			WritableComparable<AccessRecord>, DBWritable {
		int prodid; // product ID
		int price; // product price
		int count; // units sold

		@Override
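		// Binds the fields, in order, to the ?-placeholders of the INSERT
		// statement that DBOutputFormat builds from the column list passed
		// to setOutput() in main()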
		public void write(PreparedStatement statement) throws SQLException {
			statement.setInt(1, prodid);
			statement.setInt(2, price);
			statement.setInt(3, count);
		}

		@Override
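		// Populates the fields, in order, from the columns of the SELECT
		// configured via DBInputFormat.setInput() in main()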
		public void readFields(ResultSet resultSet) throws SQLException {
			this.prodid = resultSet.getInt(1);
			this.price = resultSet.getInt(2);
			this.count = resultSet.getInt(3);
		}

		/**
		 * Set the prodid, price, and count values.
		 */
		public void set(int prodid, int price, int count) {
			this.prodid = prodid;
			this.price = price;
			this.count = count;
		}

		public int getProdid() {
			return prodid;
		}

		public int getPrice() {
			return price;
		}

		public int getCount() {
			return count;
		}

		@Override
		// Deserialization: reconstruct an AccessRecord from the binary stream
		public void readFields(DataInput in) throws IOException {
			prodid = in.readInt();
			price = in.readInt();
			count = in.readInt();
		}

		@Override
		// Serialization: convert this AccessRecord to binary for the stream
		public void write(DataOutput out) throws IOException {
			out.writeInt(prodid);
			out.writeInt(price);
			out.writeInt(count);
		}

		@Override
		// Key ordering: descending by count; ties broken by price, or by
		// prodid when both counts are zero (Integer.compare avoids the
		// overflow risk of subtraction-based comparators)
		public int compareTo(AccessRecord o) {
			if (o.count == count) {
				if (count == 0) {
					return Integer.compare(o.prodid, prodid);
				}
				return Integer.compare(o.price, price);
			}
			return Integer.compare(o.count, count);
		}

		// Two methods every new key type should override
		@Override
		public int hashCode() {
			return count + prodid * 3;
		}

		@Override
		public boolean equals(Object right) {
			if (right == null)
				return false;
			if (this == right)
				return true;
			if (right instanceof AccessRecord) {
				AccessRecord r = (AccessRecord) right;
				return r.prodid == prodid && r.price == price
						&& r.count == count;
			} else {
				return false;
			}
		}
	}

	static class MapDataBaseInHDFS extends
			Mapper<LongWritable, AccessRecord, LongWritable, AccessRecord> {

		LongWritable writeKey = new LongWritable();

		@Override
		public void map(LongWritable key, AccessRecord value, Context context)
				throws IOException, InterruptedException {
			context.write(writeKey, value); // forward to the reducer
		}
	}
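	// Note: DBInputFormat supplies a LongWritable record number as the map
	// input key and one AccessRecord per row as the value. Since the mapper
	// always emits the constant key 0, every row reaches a single reduce() call.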

	public static class ReduceHDFSInDataBase extends
			Reducer<LongWritable, AccessRecord, AccessRecord, LongWritable> {

		LongWritable writeKey = new LongWritable();

		@Override
		public void reduce(LongWritable key, Iterable<AccessRecord> values,
				Context context) throws IOException, InterruptedException {
			for (AccessRecord writeValue : values) {
				context.write(writeValue, writeKey);
			}
		}
	}
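	// Note: DBOutputFormat persists only the reduce output key, by calling its
	// DBWritable.write(PreparedStatement); the LongWritable value is ignored.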

	/**
	 * @param args
	 * @throws Exception
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
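		// Store the driver class, URL, and credentials in the Configuration so
		// that DBInputFormat/DBOutputFormat can open JDBC connections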
		DBConfiguration.configureDB(conf, driverClassName, url, username,password);
		
		Job job = Job.getInstance(conf, "dbio");
		job.setJarByClass(DBIO.class);
		
		// Set the mapper and reducer classes
		job.setMapperClass(MapDataBaseInHDFS.class);
		job.setReducerClass(ReduceHDFSInDataBase.class);
		
		// Set the output types
		// map output key type
		job.setMapOutputKeyClass(LongWritable.class);
		// map output value type
		job.setMapOutputValueClass(AccessRecord.class);

		// reduce output key type
		job.setOutputKeyClass(AccessRecord.class);
		// reduce output value type
		job.setOutputValueClass(LongWritable.class);
		
		// Configure the input format (reads from the database, not HDFS paths)
		job.setInputFormatClass(DBInputFormat.class);
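		// Two queries: the first fetches the records to map over; the second
		// counts them so DBInputFormat can divide the input into splits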
		DBInputFormat.setInput(job, AccessRecord.class,
				"SELECT PRODID ,PRICE ,COUNT FROM TMP_HADOOP WHERE rownum<100",
				"SELECT count(PRODID) FROM TMP_HADOOP");
		// DBInputFormat.setInput(job, AccessRecord.class, "CUST_CLUB",
		// "rownum<10", "PRODID", new String[] { "PRODID","PRICE","COUNT"});

		job.setOutputFormatClass(DBOutputFormat.class);
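		// Generates "INSERT INTO TMP_HADOOP (PRODID, PRICE, COUNT)
		// VALUES (?, ?, ?)"; the placeholders are bound by
		// AccessRecord.write(PreparedStatement) in each reducer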
		DBOutputFormat.setOutput(job, "TMP_HADOOP", new String[] { "PRODID","PRICE", "COUNT" });

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
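
For local testing, a throwaway JDBC helper along these lines can create and seed the TMP_HADOOP table first. This is a hypothetical sketch, not part of the job: the class name SeedTmpHadoop, the sample row values, and the NUMBER column types are assumptions inferred from the job's queries; it reuses the connection settings above.

package dbio;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;

public class SeedTmpHadoop {
	public static void main(String[] args) throws Exception {
		Class.forName("oracle.jdbc.driver.OracleDriver");
		try (Connection conn = DriverManager.getConnection(
				"jdbc:oracle:thin:@10.10.31.81:1521/oradev", "scott", "test001")) {
			try (Statement st = conn.createStatement()) {
				// Assumed schema: three numeric columns matching AccessRecord
				st.executeUpdate("CREATE TABLE TMP_HADOOP "
						+ "(PRODID NUMBER(10), PRICE NUMBER(10), COUNT NUMBER(10))");
			}
			try (PreparedStatement ps = conn.prepareStatement(
					"INSERT INTO TMP_HADOOP (PRODID, PRICE, COUNT) VALUES (?, ?, ?)")) {
				for (int i = 1; i <= 50; i++) {
					ps.setInt(1, i);       // product ID
					ps.setInt(2, i * 100); // price
					ps.setInt(3, i % 7);   // units sold
					ps.addBatch();
				}
				ps.executeBatch();
			}
		}
	}
}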

 
