Requirement
Read the rows of the stu table, count how many students there are of each age (the equivalent of select age, count(*) from stu group by age), and write the result into the stu_res table.
CREATE TABLE `stu` (
`id` int(11) NOT NULL DEFAULT '0',
`student_id` int(11) DEFAULT NULL,
`name` varchar(45) DEFAULT NULL,
`age` int(1) DEFAULT NULL,
`sex` varchar(1) DEFAULT NULL,
`birthday` varchar(45) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Data:
INSERT INTO `stu` VALUES ('1', '1', 'zs', '12', '1', '2019-07-17');
INSERT INTO `stu` VALUES ('2', '2', 'goudan', '13', '1', '2019-07-17');
INSERT INTO `stu` VALUES ('3', '3', 'gg', '12', '2', '2019-07-17');
INSERT INTO `stu` VALUES ('4', '4', 'hh', '12', '2', '2019-07-17');
INSERT INTO `stu` VALUES ('5', '5', 'mm', '13', '1', '2019-07-17');
INSERT INTO `stu` VALUES ('6', '6', 'tt', '12', '1', '2019-07-17');
Result table:
CREATE TABLE `stu_res` (
`age` int(11) NOT NULL DEFAULT '0',
`cnt` int(11) DEFAULT 0
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Result data (age cnt):
12 4
13 2
The Student class
Since the result only needs the age, the other columns are left out of the bean. Note that BeanListHandler (used below) instantiates the class through its no-argument constructor and fills in whatever properties match the column names, so the remaining columns are simply ignored.
public class Student {
private int id;
private int age;
public Student(int id, int age) {
this.id = id;
this.age = age;
}
public Student() {
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
@Override
public String toString() {
return this.id+" "+this.age;
}
}
Utils (connects to the database through c3p0)
import com.mchange.v2.c3p0.ComboPooledDataSource;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.BeanListHandler;
import java.sql.SQLException;
import java.util.List;
public class Utils {
private static ComboPooledDataSource dataSource;
static{
dataSource = new ComboPooledDataSource();
}
public static QueryRunner getQueryRunner(){
return new QueryRunner(dataSource);
}
public static void main(String[] args) throws SQLException {
QueryRunner queryRunner = getQueryRunner();
String sql = "select * from stu";
List<Student> query = queryRunner.query(sql, new BeanListHandler<Student>(Student.class));
System.out.println(query.size());
System.out.println(query); // smoke test: print the rows themselves, not the QueryRunner object
}
}
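The no-argument ComboPooledDataSource reads its JDBC settings from a c3p0-config.xml or c3p0.properties file on the classpath. If you would rather configure it in code, the static block above could look like the following sketch; the driver class, URL, user and password here are placeholders, not values taken from the original setup.
static {
    dataSource = new ComboPooledDataSource();
    try {
        // placeholder connection settings: point these at your own MySQL instance
        dataSource.setDriverClass("com.mysql.jdbc.Driver");
        dataSource.setJdbcUrl("jdbc:mysql://localhost:3306/test");
        dataSource.setUser("root");
        dataSource.setPassword("root");
    } catch (java.beans.PropertyVetoException e) {
        throw new ExceptionInInitializerError(e);
    }
}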
The driver class
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
import java.io.IOException;
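// note: the class is still named WordCount, but the job counts students per age, not words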
public class WordCount {
public static class MyMapper extends Mapper<LongWritable, Text,Text,IntWritable>{
IntWritable iw = new IntWritable(1);
Text text = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
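// the record reader emits "id age" (see Student.toString()), so after the split datas[1] is the age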
String[] datas = line.split(" ");
text.set(datas[1]);
context.write(text,iw);
}
}
//the default input format would be TextInputFormat; main() below swaps in MySqlInputFormat/MySqlOutputFormat
public static class MyReducer extends Reducer<Text,IntWritable,Text,IntWritable>{
IntWritable iw = new IntWritable();
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int num = 0;
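// sum the 1s the mappers emitted for this age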
for(IntWritable value:values){
num+=value.get();
}
iw.set(num);
context.write(key,iw);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
BasicConfigurator.configure();
Job job = Job.getInstance(conf);
job.setJarByClass(WordCount.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(MySqlInputFormat.class);
job.setOutputFormatClass(MySqlOutputFormat.class);
boolean res = job.waitForCompletion(true);
System.exit(res?0:1);
}
}
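Once the job has run, stu_res can be checked with the same DbUtils helper. A minimal sketch; the CheckResult class name is made up for illustration (MapListHandler ships with commons-dbutils):
import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.MapListHandler;
import java.sql.SQLException;
import java.util.Map;
public class CheckResult {
    public static void main(String[] args) throws SQLException {
        QueryRunner runner = Utils.getQueryRunner();
        // print every (age, cnt) row the job wrote
        for (Map<String, Object> row : runner.query(
                "select age, cnt from stu_res order by age", new MapListHandler())) {
            System.out.println(row.get("age") + "\t" + row.get("cnt"));
        }
    }
}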
The custom MySqlInputFormat
import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.BeanListHandler;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class MySqlInputFormat extends InputFormat <LongWritable, Text>{
public MySqlInputFormat(){
}
public static class MySqlInputSplit extends InputSplit implements Writable{
private long start;
private long end;
public MySqlInputSplit(){
}
public MySqlInputSplit(long start, long end) {
this.start = start;
this.end = end;
}
public long getStart() {
return start;
}
public void setStart(long start) {
this.start = start;
}
public long getEnd() {
return end;
}
public void setEnd(long end) {
this.end = end;
}
public long getLength() throws IOException, InterruptedException {
return this.end-this.start;
}
public String[] getLocations() throws IOException, InterruptedException {
return new String[0];
}
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeLong(start);
dataOutput.writeLong(end);
}
public void readFields(DataInput dataInput) throws IOException {
this.start=dataInput.readLong();
this.end=dataInput.readLong();
}
}
public List<InputSplit> getSplits(JobContext context) {
List<InputSplit> list = new ArrayList<InputSplit>();
long count = 0;
QueryRunner queryRunner = Utils.getQueryRunner();
try {
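// counting by materializing every row is fine for this demo; on a large table,
// "select count(*)" with a ScalarHandler would avoid loading all the rows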
count = queryRunner.query("select * from stu",new BeanListHandler<Student>(Student.class)).size();
} catch (SQLException e) {
e.printStackTrace();
}
long chunk = 2; // rows per split
// ceiling division: a trailing partial chunk (or a table smaller than one chunk) still gets its own split
long chunksize = (count + chunk - 1) / chunk;
// Slice the data: one split per map task, each covering chunk rows (the last split may hold fewer).
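// With the sample data: count = 6, chunk = 2 -> chunksize = 3, producing the splits
// [0,2), [2,4) and [4,6); each map task then reads exactly two rows via LIMIT.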
for(int i = 0;i<chunksize;i++){
MySqlInputSplit mi = null;
if(i+1==chunksize){
mi = new MySqlInputSplit(i*chunk,count);
list.add(mi);
}else{
mi = new MySqlInputSplit(i*chunk,i*chunk+chunk);
list.add(mi);
}
}
//return the list of splits, one per map task.
return list;
}
public static class MySqlRecordReader extends RecordReader<LongWritable, Text>{
private MySqlInputSplit split;
//iterator over the rows fetched from MySQL for this split
private Iterator<Student> dbcursor;
//row index within the split, starting at 0, so the reader only walks its own slice
private int index;
private LongWritable k; //the key handed to the mapper: the row's global offset (split start + index); matches the mapper's first type parameter.
private Text v; //the value handed to the mapper: one row rendered via Student.toString(); matches the mapper's second type parameter.
public MySqlRecordReader(InputSplit split,TaskAttemptContext context) throws IOException, InterruptedException{
super();
initialize(split,context);
}
public MySqlRecordReader(){
}
//initialize: new up the key/value holders and downcast the single split assigned to this reader.
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
this.split = (MySqlInputSplit)split;
this.k = new LongWritable();
v = new Text();
}
//fetch the rows lazily on the first call, then load the next row into this reader's k and v.
public boolean nextKeyValue() throws IOException, InterruptedException {
if(this.dbcursor == null) {
QueryRunner runner = Utils.getQueryRunner();
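// Note: LIMIT offset,count without an ORDER BY does not guarantee a stable row order
// across queries; adding "order by id" would make the splits deterministic.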
String sql = "select * from stu limit " + this.split.getStart() + "," + this.split.getLength();
try {
dbcursor = runner.query(sql, new BeanListHandler<Student>(Student.class)).iterator();
} catch (SQLException e) {
e.printStackTrace();
}
}
boolean hasNext = this.dbcursor.hasNext();
if(hasNext){
//advance the cursor to the next row
Student stu = this.dbcursor.next();
this.k.set(this.split.getStart()+index);
index ++;
this.v.set(stu.toString());
}
return hasNext;
}
public LongWritable getCurrentKey() throws IOException, InterruptedException {
return this.k;
}
public Text getCurrentValue() throws IOException, InterruptedException {
return this.v;
}
public float getProgress() throws IOException, InterruptedException {
// fraction of this split's rows already read
return this.split.getLength() == 0 ? 0f : index / (float) this.split.getLength();
}
public void close() throws IOException {
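// nothing to release here: DbUtils closes the pooled connection after each query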
}
}
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
return new MySqlRecordReader(split,context);
}
}
The custom MySqlOutputFormat
import org.apache.commons.dbutils.QueryRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import java.io.IOException;
import java.sql.SQLException;
public class MySqlOutputFormat <K,V> extends OutputFormat<K,V> {
public static class MySqlRecordWriter<K,V> extends RecordWriter<K, V>{
public QueryRunner runner = null;
public MySqlRecordWriter(){
}
public MySqlRecordWriter(TaskAttemptContext context){
//obtain the pooled MySQL connection helper
runner = Utils.getQueryRunner();
}
public void write(K key, V value) throws IOException, InterruptedException {
try {
runner.update("insert into stu_res(age,cnt) values (?,?)",Integer.parseInt(key.toString()),Integer.parseInt(value.toString()));
} catch (SQLException e) {
e.printStackTrace();
}
}
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
}
}
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
return new MySqlRecordWriter<K, V>(context);
}
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
}
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
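// there is no filesystem output to commit, and FileOutputCommitter tolerates a null
// output path, so this effectively acts as a no-op committer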
return new FileOutputCommitter(null, context);
}
}
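checkOutputSpecs is left empty above, so every run appends to stu_res and a re-run produces duplicate rows. One possible guard (an addition, not part of the original code) is to clear the table in that hook:
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    try {
        // wipe earlier results so a re-run does not accumulate duplicates
        Utils.getQueryRunner().update("delete from stu_res");
    } catch (SQLException e) {
        throw new IOException("failed to clear stu_res", e);
    }
}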