MapReduce 自定义输入为 MySQL:使用 Hadoop MapReduce v1 接口实现自定义 InputFormat,以 MySQL 作为输入

package com.demo7;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import java.sql.Connection;

import java.sql.DriverManager;

import java.sql.ResultSet;

import java.sql.Statement;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.ArrayList;

import java.util.Date;

import java.util.HashMap;

import java.util.Map;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.InputFormat;

import org.apache.hadoop.mapred.InputSplit;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapred.RecordReader;

import org.apache.hadoop.mapred.Reporter;

import org.apache.log4j.Logger;

import com.google.common.base.Joiner;

public class MysqlInputformat implements InputFormat{

private static Logger logger = Logger.getLogger(MysqlInputformat.class);

private String beginTradeDay = Config.getConfig().getProperty("dealday.begin.mysqlinputformat");

private String endTradeDay = Config.getConfig().getProperty("dealday.end.mysqlinputformat");

private int oneTaskStocks = Integer.valueOf(Config.getConfig().getProperty("stocknumber_permap.mysqlinputformat"));

@Override

public RecordReader getRecordReader(InputSplit split,

JobConf conf, Reporter reporter) throws IOException {

MysqlInputSplit mysqlSplit = (MysqlInputSplit)split;

logger.info("---------------------ln------------------------");

logger.info(mysqlSplit.tradeDay);

logger.info(mysqlSplit.stockcodes);

return new MysqlRecordReader(mysqlSplit.tradeDay, mysqlSplit.stockcodes);

}

@Override

public InputSplit[] getSplits(JobConf arg0, int arg1) throws IOException {

ArrayList splits = new ArrayList();

logger.info(String.format("begin generate map task, from %s to %s", beginTradeDay, endTradeDay));

HashMap> dayStocks = new HashMap>(); //key为交易日,value为股票列表

Connection conn = null;

try {

conn = DriverManager.getConnection(MysqlProxy.getProxoolUrl(), MysqlProxy.getProxoolConf());

// 创建一个Statement对象

Statement stmt = conn.createStatement(); // 创建Statement对象

String sql = String.format(

"select date,stock_code from gushitong.s_kline_day_complexbeforeright_ln " +

" where date>='%s' and date<='%s'",

beginTradeDay, endTradeDay);

ResultSet rs = stmt.executeQuery(sql);// 创建数据对象

String date = null;

String stockcode = null;

while (rs.next()) {

date = rs.getString("date");

stockcode = rs.getString("stock_code");

if(dayStocks.containsKey(date) == false){

dayStocks.put(date, new ArrayList(3300));

}

dayStocks.get(date).add(stockcode);

}

rs.close();

stmt.close();

conn.close();

} catch (Exception e) {

logger.error(e);

}

Joiner joiner = Joiner.on(":").useForNull("");

SimpleDateFormat sdf_1 = new SimpleDateFormat("yyyyMMdd");

SimpleDateFormat sdf_2 = new SimpleDateFormat("yyyy-MM-dd");

for(Map.Entry> dayStockEntry : dayStocks.entrySet()){

String tradeDay = dayStockEntry.getKey();

for(int i=0; i

int endindex;

if(i+oneTaskStocks<=dayStockEntry.getValue().size()){

endindex = i+oneTaskStocks;

}else{

endindex = dayStockEntry.getValue().size();

}

String stocks = joiner.join(dayStockEntry.getValue().subList(i, endindex));

i = endindex;

try {

MysqlInputSplit split = new MysqlInputSplit();

split.tradeDay = sdf_2.format(sdf_1.parse(tradeDay));

split.stockcodes = stocks;

splits.add(split);

} catch (ParseException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

}

}

InputSplit[] rtn = splits.toArray(new InputSplit[splits.size()]);

return rtn;

}

public static class MysqlInputSplit implements InputSplit{

public String getTradeDay() {

return tradeDay;

}

public void setTradeDay(String tradeDay) {

this.tradeDay = tradeDay;

}

public String getStockcodes() {

return stockcodes;

}

public void setStockcodes(String stockcodes) {

this.stockcodes = stockcodes;

}

private String tradeDay = null;

private String stockcodes = null;

@Override

public void readFields(DataInput in) throws IOException {

this.tradeDay = Text.readString(in);

this.stockcodes = Text.readString(in);

}

@Override

public void write(DataOutput out) throws IOException {

Text.writeString(out, tradeDay);

Text.writeString(out, stockcodes);

}

@Override

public long getLength() throws IOException {

// TODO Auto-generated method stub

return 0;

}

@Override

public String[] getLocations() throws IOException {

String[] arr = {"aa"}; //必须有,因为不管有没有用,框架都要用。

return arr;

}

}

public static class MysqlRecordReader implements RecordReader{

public String tradeDay = null;

public String stockcodes = null;

private boolean isDeal = false;

private long begintimeLong = new Date().getTime();

public MysqlRecordReader(String tradeDay, String stockcodes){

this.tradeDay = tradeDay;

this.stockcodes = stockcodes;

}

@Override

public void close() throws IOException {

// TODO Auto-generated method stub

}

@Override

public Text createKey() {

return new Text();

}

@Override

public Text createValue() {

return new Text();

}

@Override

public long getPos() throws IOException {

// TODO Auto-generated method stub

return 0;

}

/**

* 预计15小时为100%

*/

@Override

public float getProgress() throws IOException {

logger.info(String.format("get process is %f", ((float)(new Date().getTime() - this.begintimeLong))/(float)(15*3600*1000)));

return Math.min(0.9f, ((float)(new Date().getTime() - this.begintimeLong))/(float)(15*3600*1000));

}

@Override

public synchronized boolean next(Text key, Text value) throws IOException {

if(this.isDeal == true){

return false;

}else{

this.isDeal = true;

}

key.set(this.tradeDay);

value.set(this.stockcodes);

return true;

}

}

}

分享到:

18e900b8666ce6f233d25ec02f95ee59.png

72dd548719f0ace4d5f9bca64e1d7715.png

2016-03-24 14:17

浏览 659

评论

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值