企业岗位需求决策----数据清洗
第一关: 使用jdbc进行数据插入
package net.educoder;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.DriverManager;
public class App {
private static final String driver = "com.mysql.jdbc.Driver";
private static final String url = "jdbc:mysql://localhost:3306/mydb?useUnicode=true&characterEncoding=UTF-8";
private static final String username = "root";
private static final String password = "123123";
private static Connection conn = null;
static {
try {
/**
* 加载驱动
*/
/*----------begin----------*/
Class.forName(driver);
/*----------end----------*/
} catch (Exception ex) {
ex.printStackTrace();
}
}
/**
* 获取jdbc连接
*
* @return connection
*/
public static Connection getConnection() {
/**
* 1.对conn进行排空处理
* 2.如果为空,则重新创建connection
* 3.如果不为空,则直接返回connection
*/
/*----------begin----------*/
if (conn == null) {
try {
conn = DriverManager.getConnection(url, username, password);
} catch (SQLException e) {
e.printStackTrace();
}
return conn;
}
return conn;
/*----------end----------*/
}
public static void main(String[] args) {
/**
* 1.调用getConnection()方法,获取connection
* 2.编写SQL语句 --》insert into users (username,password) values (?,?)
* 3.创建预处理对象
* 4.设置值
* 5.执行SQL,
* 6.执行完毕之后,返回Int类型的i(返回变量),如果i =1 ,请在控制台打印字符串success,反之,请打印error
* 7.关闭资源
* 注意:记得要进行异常处理
*/
/*----------begin----------*/
Connection connection = getConnection();
String sql = "insert into users (username,password) values (?,?)";
PreparedStatement stmt = null;
try {
stmt = connection.prepareStatement(sql);
stmt.setString(1, "Alice"); //此处的1代表一个SQL语句中的一个?
stmt.setString(2, "123456");
int i = stmt.executeUpdate();
System.out.println(i == 1 ? "success" : "error");
} catch (SQLException e) {
e.printStackTrace();
} finally {
try {
stmt.close();
connection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
/*----------end----------*/
}
}
第二关:使用c3p0连接池完成数据插入
package net.educoder;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import com.mchange.v2.c3p0.ComboPooledDataSource;
import java.sql.Connection;
import java.sql.PreparedStatement;
public class App {
private static String url = "jdbc:mysql://localhost:3306/mydb";
private static String user = "root";
private static String password = "123123";
private static String driverClass = "com.mysql.jdbc.Driver";
public static void main(String[] args) throws Exception {
/**---------------begin-----------------*/
//1)创建连接池对象
ComboPooledDataSource cds = new ComboPooledDataSource();
//2)设置连接参数
cds.setJdbcUrl(url);
cds.setUser(user);
cds.setPassword(password);
cds.setDriverClass(driverClass);
//3)设置连接池相关的参数
cds.setInitialPoolSize(5);//初始化连接数
cds.setMaxPoolSize(10);//最大连接数
cds.setCheckoutTimeout(3000);//最大等待时间
cds.setMinPoolSize(3); //最小连接数
//4)获取连接
Connection connection = cds.getConnection();
//5)构建预处理对象
String SQL = "insert into users(username,password) values (?,?)";
PreparedStatement pstm = connection.prepareStatement(SQL);
//6)设置SQL参数
pstm.setString(1, "alice");
pstm.setString(2, "123456");
//7)运行SQL,返回Int,请使用变量i
int i = pstm.executeUpdate();
/**---------------end-----------------*/
String result = i == 1 ? "数据插入成功" : "数据插入失败";
File file = new File(args[0]);
OutputStreamWriter oStreamWriter = new OutputStreamWriter(new FileOutputStream(file), "utf-8");
oStreamWriter.append(result);
oStreamWriter.close();
}
}
第三关:MapReduce数据清洗
package net.educoder;
import net.educoder.entity.positionInfo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.PreparedStatement;
public class mr {
static class mapper extends Mapper<LongWritable, Text, positionInfo, NullWritable> {
private positionInfo positionInfo = new positionInfo();
Connection connection = DBHelper.getConnection();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
/**
* 1.将value转换成String --> str
* 2.将str进行切割,切割标志为:\00,返回数组split
* 3.对数组split进行长度判断,如果长度为6,则进行下一步处理,反之,过滤掉
* 4.将数组的split的元素逐个取出,并封装到positionInfo对象中
说明:
split[0] -->position(职位)
split[1] -->require (职位要求)
split[2] -->type(职位类型)
split[3] -->num(人数)
split[4] -->address(地址)
split[5] -->time(发布时间)
* 5.判断position字符串中是否包含“大数据”,如果包含,则给 positionInfo.setLabel("大数据");反之,则 positionInfo.setLabel("其他");
* 6.编写SQL
* 7.执行SQL
*/
/**--------------------begin----------------------**/
String str = value.toString();
String[] split = str.split("\00");
if (split.length == 6) {
String position = split[0];
String require = split[1];
String type = split[2];
String num = split[3];
String address = split[4];
String time = split[5];
positionInfo.setPosition(position);
positionInfo.setRequire(require);
positionInfo.setAddress(address);
positionInfo.setNum(num);
positionInfo.setTime(time);
positionInfo.setType(type);
boolean contains = position.contains("大数据");
if (contains) {
positionInfo.setLabel("大数据");
} else {
positionInfo.setLabel("其他");
}
String sql = "insert into positioninfo (position,type,num,address,time,require_,label) values(?,?,?,?,?,?,?)";
try {
PreparedStatement prepareStatement = connection.prepareStatement(sql);
prepareStatement.setString(1, positionInfo.getPosition());
prepareStatement.setString(2, positionInfo.getType());
prepareStatement.setString(3, positionInfo.getNum());
prepareStatement.setString(4, positionInfo.getAddress());
prepareStatement.setString(5, positionInfo.getTime());
prepareStatement.setString(6, positionInfo.getRequire());
prepareStatement.setString(7, positionInfo.getLabel());
prepareStatement.executeUpdate();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
context.write(positionInfo, NullWritable.get());
}
/**--------------------end------------------------**/
}
}
public static void main(String[] args) throws Exception {
/**--------------------begin----------------------**/
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(mr.class);
job.setMapperClass(mapper.class);
job.setMapOutputKeyClass(positionInfo.class);
job.setMapOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);
/**--------------------end------------------------**/
}
}