【企业岗位需求决策--数据清洗】

企业岗位需求决策----数据清洗

第一关: 使用jdbc进行数据插入

package net.educoder;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.DriverManager;

public class App {
    private static final String driver = "com.mysql.jdbc.Driver";
    private static final String url = "jdbc:mysql://localhost:3306/mydb?useUnicode=true&characterEncoding=UTF-8";
    private static final String username = "root";
    private static final String password = "123123";
    private static Connection conn = null;

    static {
        try {
            /**
             * 加载驱动
             */
            /*----------begin----------*/
            Class.forName(driver);
            /*----------end----------*/
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * 获取jdbc连接
     *
     * @return connection
     */
    public static Connection getConnection() {
        /**
         * 1.对conn进行排空处理
         * 2.如果为空,则重新创建connection
         * 3.如果不为空,则直接返回connection
         */
        /*----------begin----------*/
         if (conn == null) {
            
                try {
            
                    conn = DriverManager.getConnection(url, username, password);
                } catch (SQLException e) {
            
                    e.printStackTrace();
                }
                return conn;
            }
         return conn;
        /*----------end----------*/
    }

    public static void main(String[] args) {
        /**
         * 1.调用getConnection()方法,获取connection
         * 2.编写SQL语句 --》insert into users (username,password) values (?,?)
         * 3.创建预处理对象
         * 4.设置值
         * 5.执行SQL,
         * 6.执行完毕之后,返回Int类型的i(返回变量),如果i =1 ,请在控制台打印字符串success,反之,请打印error
         * 7.关闭资源
         *      注意:记得要进行异常处理
         */
        
        /*----------begin----------*/
        Connection connection = getConnection();
        String sql = "insert into users (username,password) values (?,?)";
        PreparedStatement stmt = null;
        try {
            
            stmt = connection.prepareStatement(sql);
            stmt.setString(1, "Alice");  //此处的1代表一个SQL语句中的一个?
            stmt.setString(2, "123456");
            int i = stmt.executeUpdate();
            System.out.println(i == 1 ? "success" : "error");
        } catch (SQLException e) {
            
            e.printStackTrace();
        } finally {
            
            try {
            
                stmt.close();
                connection.close();
            } catch (SQLException e) {
            
                e.printStackTrace();
            }
        }
        /*----------end----------*/
    }
}

第二关:使用c3p0连接池完成数据插入

package net.educoder;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import com.mchange.v2.c3p0.ComboPooledDataSource;
import java.sql.Connection;
import java.sql.PreparedStatement;
public class App {
    private static String url = "jdbc:mysql://localhost:3306/mydb";
    private static String user = "root";
    private static String password = "123123";
    private static String driverClass = "com.mysql.jdbc.Driver";
    public static void main(String[] args) throws Exception {
        /**---------------begin-----------------*/
        //1)创建连接池对象
        ComboPooledDataSource cds = new ComboPooledDataSource();
        //2)设置连接参数
        cds.setJdbcUrl(url);
        cds.setUser(user);
        cds.setPassword(password);
        cds.setDriverClass(driverClass);
        //3)设置连接池相关的参数
        cds.setInitialPoolSize(5);//初始化连接数
        cds.setMaxPoolSize(10);//最大连接数
        cds.setCheckoutTimeout(3000);//最大等待时间
        cds.setMinPoolSize(3); //最小连接数
        //4)获取连接
        Connection connection = cds.getConnection();
        //5)构建预处理对象
        String SQL = "insert into users(username,password) values (?,?)";
        PreparedStatement pstm = connection.prepareStatement(SQL);
        //6)设置SQL参数
        pstm.setString(1, "alice");
        pstm.setString(2, "123456");
        //7)运行SQL,返回Int,请使用变量i
        int i = pstm.executeUpdate();
        /**---------------end-----------------*/
        String result = i == 1 ? "数据插入成功" : "数据插入失败";
        File file = new File(args[0]);
        OutputStreamWriter oStreamWriter = new OutputStreamWriter(new FileOutputStream(file), "utf-8");
        oStreamWriter.append(result);
        oStreamWriter.close();
    }
}

第三关:MapReduce数据清洗

package net.educoder;
import net.educoder.entity.positionInfo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.PreparedStatement;
public class mr {
    static class mapper extends Mapper<LongWritable, Text, positionInfo, NullWritable> {
        private positionInfo positionInfo = new positionInfo();
        Connection connection = DBHelper.getConnection();
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        /**
        *    1.将value转换成String --> str
        *    2.将str进行切割,切割标志为:\00,返回数组split
        *    3.对数组split进行长度判断,如果长度为6,则进行下一步处理,反之,过滤掉
        *    4.将数组的split的元素逐个取出,并封装到positionInfo对象中
                说明:
                    split[0] -->position(职位)
                    split[1] -->require (职位要求)
                    split[2] -->type(职位类型)
                    split[3] -->num(人数)
                    split[4] -->address(地址)
                    split[5] -->time(发布时间)
        *    5.判断position字符串中是否包含“大数据”,如果包含,则给 positionInfo.setLabel("大数据");反之,则 positionInfo.setLabel("其他");
        *    6.编写SQL
        *    7.执行SQL
        */
        /**--------------------begin----------------------**/
            String str = value.toString();
            String[] split = str.split("\00");
            if (split.length == 6) {
                String position = split[0];
                String require = split[1];
                String type = split[2];
                String num = split[3];
                String address = split[4];
                String time = split[5];
                positionInfo.setPosition(position);
                positionInfo.setRequire(require);
                positionInfo.setAddress(address);
                positionInfo.setNum(num);
                positionInfo.setTime(time);
                positionInfo.setType(type);
                boolean contains = position.contains("大数据");
                if (contains) {
                    positionInfo.setLabel("大数据");
                } else {
                    positionInfo.setLabel("其他");
                }
                String sql = "insert into positioninfo (position,type,num,address,time,require_,label) values(?,?,?,?,?,?,?)";
                try {
                    PreparedStatement prepareStatement = connection.prepareStatement(sql);
                    prepareStatement.setString(1, positionInfo.getPosition());
                    prepareStatement.setString(2, positionInfo.getType());
                    prepareStatement.setString(3, positionInfo.getNum());
                    prepareStatement.setString(4, positionInfo.getAddress());
                    prepareStatement.setString(5, positionInfo.getTime());
                    prepareStatement.setString(6, positionInfo.getRequire());
                    prepareStatement.setString(7, positionInfo.getLabel());
                    prepareStatement.executeUpdate();
                } catch (SQLException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                context.write(positionInfo, NullWritable.get());
            }
        /**--------------------end------------------------**/
        }
    }
    public static void main(String[] args) throws Exception {
    /**--------------------begin----------------------**/
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(mr.class);
        job.setMapperClass(mapper.class);
        job.setMapOutputKeyClass(positionInfo.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    /**--------------------end------------------------**/
    }
}

  • 4
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值