大数据实战练习(二、模拟数据)

一、集群启动

1.启动zookeeper

2.启动hdfs

3.启动standalone

./sbin/start-all.sh

4.启动hive

./hive --service metastore &

二、编写代码

1.模拟生成数据

public class Data2File {
	public static String MONITOR_FLOW_ACTION ="./monitor_flow_action";
	public static String MONITOR_CAMERA_INFO ="./monitor_camera_info";
	public static void main(String[] args) {
		CreateFile(MONITOR_FLOW_ACTION );
		CreateFile(MONITOR_CAMERA_INFO );
		System.out.println("running... ...");
		mock();
		System.out.println("finished");
	}
	
	/**
	 * 创建文件
	 * @param args
	 */
	public static Boolean CreateFile(String pathFileName){
		try {
			File file = new File(pathFileName);
			if(file.exists()){
				file.delete();
			}
			boolean createNewFile = file.createNewFile();
			System.out.println("create file "+pathFileName+" success!");
			return createNewFile;
		} catch (IOException e) {
			e.printStackTrace();
		}
		return false;
	}
	
	/**
	 * 向文件中写入数据
	 * @param args
	 */
	public static void WriteDataToFile(String pathFileName,String newContent){
		FileOutputStream fos = null ;
		OutputStreamWriter osw = null;
		PrintWriter pw = null ;
		try {
			//产生一行模拟数据
			String content = newContent;
			File file = new File(pathFileName);
			fos=new FileOutputStream(file,true);
	        osw=new OutputStreamWriter(fos, "UTF-8");
	        pw =new PrintWriter(osw);
	        pw.write(content+"\n");
	        //注意关闭的先后顺序,先打开的后关闭,后打开的先关闭
	        pw.close();
	        osw.close();
	        fos.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	
	
	//生成模拟数据
	public static void mock() {
    	List<Row> dataList = new ArrayList<Row>();
    	Random random = new Random();
    	
    	String[] locations = new String[]{"鲁","京","京","京","沪","京","京","深","京","京"}; 
    	String date = DateUtils.getTodayDate();
    	
    	/**
    	 * 模拟3000个车辆
    	 */
    	for (int i = 0; i < 3000; i++) {
        	String car = locations[random.nextInt(10)] + (char)(65+random.nextInt(26))+ StringUtils.fulfuill(5,random.nextInt(100000)+"");
        	
        	//baseActionTime 模拟24小时
        	String baseActionTime = date + " " + StringUtils.fulfuill(random.nextInt(24)+"");
        	/**
        	 * 这里的for循环模拟每辆车经过不同的卡扣不同的摄像头 数据。
        	 */
        	for(int j = 0 ; j < random.nextInt(300)+1 ; j++){
        		//模拟每个车辆每被30个摄像头拍摄后 时间上累计加1小时。这样做使数据更加真实。
        		if(j % 30 == 0 && j != 0){
        			 baseActionTime = date + " " + StringUtils.fulfuill((Integer.parseInt(baseActionTime.split(" ")[1])+1)+"");
        		}
        		
        		String actionTime = baseActionTime + ":" 
        				+ StringUtils.fulfuill(random.nextInt(60)+"") + ":" 
        				+ StringUtils.fulfuill(random.nextInt(60)+"");//模拟经过此卡扣开始时间 ,如:2017-10-01 20:09:10
        		
        		String monitorId = StringUtils.fulfuill(4, random.nextInt(9)+"");//模拟9个卡扣monitorId
        		
        		String speed = random.nextInt(260)+1+"";//模拟速度
        		
        		String roadId = random.nextInt(50)+1+"";//模拟道路id 【1~50 个道路】
        		
        		String cameraId = StringUtils.fulfuill(5, random.nextInt(100000)+"");//模拟摄像头id cameraId
        		
        		String areaId = StringUtils.fulfuill(2,random.nextInt(8)+1+"");//模拟areaId 【一共8个区域】
        		
        		
        		//将数据写入到文件中
        		String content = date+"\t"+monitorId+"\t"+cameraId+"\t"+car+"\t"+actionTime+"\t"+speed+"\t"+roadId+"\t"+areaId;
        		WriteDataToFile(MONITOR_FLOW_ACTION,content);
        		Row row = RowFactory.create(date,monitorId,cameraId,car,actionTime,speed,roadId,areaId);
        		dataList.add(row);
        	}
		}
    	
    	/**
    	 * 生成 monitor_id 对应camera_id表
    	 */
    	Map<String,Set<String>> monitorAndCameras = new HashMap<>();
    	
    	int index = 0;
    	for(Row row : dataList){
    		//row.getString(1) monitor_id
    		Set<String> sets = monitorAndCameras.get(row.getString(1));
    		if(sets == null){
    			sets = new HashSet<>();
    			monitorAndCameras.put((String)row.getString(1), sets);
    		}
    		index++;
    		//这里每隔1000条数据随机插入一条数据,模拟出来标准表中卡扣对应摄像头的数据。这个摄像头的数据不一定会在车辆数据中有。
    		if(index % 1000 == 0){
    			sets.add(StringUtils.fulfuill(5, random.nextInt(100000)+""));
    		} 
    		//row.getString(2) camera_id
    		sets.add(row.getString(2)); 
    	}
    	
    	Set<Entry<String,Set<String>>> entrySet = monitorAndCameras.entrySet();
    	for (Entry<String, Set<String>> entry : entrySet) {
    		String monitor_id = entry.getKey();
    		Set<String> sets = entry.getValue();
    		for (String val : sets) {
    			//将数据写入到文件
    			String content = monitor_id+"\t"+val;
        		WriteDataToFile(MONITOR_CAMERA_INFO,content);
			}
		}
    }
}

2.向hive中存储数据

2.1通过运行sql创建hive表

CREATE TABLE IF NOT EXISTS traffic.monitor_flow_action(  
date_time string ,  
monitor_id string ,  
camera_id string ,  
car string ,  
action_time string ,
speed string  ,
road_id string,
area_id string
)  
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ; 

load data local inpath '/usr/local/monitor_flow_action' into table traffic.monitor_flow_action; 

CREATE TABLE IF NOT EXISTS traffic.monitor_camera_info(  
monitor_id string ,  
camera_id string 
)  
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ; 

load data local inpath '/usr/local/monitor_camera_info' into table traffic.monitor_camera_info; 
​./hive -f /usr/local/createHiveTab.sql

 

2.2提交代码运行

./spark-submit --master spark://hadoop1:7077,hadoop2:7077 --class com.liu.spark.Data2Hive /usr/local/TrafficStudy-1.0-SNAPSHOT.jar

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值