# 2024年6月14日Blog

Hadoop实现电商商单分析(3)

上一篇文章链接:
Hadoop实现电商商单分析(2)

三、utils文件

1.load_ip
package com.task.ds;


import com.task.ds.utils.IPParser;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Reads the comma-separated track file and writes one
 * {@code ip,country,province,city} line for every input record.
 *
 * <p>Each input line is split on commas; the field at index 3 is treated as the
 * IP address and resolved to a region through {@link IPParser}. Lines that do
 * not contain that field, or whose IP cannot be resolved, are skipped instead
 * of aborting the whole run.
 */
public class load_ip {
    /** Zero-based position of the IP address inside a comma-separated input line. */
    private static final int IP_FIELD_INDEX = 3;

    /**
     * Runs the conversion using the hard-coded input and output paths.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Path of the input file.
        String inputFilePath = "A:\\TASK\\code\\data\\project\\src\\main\\java\\com\\data\\trackInfo.txt";
        // Path of the output file.
        String tempFilePath = "A:\\TASK\\code\\data\\project\\src\\main\\java\\com\\data\\ipInfo.txt";
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilePath));
             BufferedWriter writer = new BufferedWriter(new FileWriter(tempFilePath))) {
            IPParser ipParse = IPParser.getInstance();
            long skipped = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split(",");
                // A blank or truncated line has no IP field; indexing it would throw
                // ArrayIndexOutOfBoundsException, which the IOException handler does not catch.
                if (split.length <= IP_FIELD_INDEX) {
                    skipped++;
                    continue;
                }

                String ip = split[IP_FIELD_INDEX];
                IPParser.RegionInfo regionInfo = ipParse.analyseIp(ip);
                // NOTE(review): analyseIp is defined outside this file; this guard covers the
                // case where it returns null for an empty or unparseable address.
                if (regionInfo == null) {
                    skipped++;
                    continue;
                }

                String country = regionInfo.getCountry();
                String province = regionInfo.getProvince();
                String city = regionInfo.getCity();
                writer.write(ip + ',' + country + ',' + province + ',' + city);
                writer.newLine();
            }

            if (skipped > 0) {
                System.err.println("skipped " + skipped + " line(s) without a resolvable IP");
            }
            System.out.println("done");

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

2.load_track
package com.task.ds;


import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Extracts a handful of fields from the raw track log and writes them as a
 * comma-separated file.
 *
 * <p>The raw log separates fields with the SOH control character (U+0001).
 * For every record the fields at indices 0, 1, 10, 13 and 17 are written,
 * joined by commas. Commas inside field 1 (the URL) are replaced by spaces so
 * they cannot be mistaken for field separators in the output.
 */
public class load_track {
    /** Field separator used by the raw log: the SOH control character. */
    private static final String FIELD_SEPARATOR = "\001";

    /** Highest field index read from a record; shorter records are skipped. */
    private static final int LAST_FIELD_INDEX = 17;

    /**
     * Runs the extraction using the hard-coded input and output paths.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Path of the raw input file.
        String inputFilePath = "A:\\trackinfo_20130721.txt";
        // Path of the output file.
        String tempFilePath = "A:\\TASK\\code\\data\\project\\src\\main\\java\\com\\data\\trackInfo.txt";

        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilePath));
             BufferedWriter writer = new BufferedWriter(new FileWriter(tempFilePath))) {

            long skipped = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                // Limit -1 keeps trailing empty fields; the default split drops them, which
                // would make a record with empty last fields look shorter than it is.
                String[] split = line.split(FIELD_SEPARATOR, -1);
                // A malformed record would otherwise throw ArrayIndexOutOfBoundsException,
                // which the IOException handler below does not catch.
                if (split.length <= LAST_FIELD_INDEX) {
                    skipped++;
                    continue;
                }

                String url = split[1].replace(",", " ");
                writer.write(split[0] + "," + url + "," + split[10] + "," + split[13] + "," + split[17]);
                writer.newLine();
            }

            if (skipped > 0) {
                System.err.println("skipped " + skipped + " malformed line(s)");
            }
            System.out.println("连续的SOH字符已被替换");

        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

3.test
package com.task.ds;

import com.task.ds.utils.IPParser;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Scratch program used to inspect the raw track log.
 *
 * <p>It first echoes every line of the raw log file to standard output. It then
 * splits two hard-coded sample records on the SOH control character (U+0001),
 * prints their field counts, and resolves the field at index 13 of the first
 * sample through {@link IPParser}, printing the resulting country and province.
 */
public class test {
    /**
     * Runs the inspection using the hard-coded log path and sample records.
     *
     * @param args unused
     * @throws IOException if the log file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String filePath = "A:\\trackinfo_20130721.txt";
        // try-with-resources closes the stream even when readLine throws part-way through.
        try (FileInputStream fin = new FileInputStream(filePath);
             BufferedReader buffReader = new BufferedReader(new InputStreamReader(fin))) {
            String strTmp;
            while ((strTmp = buffReader.readLine()) != null) {
                System.out.println(strTmp);
            }
            // The former extra readLine() after the loop always returned null at end of
            // file and only printed the text "null", so it has been removed.
        }

        String aa = "20947290187\u0001http://www.yihaodian.com/ctg/s2/c5287-%E5%A4%A7%E7%B1%B3/b/a28186,28184-s1-v0-p1-price-d0-f0-m1-rt0-pid-k/1/\u0001http://www.yihaodian.com/ctg/s2/c5287-%E5%A4%A7%E7%B1%B3/b1879/a28186\u0001\u00012\u0001SKAPHD3JZYH9EE9ACB1NGA9VDQHNJMX1NY9T\u0001\u0001\u0001\u0001\u0001PPG4SWG71358HGRJGQHQQBXY9GF96CVU\u00012225501\u00011\u0001124.79.172.232\u0001\u0001msessionid:YR9H5YU7RZ8Y94EBJNZ2P5W8DT37Q9JH,uname:chen45311,unionKey:2225501\u0001\u00012013-07-21 09:32:30\u0001134885852\u0001http://www.baidu.com/s?wd=1%E5%8F%B7%E5%BA%97&rsv_bp=0&ch=&tn=baidu&bar=&rsv_spt=3&ie=utf-8&rsv_sug3=5&rsv_sug=0&rsv_sug1=4&rsv_sug4=313&inputT=4235\u000111\u00010\u0001\\N\u00011\u0001-10\u0001\u0001\u0001\u0001\u0001Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MATP; Media Center PC 6.0; .NET4.0C; InfoPath.2; .NET4.0E)\u0001Win32\u0001\u0001 93 \u00011\u0001\u0001search_order_2\u0001上海市\u00011\u0001null\u0001\u0001上海市\u0001\u000166\u0001\u0001\u0001\u0001\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u00012013-07-21";
        String bb = "20966425286\u0001http://www.yihaodian.com/4/?tracker_u=7439964&type=1\u0001http://www.2345.com/?kskypw\u0001\u00013\u0001EUBCC9CG8G7G3Z331PWEV3UNWD7QZA1F3Q2Q\u0001\u0001\u0001\u0001\u0001PPH24C5RHH3GE4RUY1UKQW84PVYBXZFT\u00017439964\u0001\\N\u0001101.18.106.107\u0001\u0001msessionid:WUEYCTYUHP7CUP4UD7Y2VHNZABPCYJ18,unionKey:7439964\u0001\u00012013-07-21 15:09:39\u0001\\N\u0001http://www.2345.com/?kskypw\u00011\u0001\u0001\\N\u00014\u0001-1\u0001\u0001\u0001\u0001\u0001Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML\u0001Win32\u0001\u0001\u0001\u0001\u0001\u0001河北省\u00014\u0001\u0001\u0001\u0001\u00013\u0001\u0001\u0001\u0001\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u00012013-07-21";
        String[] split = bb.split("\001");
        String[] split1 = aa.split("\001");
        System.out.println(split1.length);
        System.out.println(split.length);
        System.out.println(aa);
        // Field 13 of the first sample is the client IP address (124.79.172.232).
        System.out.println(split1[13]);
        String s = split1[13];

        IPParser ipParse = IPParser.getInstance();
        IPParser.RegionInfo regionInfo = ipParse.analyseIp(s);
        String country = regionInfo.getCountry();
        String province = regionInfo.getProvince();
        System.out.println(country);
        System.out.println(province);

    }
}


四、Hadoop运行结果

1.提取关键信息

在这里插入图片描述

2.热点搜索统计

在这里插入图片描述

3.省流量统计

在这里插入图片描述

  • 5
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值