正则提取文件信息,输出到控制台上

思路:
/**
* 把文件中的内容提取出来,取出我们需要查询的几个属性,并把这些属性打印到控制台上
* 1 建一个能接收这些目标属性的实体类 entity
* 2 创建转换类(解析器)对象的同时,就准备好 3 个 map 属性 + entity.class
* 1)把从文件中读到的数据通过 set 方法赋值给对象属性
* 3 把文章(文件内容)解析出来
* 4 写一个读取单行并用正则匹配的方法(因为是一行一行读的)
* 5 再写一个读取所有行、返回对象集合的最终方法
*/

package core;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Mutable data holder for one commodity record extracted from the scraped text.
 *
 * <p>Every setter takes a single argument whose type can be built from a
 * {@code String}, which is what the reflective parser in this file relies on.
 */
class Commodity{
    private Long commodityId;
    private String commodityName;
    private Double commodityPrice;
    /** Number of buyers; {@code null} until a sold-count text has been parsed. */
    private Double soldCount;
    private Long shopId;
    private String shopName;
    private String address;

    public Long getCommodityId() {
        return commodityId;
    }

    public void setCommodityId(Long commodityId) {
        this.commodityId = commodityId;
    }

    public String getCommodityName() {
        return commodityName;
    }

    public void setCommodityName(String commodityName) {
        this.commodityName = commodityName;
    }

    public Double getCommodityPrice() {
        return commodityPrice;
    }

    public void setCommodityPrice(Double commodityPrice) {
        this.commodityPrice = commodityPrice;
    }

    /**
     * Returns the sold count.
     *
     * @return the parsed sold count, or {@code 0.0} when none has been set
     *         (the backing field is a nullable {@code Double}; unboxing it
     *         unguarded would throw {@code NullPointerException})
     */
    public double getSoldCount() {
        return soldCount == null ? 0.0 : soldCount;
    }

    /**
     * Parses a textual sold count and stores it as a number.
     *
     * <p>Surrounding whitespace is ignored. A trailing {@code "+"} is dropped,
     * and a trailing {@code "万"} multiplies the remaining number by 10000,
     * so {@code "2万+"} is stored as {@code 20000.0}.
     *
     * @param soldCount the raw text; {@code null} or blank clears the value
     * @throws NumberFormatException if the remaining text is not a valid number
     */
    public void setSoldCount(String soldCount) {
        if (soldCount == null || soldCount.trim().isEmpty()) {
            this.soldCount = null;
            return;
        }
        String text = soldCount.trim();
        if (text.endsWith("+")) {
            text = text.substring(0, text.length() - 1);
        }
        if (text.endsWith("万")) {
            this.soldCount = Double.parseDouble(text.substring(0, text.length() - 1)) * 10000;
        } else {
            this.soldCount = Double.parseDouble(text);
        }
    }

    public Long getShopId() {
        return shopId;
    }

    public void setShopId(Long shopId) {
        this.shopId = shopId;
    }

    public String getShopName() {
        return shopName;
    }

    public void setShopName(String shopName) {
        this.shopName = shopName;
    }

    public String getAddress() {
        return address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    @Override
    public String toString() {
        return "Commodity{" +
                "commodityId=" + commodityId +
                ", commodityName='" + commodityName + '\'' +
                ", commodityPrice=" + commodityPrice +
                ", soldCount=" + soldCount +
                ", shopId=" + shopId +
                ", shopName='" + shopName + '\'' +
                ", address='" + address + '\'' +
                '}';
    }
}

/**
 * Extracts records from a text file with regular expressions, maps the
 * captured groups onto bean properties through reflection, and prints the
 * resulting objects to the console.
 */
public class JavaReptile {

    /**
     * Builds instances of {@code T} from a regex match by feeding capturing
     * groups to the bean's setters.
     *
     * <p>Each captured string is converted with the {@code String}-argument
     * constructor of the setter's parameter type before the setter is invoked.
     */
    private static class Parser<T>{
        /** No-argument constructor of the target type. */
        private final Constructor<T> factory;
        /** Attribute name -> setter method. */
        private final Map<String, Method> mapAttr;
        /** Attribute name -> String-argument constructor of the setter's parameter type. */
        private final Map<String, Constructor<?>> mapConstructor;
        /** Capturing-group index -> attribute name. */
        private final Map<Integer,String> mapGroupAttr;

        /**
         * Returns the highest capturing-group index this parser reads.
         *
         * <p>For a mapping keyed 1..n this equals the number of entries; using
         * the highest index keeps the caller's group-count check valid for
         * sparse mappings too.
         */
        public int groupCount(){
            int highest = 0;
            for (Integer index : mapGroupAttr.keySet()) {
                if (index > highest) {
                    highest = index;
                }
            }
            return highest;
        }

        /**
         * Collects the setters of {@code c} that are referenced by the mapping.
         *
         * @param c            the type to instantiate
         * @param mapGroupAttr capturing-group index to attribute name
         * @throws Exception if {@code c} has no single-argument setter at all,
         *                   a mapped attribute has no setter, a mapped setter's
         *                   parameter type has no {@code String} constructor,
         *                   or {@code c} has no no-argument constructor
         */
        public Parser(Class<T> c,Map<Integer,String> mapGroupAttr) throws Exception {
            this.factory = c.getDeclaredConstructor();
            this.mapGroupAttr = mapGroupAttr;
            this.mapAttr = new HashMap<>();
            this.mapConstructor = new HashMap<>();
            // Counts usable setters; a data class without any is pointless here.
            int size = 0;
            for (Method m : c.getMethods()) {
                String name = m.getName();
                // A method called exactly "set" carries no attribute name.
                if (!name.startsWith("set") || name.length() == 3) {
                    continue;
                }
                Class<?>[] params = m.getParameterTypes();
                if (params.length != 1) {
                    continue;
                }
                size++;
                name = Character.toLowerCase(name.charAt(3)) + name.substring(4);
                // Setters that are not mapped never need a converter.
                if (!mapGroupAttr.containsValue(name)) {
                    continue;
                }
                mapAttr.put(name,m);
                mapConstructor.put(name,params[0].getConstructor(String.class));
            }
            if (size == 0) {
                throw new Exception("no attribute or setter exception");
            }
            // Fail here instead of with a NullPointerException while parsing.
            for (String attr : mapGroupAttr.values()) {
                if (!mapAttr.containsKey(attr)) {
                    throw new Exception("no setter for mapped attribute: " + attr);
                }
            }
        }

        /**
         * Creates a new {@code T} and fills it from the groups of {@code m}.
         *
         * @param m a matcher on which a match has already been found
         * @return the populated object
         * @throws InvocationTargetException if the target constructor, a value
         *         conversion or a setter throws
         */
        public T newT(Matcher m ) throws IllegalAccessException, InstantiationException, InvocationTargetException {
            T t = factory.newInstance();
            for (Map.Entry<Integer, String> e : mapGroupAttr.entrySet()) {
                String attr = e.getValue();
                Object value = mapConstructor.get(attr).newInstance(m.group(e.getKey()));
                mapAttr.get(attr).invoke(t,value);
            }
            return t;
        }

    }

    /**
     * Reads a text file and joins its trimmed lines into a single string,
     * with no separator between lines.
     *
     * <p>The file is decoded with the JVM's default charset.
     *
     * @param path the file to read
     * @return the concatenated content
     * @throws IOException if the file is missing or cannot be read
     */
    private static String read(String path) throws IOException {
        StringBuilder builder =  new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = br.readLine()) != null) {
                builder.append(line.trim());
            }
        }
        return builder.toString();
    }

    /** Compiles {@code regex} into a pattern. */
    private static Pattern pat(String regex){
        return Pattern.compile(regex);
    }

    /** Creates a matcher of {@code regex} over the text {@code cnt}. */
    private static Matcher mat(String regex, String cnt){
        return pat(regex).matcher(cnt);
    }

    /**
     * Converts one record's text into one object.
     *
     * @param par the parser that builds the object
     * @param pat the pattern whose groups carry the attribute values
     * @param cnt the text of a single record
     * @return the populated object
     * @throws Exception if the text does not match, the pattern has fewer
     *                   groups than the parser needs, or population fails
     */
    private static <T> T lineToT(Parser<T> par, Pattern pat ,String cnt) throws Exception {
        Matcher m = pat.matcher(cnt);
        if(m.find() && m.groupCount()>=par.groupCount()){
            return par.newT(m);
        }
        throw new Exception("unmatched or wrong group count exception");
    }


    /**
     * Parses a file into a list of objects.
     *
     * <p>Builds a parser for {@code c}, reads the file at {@code path} into
     * one long string, finds each record with {@code regexLines} (group 1 is
     * the record text), turns each record into a {@code T} using
     * {@code regexLine}, and appends it to the result.
     *
     * <p>Any failure is printed to standard error and stops processing; the
     * objects collected up to that point are still returned.
     *
     * @param c            the type of object to create
     * @param regexLines   regex that delimits one record; must define group 1
     * @param regexLine    regex whose groups extract the attribute values
     * @param path         path of the file to read
     * @param mapGroupAttr capturing-group index to attribute name
     * @param <T>          the element type
     * @return the created objects, possibly empty
     */
    public static <T>List<T> linesTolist(Class<T> c,String regexLines,String regexLine,
                                         String path,Map<Integer,String> mapGroupAttr){
        List<T> list = new ArrayList<>();
        try {
            Parser<T> parser = new Parser<>(c, mapGroupAttr);
            String content = read(path);
            Matcher mLines = mat(regexLines,content);
            Pattern pLine = pat(regexLine);
            // find() resumes right after the previous match, so every record
            // is visited exactly once regardless of what precedes it.
            while (mLines.find()) {
                list.add(lineToT(parser,pLine,mLines.group(1)));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Parses the sample file and prints every extracted commodity.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Left and right boundary of one commodity record.
        final String REGEX_LINES = "<div class=\"item(.*?)</div></div></div></div>";
        final String REGEX_LINE = "alt=\"(.*?)\".*?<strong>(.*?)</strong>.*?deal-cnt\">(.*?)人付款</div>.*?data-userid=\"(.*?)\" data-nid=\"(.*?)\".*?dsrs.*?</span></span><span>(.*?)</span>.*?\"location\">(.*?)<";
        final String PATH = "D:\\e\\downOfSoft\\QQ\\taobao.txt";
        // Group index of REGEX_LINE -> Commodity attribute it populates.
        Map<Integer,String> mapGroupAttr = new HashMap<>();
        mapGroupAttr.put(1,"commodityName");
        mapGroupAttr.put(2,"commodityPrice");
        mapGroupAttr.put(3,"soldCount");
        mapGroupAttr.put(4,"shopId");
        mapGroupAttr.put(5,"commodityId");
        mapGroupAttr.put(6,"shopName");
        mapGroupAttr.put(7,"address");
        List<Commodity> cs = linesTolist(Commodity.class, REGEX_LINES, REGEX_LINE, PATH, mapGroupAttr);
        for (Commodity c : cs) {
            System.out.println(c);
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值