类似淘宝识别地址联系人

最近在工作中遇到了一个需求,搜了很多资料,发现现成的方案要么需要调用付费的第三方接口,要么本身就是收费的,总之都要花钱。后来想到可以结合DFA算法来实现智能匹配,只是精确度可能略有差异。

大概介绍一下思路:利用DFA算法来实现对地址的过滤,在这里也可以使用类似的思想——先过滤出对应的省、市、区等地址,然后再去过滤人名。精度可能不是很高,如果想完善可以自己试试。

过滤代码

/**
 * @Author: lemon
 * @Description: 地址过滤
 */

@Component
public class SensitivewordFilter implements InitializingBean {

	// Word lookup table; assigned in afterPropertiesSet() from SensitiveWordInit.initKeyWord().
	public static Map sensitiveWordMap = null;
	// The two constants below are passed as the matchType argument of the matching methods.
	// NOTE(review): how each rule is applied is decided by CheckSensitiveWord, which is not shown here.
	public static int minMatchTYpe = 1;      //minimum match rule
	public static int maxMatchType = 2;      //maximum match rule
	


	
	/**
	 * Tells whether the text contains at least one entry of the word table.
	 *
	 * <p>Every character position is offered to {@code CheckSensitiveWord} as a possible start of
	 * a word; a result greater than zero counts as a hit. Scanning stops at the first hit because
	 * later positions cannot change the answer.
	 *
	 * @param txt       text to scan; {@code null} is treated as "contains nothing"
	 * @param matchType match rule forwarded unchanged to {@code CheckSensitiveWord}
	 * @return {@code true} if a word from the table starts at some position of {@code txt}
	 */
	public boolean isContaintSensitiveWord(String txt,int matchType){
		if(txt == null){
			return false;
		}
		for(int i = 0 ; i < txt.length() ; i++){
			// a positive result is the length of the word found at position i
			if(this.CheckSensitiveWord(txt, i, matchType) > 0){
				return true;
			}
		}
		return false;
	}
	
	/**
	 * Collects the word-table entries that occur in the given text.
	 *
	 * <p>The text is walked from left to right. At each position {@code CheckSensitiveWord} is
	 * asked for the length of the word starting there; when that length is positive the matched
	 * substring is recorded and the walk continues right behind it, otherwise it advances by one
	 * character.
	 *
	 * @param txt       text to scan
	 * @param matchType match rule forwarded unchanged to {@code CheckSensitiveWord}
	 * @return the distinct matched substrings (unordered); empty when nothing matched
	 */
	public Set<String> getSensitiveWord(String txt , int matchType){
		Set<String> matchedWords = new HashSet<String>();

		int pos = 0;
		while(pos < txt.length()){
			int matchedLength = CheckSensitiveWord(txt, pos, matchType);
			if(matchedLength > 0){
				matchedWords.add(txt.substring(pos, pos + matchedLength));
				// resume scanning right after the matched word
				pos += matchedLength;
			}
			else{
				pos++;
			}
		}

		return matchedWords;
	}
	/**
	 * Initialization callback (not a constructor): builds the province/city/district word
	 * table through {@link SensitiveWordInit#initKeyWord()} and stores it in the static
	 * {@code sensitiveWordMap} field.
	 *
	 * @throws Exception declared by the overridden callback signature
	 */
	@Override
	public void afterPropertiesSet() throws Exception {
		SensitiveWordInit wordInit = new SensitiveWordInit();
		sensitiveWordMap = wordInit.initKeyWord();
	}

初始化代码

/**
 * Builds the DFA lookup table (a character trie made of nested maps) from the
 * province/city/district word list stored on the classpath.
 *
 * <p>Every node is a map keyed by the next {@code Character} of a word. Each node below the
 * root additionally holds the entry {@code "isEnd"}, whose value is {@code "1"} when a word
 * ends at that node and {@code "0"} otherwise.
 *
 * @Author : lemon
 */
public class SensitiveWordInit {
	private static final String ENCODING = "UTF-8";

	/** Most recently built lookup table; {@code null} until a build has succeeded. */
	public static HashMap sensitiveWordMap;
	
	public SensitiveWordInit(){
		super();
	}


	/**
	 * Reads the word list and (re)builds the lookup table.
	 *
	 * <p>A failure is reported on standard error and does not propagate; in that case the
	 * previously built table (or {@code null} if there is none) is returned unchanged.
	 *
	 * @return the current lookup table, possibly {@code null} when loading never succeeded
	 */
	public Map initKeyWord(){
		try {

			Set<String> keyWordSet = readSensitiveWordFile();

			addSensitiveWordToHashMap(keyWordSet);

		} catch (Exception e) {
			// NOTE(review): kept as best-effort like the original; route this through the
			// project's logger once one is available in this class.
			e.printStackTrace();
		}
		return sensitiveWordMap;
	}


	/**
	 * Turns the word set into the nested-map trie and publishes it in {@link #sensitiveWordMap}.
	 *
	 * <p>The trie is assembled in a local map and assigned to the static field only when it is
	 * complete, so readers of the field never observe a half-built table.
	 *
	 * @param keyWordSet words to index
	 */
	@SuppressWarnings("unchecked")
	private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
		HashMap<Object, Object> root = new HashMap<Object, Object>(keyWordSet.size());

		for(String key : keyWordSet){
			Map<Object, Object> nowMap = root;
			for(int i = 0 ; i < key.length() ; i++){
				char keyChar = key.charAt(i);
				Object wordMap = nowMap.get(keyChar);
				
				if(wordMap != null){
					// the prefix already exists: descend into it
					nowMap = (Map<Object, Object>) wordMap;
				}
				else{
					// first word with this prefix: create the node, not (yet) a word end
					Map<Object, Object> newWorMap = new HashMap<Object, Object>();
					newWorMap.put("isEnd", "0");
					nowMap.put(keyChar, newWorMap);
					nowMap = newWorMap;
				}
				
				if(i == key.length() - 1){
					// last character of the word: mark the node as a word end
					nowMap.put("isEnd", "1");
				}
			}
		}

		sensitiveWordMap = root;
	}


	/**
	 * Loads the word list {@code static/省市区.txt} from the classpath, one word per line.
	 *
	 * <p>The resource is read through its input stream instead of being resolved to a
	 * {@code File}, so it can also be read when it is not a plain file on disk (for example
	 * when packaged inside a jar). Surrounding whitespace is removed and blank lines are
	 * skipped.
	 *
	 * @return the words found in the file
	 * @throws Exception if the resource is missing or cannot be read
	 */
	private Set<String> readSensitiveWordFile() throws Exception{
		ClassPathResource resource = new ClassPathResource("static/省市区.txt");
		Set<String> set = new HashSet<String>();
		BufferedReader bufferedReader = new BufferedReader(
				new InputStreamReader(resource.getInputStream(), ENCODING));
		try {
			String txt = null;
			while((txt = bufferedReader.readLine()) != null){
				String word = txt.trim();
				if(word.length() > 0){
					set.add(word);
				}
			}
		}finally{
			// closing the outermost reader also closes the underlying stream
			bufferedReader.close();
		}
		return set;
	}
}

这里是对应的resource目录下的词库(static/省市区.txt),可以结合自己的需求做修改。(此处原有图片)

.

我的词库整理的可能不全,大家可以自己完善

要的可以私信我,发给你
在这里插入图片描述

实现类

/**
 * Splits a free-form contact string into phone number, region and detailed address / sender.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>land-line numbers ({@code 0xx-xxxxxxx}) are cut out of the raw text, because the
 *       hyphen they rely on is removed in the next step;</li>
 *   <li>all punctuation is removed;</li>
 *   <li>mobile numbers are cut out; every number found is appended to the phone field;</li>
 *   <li>the remaining text is matched against the region word table and each hit is
 *       classified by its suffix as province, city or district;</li>
 *   <li>province and city are removed from the text; what follows the district becomes the
 *       detailed address and what precedes it becomes the sender.</li>
 * </ol>
 * When no district is found the address is left unset and the whole remainder is the sender.
 *
 * @param contactInformation raw text pasted by the user; {@code null} yields an empty result
 * @return the recognised parts; fields that could not be recognised stay unset
 */
@Override
    public ExpressAdress getIntelligentIdentification(String contactInformation) {
        ExpressAdress expressAdress = new ExpressAdress();
        if (contactInformation == null) {
            return expressAdress;
        }

        StringBuffer phone = new StringBuffer(64);
        // Land lines first: their '-' would not survive the punctuation stripping below.
        String text = cutPhoneNumbers(LANDLINE_NUMBER_PATTERN, contactInformation, phone);
        text = text.replaceAll("[\\p{Punct}\\pP]", "");
        text = cutPhoneNumbers(MOBILE_NUMBER_PATTERN, text, phone);
        expressAdress.setPhone(phone.toString());

        Set<String> filterWord = sensitivewordFilter.getSensitiveWord(text, SensitivewordFilter.maxMatchType);
        String province = null;
        String city = null;
        String area = null;
        for (String s : filterWord) {
            // Classify by suffix, longest suffixes first, so that "…自治区" / "…行政区" /
            // "…地区" are not mistaken for a district just because they contain "区".
            if (s.endsWith("省") || s.endsWith("自治区") || s.endsWith("行政区")) {
                province = s;
            } else if (s.endsWith("市") || s.endsWith("州") || s.endsWith("地区")) {
                city = s;
            } else if (s.endsWith("区") || s.endsWith("县")) {
                area = s;
            }
        }

        // Join only the parts that were actually found (never append a null part).
        StringBuilder connectAddress = new StringBuilder();
        if (province != null) {
            text = text.replace(province, "");
            connectAddress.append(province);
            expressAdress.setProvince(province);
        }
        if (city != null) {
            text = text.replace(city, "");
            connectAddress.append(city);
            expressAdress.setCity(city);
        }
        if (area != null) {
            connectAddress.append(area);
            expressAdress.setArea(area);
        }
        expressAdress.setConnectAddress(connectAddress.toString());

        if (area != null) {
            int areaStart = text.indexOf(area);
            // The district can be missing here if removing province/city altered it.
            if (areaStart >= 0) {
                expressAdress.setAddress(text.substring(areaStart + area.length()).trim());
                text = text.substring(0, areaStart);
            }
        }
        expressAdress.setSender(text.trim());
        return expressAdress;
    }

    /** Land-line number: area code starting with 0, a hyphen, then 7-8 digits. */
    private static final Pattern LANDLINE_NUMBER_PATTERN = Pattern.compile("0\\d{2,4}-\\d{7,8}");

    /** Mobile number: 11 digits starting with 1 and a second digit of 3-9. */
    private static final Pattern MOBILE_NUMBER_PATTERN = Pattern.compile("[1][3-9][\\d]{9}");

    /** Appends every match of {@code pattern} to {@code collected} and returns {@code text} without the matches. */
    private static String cutPhoneNumbers(Pattern pattern, String text, StringBuffer collected) {
        Matcher matcher = pattern.matcher(text);
        StringBuffer remaining = new StringBuffer(text.length());
        while (matcher.find()) {
            collected.append(matcher.group());
            matcher.appendReplacement(remaining, "");
        }
        matcher.appendTail(remaining);
        return remaining.toString();
    }

实体类

/**
 * Result of recognising a pasted contact string: who it is for, how to reach them and where
 * they are. A plain data holder with one getter/setter pair per field.
 */
public class ExpressAdress extends BaseEntity {
    private static final long serialVersionUID = 1L;

    /** User name (the sender). */
    private String sender;

    /** Phone number. */
    private String phone;

    /** Province. */
    private String province;

    /** City. */
    private String city;

    /** District. */
    private String area;

    /** Detailed address. */
    private String address;

    /** Province, city and district combined into one string. */
    private String connectAddress;

    // --- sender ---

    public String getSender() {
        return this.sender;
    }

    public void setSender(String sender) {
        this.sender = sender;
    }

    // --- phone ---

    public String getPhone() {
        return this.phone;
    }

    public void setPhone(String phone) {
        this.phone = phone;
    }

    // --- province ---

    public String getProvince() {
        return this.province;
    }

    public void setProvince(String province) {
        this.province = province;
    }

    // --- city ---

    public String getCity() {
        return this.city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    // --- district ---

    public String getArea() {
        return this.area;
    }

    public void setArea(String area) {
        this.area = area;
    }

    // --- detailed address ---

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    // --- combined province/city/district ---

    public String getConnectAddress() {
        return this.connectAddress;
    }

    public void setConnectAddress(String connectAddress) {
        this.connectAddress = connectAddress;
    }
}

基本都可以识别,有不足,大家可以自己完善或者提出来,一起探讨

  • 3
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值