简易假名字数据生成

import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created with Intellij Idea
 * Name PaChong
 * Author kvaic
 * Date 2024/7/6
 */
public class paChongTest {
    /**
     * Entry point: downloads one surname page and two given-name pages,
     * extracts the names with regular expressions, combines them into full
     * names and appends the result to the output file.
     *
     * @param args unused
     * @throws IOException if a page cannot be downloaded or the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        // Pages to scrape
        final String surnamePage = "http://www.baijiaxing.net.cn/";
        final String boyPage = "https://ratenn.com/boy-name.html";
        final String girlPage = "https://ratenn.com/girl-name.html";

        // A table cell holding exactly two characters; "<td>" is 4 chars long,
        // so the cell content sits at indices 4..5 of every match.
        final String twoCharCell = "<td>.{2}</td>";

        // Surnames: every match is a run of four characters directly followed
        // by ',' or '。'; each of the four characters is stored on its own.
        ArrayList<Character> surnames = new ArrayList<>();
        ArrayList<String> surnameRuns = getDate(webCrawler(surnamePage), ".{4}(?=,|。)");
        for (int r = 0; r < surnameRuns.size(); r++) {
            String run = surnameRuns.get(r);
            int pos = 0;
            while (pos < 4) {
                surnames.add(run.charAt(pos));
                pos++;
            }
        }

        // Boy given names
        ArrayList<String> boyNames = new ArrayList<>();
        ArrayList<String> boyCells = getDate(webCrawler(boyPage), twoCharCell);
        for (int c = 0; c < boyCells.size(); c++) {
            boyNames.add(boyCells.get(c).substring(4, 6));
        }

        // Girl given names
        ArrayList<String> girlNames = new ArrayList<>();
        ArrayList<String> girlCells = getDate(webCrawler(girlPage), twoCharCell);
        for (int c = 0; c < girlCells.size(); c++) {
            girlNames.add(girlCells.get(c).substring(4, 6));
        }

        // Build 100 boy and 100 girl full names (adjust the counts here), then save them
        writeName(getPersonName(surnames, boyNames, girlNames, 100, 100));
    }

    /**
     * Downloads the complete body of the given URL as text.
     *
     * The body is decoded with the charset announced in the response's
     * Content-Type header; when none is announced (or it is not supported)
     * UTF-8 is used instead of the platform default, so the result does not
     * depend on the machine the program runs on.
     *
     * @param net absolute URL of the resource to fetch
     * @return the decoded content
     * @throws IOException if the URL is malformed or the content cannot be read
     */
    public static String webCrawler(String net) throws IOException {
        URL url = new URL(net);
        URLConnection urlCon = url.openConnection();
        Charset charset = charsetOf(urlCon.getContentType());

        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the stream even when a read fails
        try (Reader reader = new BufferedReader(
                new InputStreamReader(urlCon.getInputStream(), charset))) {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, read);
            }
        }
        return sb.toString();
    }

    // Extracts the charset parameter from a Content-Type value; UTF-8 when absent or unusable.
    private static Charset charsetOf(String contentType) {
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, "charset=", 0, 8)) {
                    String label = token.substring(8).trim().replace("\"", "");
                    try {
                        return Charset.forName(label);
                    } catch (IllegalArgumentException ignored) {
                        // Malformed or unsupported charset label: use the fallback below.
                        break;
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /**
     * Collects every substring of {@code str} matched by {@code regex},
     * in the order the matches occur.
     *
     * @param str   text to search
     * @param regex regular expression to look for
     * @return the matched substrings; empty when nothing matches
     */
    public static ArrayList<String> getDate(String str,String regex){
        ArrayList<String> matches = new ArrayList<>();
        Matcher m = Pattern.compile(regex).matcher(str);
        for (boolean found = m.find(); found; found = m.find()) {
            matches.add(m.group());
        }
        return matches;
    }

    /**
     * Builds full names by pairing surnames with given names.
     *
     * The surname list and the relevant given-name list are shuffled and then
     * paired index by index, first for the boys and then (after a fresh
     * shuffle) for the girls. The shuffling is done on private copies, so the
     * lists passed in keep their order.
     *
     * @param familyName candidate surnames, one character each
     * @param boyName    candidate boy given names
     * @param girlName   candidate girl given names
     * @param boyNumber  number of boy names to produce; values below 1 produce none
     * @param girlNumber number of girl names to produce; values below 1 produce none
     * @return the boy names followed by the girl names
     * @throws IllegalArgumentException if more names are requested than there
     *         are surnames or given names to pair
     */
    public static ArrayList<String> getPersonName(ArrayList<Character> familyName,
                                                ArrayList<String> boyName,
                                                ArrayList<String> girlName,
                                                int boyNumber,
                                                int girlNumber){
        // Pairing is index-based, so each group is limited by the shorter of its two lists.
        int maxBoys = Math.min(familyName.size(), boyName.size());
        if (boyNumber > maxBoys) {
            throw new IllegalArgumentException(
                    "boyNumber " + boyNumber + " exceeds the " + maxBoys + " names that can be built");
        }
        int maxGirls = Math.min(familyName.size(), girlName.size());
        if (girlNumber > maxGirls) {
            throw new IllegalArgumentException(
                    "girlNumber " + girlNumber + " exceeds the " + maxGirls + " names that can be built");
        }

        // Work on copies so the caller's lists are not reordered.
        ArrayList<Character> surnames = new ArrayList<>(familyName);
        ArrayList<String> boys = new ArrayList<>(boyName);
        ArrayList<String> girls = new ArrayList<>(girlName);

        ArrayList<String> allName =
                new ArrayList<>(Math.max(0, boyNumber) + Math.max(0, girlNumber));

        Collections.shuffle(surnames);
        Collections.shuffle(boys);
        for (int i = 0; i < boyNumber; i++) {
            allName.add(surnames.get(i) + boys.get(i));
        }

        Collections.shuffle(surnames);
        Collections.shuffle(girls);
        for (int i = 0; i < girlNumber; i++) {
            allName.add(surnames.get(i) + girls.get(i));
        }
        return allName;
    }
    /**
     * Appends the names, each followed by a single space, to the default
     * output file.
     *
     * @param allName names to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeName(ArrayList<String> allName) throws IOException {
        writeName(allName, new File("D:\\DaiMa_File\\JAVA代码\\fakeDate\\fakeDate.txt"));
    }

    /**
     * Appends the names, each followed by a single space, to {@code target}.
     * Missing parent directories are created, and the text is encoded as
     * UTF-8 so the output does not depend on the platform default charset.
     *
     * @param allName names to write
     * @param target  file to append to; created when it does not exist
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeName(ArrayList<String> allName, File target) throws IOException {
        File parent = target.getParentFile();
        if (parent != null) {
            // Result deliberately unchecked: if the directory is still missing,
            // opening the stream below fails with a descriptive IOException.
            parent.mkdirs();
        }
        // Second constructor argument "true" = append; try-with-resources
        // flushes and closes the writer even when a write fails.
        try (Writer out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(target, true), StandardCharsets.UTF_8))) {
            for (String name : allName) {
                out.write(name + " ");
            }
        }
    }

在生成时使用了爬虫从三个网站中爬取一些公开资源,仅供学习。

在实际使用时,需根据所爬取网站的页面结构调整代码中的正则表达式,才能正确筛选出所需内容。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

kvaic

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值