import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created with Intellij Idea
* Name PaChong
* Author kvaic
* Date 2024/7/6
*/
public class paChongTest {
/**
 * Entry point: downloads one surname page and two given-name pages, extracts
 * the names with regular expressions, combines them into full names and hands
 * the result to {@code writeName}.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if a page cannot be downloaded or the names cannot be written
 */
public static void main(String[] args) throws IOException {
    // Pages to scrape.
    final String surnamePage = "http://www.baijiaxing.net.cn/";
    final String boyPage = "https://ratenn.com/boy-name.html";
    final String girlPage = "https://ratenn.com/girl-name.html";

    // Surnames: every match is a run of four characters that is directly
    // followed by one of the two delimiters in the look-ahead; each of the
    // first four chars of a match is stored as a separate surname.
    final String surnamePattern = ".{4}(?=,|。)";
    ArrayList<Character> surnames = new ArrayList<>();
    for (String group : getDate(webCrawler(surnamePage), surnamePattern)) {
        for (char surname : group.substring(0, 4).toCharArray()) {
            surnames.add(surname);
        }
    }

    // Given names: both pages are matched with the same table-cell pattern.
    // The name is the two chars that follow the 4-char "<td>" prefix.
    final String givenNamePattern = "<td>.{2}</td>";

    ArrayList<String> boyNames = new ArrayList<>();
    for (String cell : getDate(webCrawler(boyPage), givenNamePattern)) {
        boyNames.add(cell.substring(4, 6));
    }

    ArrayList<String> girlNames = new ArrayList<>();
    for (String cell : getDate(webCrawler(girlPage), givenNamePattern)) {
        girlNames.add(cell.substring(4, 6));
    }

    // Build 100 boy names and 100 girl names; adjust the counts as needed.
    // NOTE(review): the original comment claimed the result has no duplicates,
    // but nothing in this method enforces that - confirm against getPersonName.
    writeName(getPersonName(surnames, boyNames, girlNames, 100, 100));
}
//从网站上爬取所有内容
/**
 * Downloads the resource at the given URL and returns its whole body as text.
 *
 * <p>The body is decoded with the charset named in the response's
 * {@code Content-Type} header. When the header is absent, carries no
 * {@code charset} parameter, or names a charset this JVM does not know,
 * UTF-8 is used instead of the platform default so that the result does not
 * depend on the machine the program runs on.
 *
 * @param net the URL to fetch
 * @return the decoded body; an empty string if the resource has no content
 * @throws IOException if the URL is malformed, the connection cannot be
 *                     opened, or reading the body fails
 */
public static String webCrawler(String net) throws IOException {
    URLConnection urlCon = new URL(net).openConnection();
    StringBuilder sb = new StringBuilder();
    // try-with-resources closes the stream even when reading fails part-way.
    try (InputStream in = urlCon.getInputStream()) {
        Charset charset = charsetFromContentType(urlCon.getContentType());
        Reader reader = new InputStreamReader(in, charset);
        // Read in chunks rather than one char per call.
        char[] buffer = new char[8192];
        int read;
        while ((read = reader.read(buffer)) != -1) {
            sb.append(buffer, 0, read);
        }
    }
    return sb.toString();
}

/**
 * Extracts the charset from a {@code Content-Type} value such as
 * {@code "text/html; charset=gbk"}; falls back to UTF-8 when none is usable.
 */
private static Charset charsetFromContentType(String contentType) {
    final String key = "charset=";
    if (contentType != null) {
        for (String part : contentType.split(";")) {
            String param = part.trim();
            if (param.regionMatches(true, 0, key, 0, key.length())) {
                String name = param.substring(key.length()).replace("\"", "").trim();
                try {
                    return Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: use the default below.
                    break;
                }
            }
        }
    }
    return StandardCharsets.UTF_8;
}
//用正则表达式筛选
/**
 * Collects every match of a regular expression in a text.
 *
 * @param str   the text to scan
 * @param regex the regular expression to search for
 * @return the full text of each match, in the order the matches occur;
 *         empty when nothing matches
 */
public static ArrayList<String> getDate(String str,String regex){
    final ArrayList<String> matches = new ArrayList<>();
    for (Matcher m = Pattern.compile(regex).matcher(str); m.find(); ) {
        matches.add(m.group());
    }
    return matches;
}
//合并姓和名
/**
 * Builds random full names by prefixing a given name with a surname character.
 *
 * <p>The result contains the boy names first, followed by the girl names.
 * Within each group the i-th shuffled surname is paired with the i-th shuffled
 * given name, so each entry of the given-name list is used at most once per
 * group. The argument lists are not modified: shuffling is done on copies.
 *
 * <p>If fewer surnames or given names are available than requested, the group
 * is shortened to what is available instead of failing with an
 * {@code IndexOutOfBoundsException}. A count of zero or less yields no names
 * for that group.
 *
 * @param familyName surname characters to draw from
 * @param boyName    given names used for the first group
 * @param girlName   given names used for the second group
 * @param boyNumber  number of names wanted from {@code boyName}
 * @param girlNumber number of names wanted from {@code girlName}
 * @return the generated full names; never {@code null}
 */
public static ArrayList<String> getPersonName(ArrayList<Character> familyName,
                                              ArrayList<String> boyName,
                                              ArrayList<String> girlName,
                                              int boyNumber,
                                              int girlNumber){
    ArrayList<String> allName = new ArrayList<>();
    appendRandomNames(allName, familyName, boyName, boyNumber);
    appendRandomNames(allName, familyName, girlName, girlNumber);
    return allName;
}

/** Appends up to {@code count} random surname + given-name pairs to {@code target}. */
private static void appendRandomNames(ArrayList<String> target,
                                      ArrayList<Character> familyName,
                                      ArrayList<String> givenName,
                                      int count) {
    // Shuffle copies so the caller's lists keep their order.
    ArrayList<Character> surnames = new ArrayList<>(familyName);
    ArrayList<String> givenNames = new ArrayList<>(givenName);
    Collections.shuffle(surnames);
    Collections.shuffle(givenNames);
    // Never index past the shorter of the two lists.
    int available = Math.min(surnames.size(), givenNames.size());
    int limit = Math.min(count, available);
    for (int i = 0; i < limit; i++) {
        target.add(surnames.get(i) + givenNames.get(i));
    }
}
/**
 * Appends the given names to the default output file
 * ({@code D:\DaiMa_File\JAVA代码\fakeDate\fakeDate.txt}).
 *
 * @param allName the names to write; each is followed by a single space
 * @throws IOException if the file or its parent directory cannot be created,
 *                     or writing fails
 */
public static void writeName(ArrayList<String> allName) throws IOException {
    writeName(allName, new File("D:\\DaiMa_File\\JAVA代码\\fakeDate\\fakeDate.txt"));
}

/**
 * Appends the given names to {@code file}, each followed by a single space.
 *
 * <p>Existing content is kept (the file is opened in append mode). Missing
 * parent directories are created first. Text is encoded as UTF-8 rather than
 * in the platform default charset, so the output is the same on every machine.
 *
 * @param allName the names to write
 * @param file    the file to append to
 * @throws IOException if the parent directory cannot be created, the file
 *                     cannot be opened, or writing fails
 */
public static void writeName(ArrayList<String> allName, File file) throws IOException {
    File parent = file.getParentFile();
    // mkdirs() returns false when the directory already exists, hence the extra check.
    if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
        throw new IOException("Cannot create directory " + parent);
    }
    // try-with-resources closes the file even when a write fails.
    try (OutputStream out = new FileOutputStream(file, true);
         Writer writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
        for (String name : allName) {
            writer.write(name + " ");
        }
    }
}
本程序在生成姓名时,使用爬虫从三个网站爬取了一些公开资源,仅供学习使用。
实际使用时,需根据自己所爬取的网站调整筛选规则(如正则表达式)。