packageIO_LastTest;importjava.io.*;importjava.net.URL;importjava.net.URLConnection;importjava.util.ArrayList;importjava.util.Collections;importjava.util.Random;importjava.util.regex.Matcher;importjava.util.regex.Pattern;publicclassDome1{publicstaticvoidmain(String[] args)throwsIOException{//获取要爬取的地址String familyNameNet ="https://hanyu.baidu.com/shici/detail?pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&qq-pf-to=pcqq.c2c";StringBoyNameNet="http://www.haoming8.cn/baobao/10881.html";StringGrilNameNet="http://www.haoming8.cn/baobao/7641.html";//调用爬取函数String familyName =getNet(familyNameNet);StringBoyName=getNet(BoyNameNet);StringGrilName=getNet(GrilNameNet);//通过正则表达式提取想要获取的数据String familyNameregex ="(.{4})(,|。)";ArrayList<String> familyNameTemplist =getString(familyName, familyNameregex,1);// /^[\u4E00-\u9FA5]+$/StringBoyNameregex="([\\u4E00-\\u9FA5]{2})(、)";ArrayList<String>BoyNameTempList=getString(BoyName,BoyNameregex,1);StringGrilNameregex="([\\u4E00-\\u9FA5]{2})(\\s)";ArrayList<String>GrilNameTempList=getString(GrilName,GrilNameregex,1);ArrayList<String> familyNameList =newArrayList<>();for(String s : familyNameTemplist){char[] chars = s.toCharArray();for(int i =0; i < chars.length; i++){
familyNameList.add(chars[i]+"");}}ArrayList<String>BoyNameList=newArrayList<>();for(String s :BoyNameTempList){if(!BoyNameList.contains(s)){BoyNameList.add(s);}}ArrayList<String>GrilNameList=newArrayList<>();for(String s :GrilNameTempList){if(!GrilNameList.contains(s)){GrilNameList.add(s);}}ArrayList<String> name =getName(familyNameList,BoyNameList,GrilNameList,50,20);Collections.shuffle(name);BufferedWriter bw =newBufferedWriter(newFileWriter("D:\\Java\\IDEA\\代码文件\\After_pintu_again\\a.txt"));for(String s : name){
bw.write(s);
bw.newLine();}
bw.close();}//将男女生姓名包装好放到集合中publicstaticArrayList<String>getName(ArrayList<String> familyNameList,ArrayList<String>BoyNameList,ArrayList<String>GrilNameList,int boyNum,int grilNum){ArrayList<String> boyname =newArrayList<>();while(true){if(boyname.size()==boyNum){break;}Collections.shuffle(familyNameList);Collections.shuffle(BoyNameList);
boyname.add(familyNameList.get(0)+BoyNameList.get(0));}ArrayList<String> grilname =newArrayList<>();while(true){if(grilname.size()==grilNum){break;}Collections.shuffle(familyNameList);Collections.shuffle(GrilNameList);
grilname.add(familyNameList.get(0)+GrilNameList.get(0));}ArrayList<String> list =newArrayList<>();Random r =newRandom();for(String s : boyname){int age = r.nextInt(10)+18;
list.add(s+"-男-"+age);}for(String s : grilname){int age = r.nextInt(8)+18;
list.add(s+"-女-"+age);}return list;}/*
作用:将爬到的信息切割好之后装到集合中
* 参数一:
* 需要进行获取数据的网址字符串
参数二:
正则表达式规则
参数三:
用正则表达式划分后获取的某一组数据
* */publicstaticArrayList<String>getString(String name,String regex,int i){//先创建一个集合存放数据ArrayList<String> list =newArrayList<>();//按照正则表达式的规则获取数据Pattern pattern =Pattern.compile(regex);//字符串按照正则表达式的方式分割Matcher matcher = pattern.matcher(name);while(matcher.find()){
list.add(matcher.group(i));}return list;}/*
* 参数一:
* 网络地址
* */publicstaticStringgetNet(String str)throwsIOException{//定义一个容器拼接爬取的字符串StringBuilder sb =newStringBuilder();//创建一个URL对象URL url =newURL(str);//连接上这个网址//保证网络畅通URLConnection coon = url.openConnection();//读取数据InputStreamReader isr =newInputStreamReader(coon.getInputStream());int len;while((len=isr.read())!=-1){
sb.append((char) len);}
isr.close();return sb.toString();}}
1.2、利用 Hutool(糊涂)工具包生成数据
Hutool 工具包的使用方法(官方示例):
//请求列表页String listContent =HttpUtil.get("https://www.oschina.net/action/ajax/get_more_news_list?newsType=&p=2");//使用正则获取所有标题List<String> titles =ReUtil.findAll("<span class=\"text-ellipsis\">(.*?)</span>", listContent,1);for(String title : titles){//打印标题Console.log(title);}
packageIO_LastTest;importcn.hutool.core.io.FileUtil;importcn.hutool.core.util.ReUtil;importcn.hutool.http.HttpUtil;importjava.io.IOException;importjava.util.ArrayList;importjava.util.Collections;importjava.util.List;importjava.util.Random;publicclassDome2{publicstaticvoidmain(String[] args)throwsIOException{//获取要爬取的地址String familyNameNet ="https://hanyu.baidu.com/shici/detail?pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&qq-pf-to=pcqq.c2c";StringBoyNameNet="http://www.haoming8.cn/baobao/10881.html";StringGrilNameNet="http://www.haoming8.cn/baobao/7641.html";//爬取地址里面的数据String familyNamestr =HttpUtil.get(familyNameNet);StringBoyNamestr=HttpUtil.get(BoyNameNet);StringGrilNamestr=HttpUtil.get(GrilNameNet);//按照正则表达式的方式将爬取的数据进行切割List<String> familyNameTemplist =ReUtil.findAll("(.{4})(,|。)", familyNamestr,1);List<String>BoyNameTempList=ReUtil.findAll("([\\u4E00-\\u9FA5]{2})(、)",BoyNamestr,1);List<String>GrilNameTempList=ReUtil.findAll("([\\u4E00-\\u9FA5]{2})(\\s)",GrilNamestr,1);ArrayList<String> familyNameList =newArrayList<>();for(String s : familyNameTemplist){char[] chars = s.toCharArray();for(int i =0; i < chars.length; i++){
familyNameList.add(chars[i]+"");}}ArrayList<String>BoyNameList=newArrayList<>();for(String s :BoyNameTempList){if(!BoyNameList.contains(s)){BoyNameList.add(s);}}ArrayList<String>GrilNameList=newArrayList<>();for(String s :GrilNameTempList){if(!GrilNameList.contains(s)){GrilNameList.add(s);}}ArrayList<String> name =getName(familyNameList,BoyNameList,GrilNameList,50,20);Collections.shuffle(name);//"D:\\Java\\IDEA\\代码文件\\After_pintu_again\\b.txt"//写出数据FileUtil.writeLines(name,"D:\\Java\\IDEA\\代码文件\\After_pintu_again\\b.txt","UTF-8");}publicstaticArrayList<String>getName(ArrayList<String> familyNameList,ArrayList<String>BoyNameList,ArrayList<String>GrilNameList,int boyNum,int grilNum){ArrayList<String> boyname =newArrayList<>();while(true){if(boyname.size()==boyNum){break;}Collections.shuffle(familyNameList);Collections.shuffle(BoyNameList);
boyname.add(familyNameList.get(0)+BoyNameList.get(0));}ArrayList<String> grilname =newArrayList<>();while(true){if(grilname.size()==grilNum){break;}Collections.shuffle(familyNameList);Collections.shuffle(GrilNameList);
grilname.add(familyNameList.get(0)+GrilNameList.get(0));}ArrayList<String> list =newArrayList<>();Random r =newRandom();for(String s : boyname){int age = r.nextInt(10)+18;
list.add(s+"-男-"+age);}for(String s : grilname){int age = r.nextInt(8)+18;
list.add(s+"-女-"+age);}return list;}}
1.3、随机点名器的四个需求
package IO_LastTest;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Random roll-caller: prints the name of one randomly chosen person from the
 * generated "name-gender-age" file.
 */
public class Dome3 {

    public static void main(String[] args) throws IOException {
        String rosterPath = "D:\\Java\\IDEA\\代码文件\\After_pintu_again\\b.txt";

        List<String> list = new ArrayList<>();
        // The roster is written as UTF-8, so it is read back as UTF-8 rather than
        // with the platform default; try-with-resources closes the file on any exit.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(rosterPath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Skip blank lines so an empty name can never be drawn.
                if (!line.trim().isEmpty()) {
                    list.add(line);
                }
            }
        }

        if (list.isEmpty()) {
            // Random.nextInt(0) would throw; report the real problem instead.
            System.err.println("No entries found in " + rosterPath);
            return;
        }

        Random r = new Random();
        int index = r.nextInt(list.size());
        // Each line is "name-gender-age"; only the name is printed.
        String str = list.get(index).split("-")[0];
        System.out.println(str);
    }
}