笔记:
jsoup.jar 包常用于制作网页爬虫,使用时只需要导入 jsoup.jar 这一个包即可,具体用法参考:点击打开链接。解析网页时通常也会用到正则表达式,正则表达式中 group 的概念参考:点击打开链接。
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class JsoupTest {
int count;
int OPM;
int Cplusplus;
int SystemOperationEngineer;
int DataDevelopmentEngineer;
int PM;
int VisualDesigner;
int WebReconstructionEngineer;
int InteractionDesigner;
int BusinessSpecialist;
int city;
ArrayList <WHUT> WHUTStudent;
public void get(int city) throws IOException {
count=1;
OPM=0;
Cplusplus=0;
SystemOperationEngineer=0;
DataDevelopmentEngineer=0;
PM=0;
VisualDesigner=0;
WebReconstructionEngineer=0;
InteractionDesigner=0;
BusinessSpecialist=0;
this.city=city;
WHUTStudent=new ArrayList <WHUT>();
Document doc = Jsoup.connect("http://svr.campus.xunlei.com/viewlist?callback=jQuery110209096807448659092_1444288837792&city="+city+"&from=0&to=100000&name=-1&_=0").get();
String value=doc.toString();
Pattern pattern_name = Pattern.compile(""name":"(.+?)",");
Matcher macher_name = pattern_name.matcher(value);
Pattern pattern_position = Pattern.compile("position":"(.+?)",");
Matcher macher_position = pattern_position.matcher(value);
Pattern pattern_school = Pattern.compile("school":"(.+?)",");
Matcher macher_school = pattern_school.matcher(value);
Pattern pattern_major = Pattern.compile("major":"(.+?)",");
Matcher macher_major = pattern_major.matcher(value);
Pattern pattern_time = Pattern.compile("time":"(.+?)",");
Matcher macher_time = pattern_time.matcher(value);
while(macher_name.find()&&macher_position.find()&&macher_school.find()&&macher_major.find()&&macher_time.find())
{
if(city==-1){
System.out.println("编号:"+count);
System.out.println(macher_name.group(1));
System.out.println(macher_position.group(1));
System.out.println(macher_school.group(1));
System.out.println(macher_major.group(1));
System.out.println(macher_time.group(1));
System.out.println("************************************");
}
count++;
if(macher_position.group(1).equals("运营产品经理")) OPM++;
if(macher_position.group(1).equals("C++开发工程师")) Cplusplus++;
if(macher_position.group(1).equals("系统运维工程师")) SystemOperationEngineer++;
if(macher_position.group(1).equals("数据开发工程师")) DataDevelopmentEngineer++;
if(macher_position.group(1).equals("产品经理")) PM++;
if(macher_position.group(1).equals("视觉设计师")) VisualDesigner++;
if(macher_position.group(1).equals("网页重构工程师")) WebReconstructionEngineer++;
if(macher_position.group(1).equals("交互设计师")) InteractionDesigner++;
if(macher_position.group(1).equals("商务专员")) BusinessSpecialist++;
if(macher_school.group(1).equals("武汉理工大学")) {
WHUT tmp=new WHUT();
tmp.name=macher_name.group(1);
tmp.position=macher_position.group(1);
tmp.school=macher_school.group(1);
tmp.major=macher_major.group(1);
tmp.time=macher_time.group(1);
WHUTStudent.add(tmp);
}
}
}
public void print(){
switch(city){
case -1:System.out.println("全国:");break;
case 12:System.out.println("西安:");break;
case 11:System.out.println("成都:");break;
case 8:System.out.println("武汉:");break;
case 4: System.out.println("广州:");break;
}
System.out.println("商务专员:"+BusinessSpecialist+"人");
System.out.println("产品经理:"+PM+"人");
System.out.println("视觉设计师:"+VisualDesigner+"人");
System.out.println("交互设计师:"+InteractionDesigner+"人");
System.out.println("运营产品经理:"+OPM+"人");
System.out.println("C++开发工程师:"+Cplusplus+"人");
System.out.println("网页重构工程师:"+WebReconstructionEngineer+"人");
System.out.println("系统运维工程师:"+SystemOperationEngineer+"人");
System.out.println("数据开发工程师:"+DataDevelopmentEngineer+"人");
System.out.println("************************************");
if(city==8){
System.out.println("其中武汉理工大学的学生有:"+WHUTStudent.size()+"人");
for(int i=0;i<WHUTStudent.size();i++)
{System.out.println(WHUTStudent.get(i).name);
System.out.println(WHUTStudent.get(i).position);
System.out.println(WHUTStudent.get(i).major);
System.out.println(WHUTStudent.get(i).school);
System.out.println("#########################");
}
System.out.println("************************************");
}
}
public static void main(String[] args) throws Exception {
JsoupTest wholeCountry=new JsoupTest ();
wholeCountry.get(-1);//全国
wholeCountry.print();
wholeCountry.get(12);//西安
wholeCountry.print();
wholeCountry.get(11);//成都
wholeCountry.print();
wholeCountry.get(8);//武汉
wholeCountry.print();
wholeCountry.get(4);//广州
wholeCountry.print();
}
}
/**
 * Plain data holder for one record extracted by {@code JsoupTest}; an instance
 * is created there for each record whose school equals "武汉理工大学".
 *
 * <p>Declared package-private because a Java source file may contain only one
 * public top-level class, and this type shares its file with
 * {@code JsoupTest}.
 */
class WHUT {
    /** Text captured for the record's "name" field. */
    String name;
    /** Text captured for the record's "position" field. */
    String position;
    /** Text captured for the record's "school" field. */
    String school;
    /** Text captured for the record's "major" field. */
    String major;
    /** Text captured for the record's "time" field. */
    String time;
}