import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small page scraper.
 *
 * <p>{@link #download(String)} stores a page in a local file, {@link #test()} scans that file for
 * links to {@code /plus/view.php...} pages and calls {@link #visit(String, String)} on each one,
 * and {@code visit} prints the first "main building ..." span found on the linked page.
 *
 * <p>All page text is handled as UTF-8, both on the wire and in the local file.
 */
public class Test2 {
    /** Prefix prepended to the site-relative links found by {@link #test()}. */
    private static final String SITE_ROOT = "http://ste.xidian.edu.cn";

    /** File written by {@link #download(String)} and read back by {@link #test()}. */
    private static final String CACHE_FILE = "content.txt";

    /** Anchor whose href starts with /plus/view.php; group 1 is the href, group 2 the anchor body. */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("<a\\s.*?href=\"(/plus/view.php[^\"]+)\"[^>]*>(.*?)</a>");

    /**
     * A span (with at least one attribute) whose text starts with the two Chinese characters
     * U+4E3B U+697C ("main building"); group 1 is the whole span text. Sample markup seen on the
     * target pages: a span with an inline style attribute whose text is that prefix followed by
     * a zone and the number 137.
     *
     * <p>The prefix is written with Unicode escapes so that the compiled regex does not depend on
     * the encoding the source file is compiled with.
     */
    private static final Pattern MAIN_BUILDING_PATTERN =
            Pattern.compile("<span[^>]+>(\u4e3b\u697c[^<]*)</span>");

    /** Creates a scraper; there is no per-instance state. */
    public Test2() {
    }

    /**
     * Fetches a page over HTTP and saves it to {@code content.txt} in the working directory.
     *
     * <p>The page is fetched completely before the file is opened, so a failed fetch does not
     * truncate a previously saved file. Line terminators of the page are not kept.
     *
     * @param string address of the page to fetch
     * @throws IOException if the page cannot be fetched or the file cannot be written
     */
    public void download(String string) throws IOException {
        String page = fetch(string);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(CACHE_FILE), StandardCharsets.UTF_8))) {
            writer.write(page);
        }
    }

    /**
     * Fetches a page and, if it contains a "main building" span, prints
     * {@code name}, a tab, and the span text on standard output. Prints nothing otherwise.
     *
     * @param string address of the page to fetch
     * @param name   label printed in front of the match
     * @throws IOException if the page cannot be fetched
     */
    public void visit(String string, String name) throws IOException {
        Matcher match = MAIN_BUILDING_PATTERN.matcher(fetch(string));
        if (match.find()) {
            System.out.println(name + "\t" + match.group(1));
        }
    }

    /**
     * Reads {@code content.txt} and visits every {@code /plus/view.php...} link found in it,
     * passing the anchor body as the label.
     *
     * @throws IOException if the file cannot be read or any linked page cannot be fetched; the
     *                     first failure stops the scan
     */
    public void test() throws IOException {
        String listing;
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(CACHE_FILE), StandardCharsets.UTF_8))) {
            listing = readAllLines(reader);
        }
        Matcher link = LINK_PATTERN.matcher(listing);
        while (link.find()) {
            visit(SITE_ROOT + link.group(1), link.group(2));
        }
    }

    /** Downloads the page at {@code address}, decodes it as UTF-8 and returns it as one string. */
    private static String fetch(String address) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(address).openConnection();
        // try-with-resources closes the response stream even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            return readAllLines(reader);
        }
    }

    /**
     * Concatenates every line of {@code reader} without any separator. The patterns above use
     * {@code .} without DOTALL, so joining the lines lets a match span what were several lines.
     */
    private static String readAllLines(BufferedReader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            text.append(line);
        }
        return text.toString();
    }

    /** Runs {@link #test()} against the saved file and reports any I/O failure on stderr. */
    public static void main(String[] args) {
        try {
            new Test2().test();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}