package webTools;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import dbTools.DBTools;

/**
 * Downloads an HTML page, extracts song entries (name, performer, link) with
 * regular expressions and stores each entry through {@link DBTools}.
 *
 * <p>NOTE(review): in the revision this class was restored from, the HTML tag
 * fragments inside the regex literals (everything shaped like {@code <tag...}
 * or {@code </tag>}) were missing, as were both loop headers and the body of
 * the performer loop in {@link #getFromBaiduMap3(String)}. The tag names
 * {@code td}, {@code a} and {@code table} used below, and the performer loop
 * body, are reconstructions. Confirm them against the page being scraped
 * before relying on the results.
 */
public class IOTOWeb {

    /** Charset the remote page is decoded with. */
    private static final String PAGE_CHARSET = "gb2312";

    /** Shared {@code href} attribute matcher: double-quoted, single-quoted or bare value. */
    private static final String HREF_ATTR =
            "href=(\"([^\"]*)\"|\'([^\']*)\'|([^\\s>]*))";

    /**
     * One table cell holding an anchor, optionally wrapped in parentheses.
     * NOTE(review): "td" / "a" / "</a>" / "</td>" are reconstructed; the
     * optional whitespace before the anchor tolerates the blank that appeared
     * at that position in the damaged literal.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "<td[^>]*>[\\(]*\\s*<a[^>]*" + HREF_ATTR + "[^>]*>(.*?)</a>[\\)]*[\\s]*</td>",
            Pattern.DOTALL);

    /** An {@code href} attribute that is followed by a closing double quote. */
    private static final Pattern HREF_PATTERN =
            Pattern.compile(HREF_ATTR + "\"", Pattern.DOTALL);

    /**
     * An anchor enclosed in parentheses, i.e. {@code (<a href=...>text</a>)}.
     * NOTE(review): "a" / "</a>" are reconstructed.
     */
    private static final Pattern PERSON_PATTERN = Pattern.compile(
            "\\(<a[^>]*" + HREF_ATTR + "[^>]*>(.*?)</a>\\)", Pattern.DOTALL);

    /**
     * An anchor followed by one whitespace character.
     * NOTE(review): "a" / "</a>" are reconstructed.
     */
    private static final Pattern SONG_NAME_PATTERN = Pattern.compile(
            "<a[^>]*" + HREF_ATTR + "[^>]*>(.*?)</a>\\s", Pattern.DOTALL);

    /**
     * Container that holds the song list.
     * NOTE(review): nothing of the original literal survived; a table element
     * is assumed here. TODO confirm against the target page.
     */
    private static final Pattern MAIN_CONTENT_PATTERN =
            Pattern.compile("<table[^>]*>.*?</table>", Pattern.DOTALL);

    /** Any markup tag, from {@code <} up to the next {@code >}. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]*>");

    /** Database helper used to persist the extracted songs. */
    DBTools dbTools = new DBTools();

    /**
     * Reads the page at {@code htmlURL} and returns its text with the line
     * terminators removed (lines are concatenated directly).
     *
     * <p>Failures are best-effort: a {@link MalformedURLException}, an
     * {@link UnsupportedEncodingException} or any other {@link IOException}
     * is reported on standard error and whatever was read so far (possibly
     * the empty string) is returned.
     *
     * @param htmlURL address of the page to download
     * @return the page content, never {@code null}
     */
    public String getHtmlContent(String htmlURL) {
        StringBuilder htmlContent = new StringBuilder();
        try {
            URL url = new URL(htmlURL);
            // try-with-resources closes the stream even when readLine() fails.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(url.openStream(), PAGE_CHARSET))) {
                String rowContent;
                while ((rowContent = in.readLine()) != null) {
                    htmlContent.append(rowContent);
                }
            }
        } catch (IOException e) {
            // Covers malformed URLs and unsupported encodings as well; both
            // are IOException subclasses.
            e.printStackTrace();
        }
        return htmlContent.toString();
    }

    /**
     * Collects every table cell of {@code htmlContent} that contains a link.
     *
     * @param htmlContent markup to scan
     * @return the full text of each matching cell, in document order
     */
    public List<String> getLink(String htmlContent) {
        return findAll(LINK_PATTERN, htmlContent);
    }

    /**
     * Extracts the {@code href} values from {@code htmlContent}.
     *
     * <p>The pattern requires a double quote right after the value, so in
     * practice only double-quoted attributes are reported. From each match
     * the leading {@code href="} and every remaining double quote are removed.
     *
     * @param htmlContent markup to scan
     * @return the extracted link targets, in document order
     */
    public List<String> getHref(String htmlContent) {
        return stripHrefQuotes(findAll(HREF_PATTERN, htmlContent));
    }

    /**
     * Finds the parenthesised anchors of {@code htmlContent}.
     *
     * <p>Each entry is the whole match with the first {@code href="} and all
     * double quotes removed; the surrounding markup is otherwise kept.
     *
     * @param htmlContent markup to scan
     * @return one entry per parenthesised anchor, in document order
     */
    public List<String> getPerson(String htmlContent) {
        return stripHrefQuotes(findAll(PERSON_PATTERN, htmlContent));
    }

    /**
     * Finds the anchors of {@code htmlContent} that are followed by whitespace.
     *
     * @param htmlContent markup to scan
     * @return the full text of each match (anchor plus the trailing
     *         whitespace character), in document order
     */
    public List<String> getSongName(String htmlContent) {
        return findAll(SONG_NAME_PATTERN, htmlContent);
    }

    /**
     * Concatenates every song-list container found in {@code htmlContent}.
     *
     * @param htmlContent markup of the whole page
     * @return the matched containers joined without a separator; empty when
     *         nothing matches
     */
    public String getMainContent(String htmlContent) {
        StringBuilder mainContent = new StringBuilder();
        for (String section : findAll(MAIN_CONTENT_PATTERN, htmlContent)) {
            mainContent.append(section);
        }
        return mainContent.toString();
    }

    /**
     * Removes markup tags from {@code s}.
     *
     * <p>The previous pattern {@code <.>} only removed tags whose body is a
     * single character, which left anchors such as {@code <a href=...>} and
     * {@code </a>} in the result.
     *
     * @param s text that may contain tags
     * @return {@code s} without any {@code <...>} sequences
     */
    public String outTag(final String s) {
        return TAG_PATTERN.matcher(s).replaceAll("");
    }

    /**
     * Downloads {@code htmlURL}, extracts every song cell and inserts one row
     * per song into the {@code song} table.
     *
     * <p>Cells without a recognisable song anchor or link target are skipped
     * instead of aborting the whole import with an
     * {@link IndexOutOfBoundsException}.
     *
     * @param htmlURL address of the page to import
     * @throws Throwable whatever the database helper throws
     */
    public void getFromBaiduMap3(String htmlURL) throws Throwable {
        String htmlContent = getHtmlContent(htmlURL);
        String mainContent = getMainContent(htmlContent);

        for (String tdTag : getLink(mainContent)) {
            List<String> songNameList = getSongName(tdTag);
            if (songNameList.isEmpty()) {
                continue;
            }
            String songAnchor = songNameList.get(0);

            List<String> hrefList = getHref(songAnchor);
            if (hrefList.isEmpty()) {
                continue;
            }

            String songName = outTag(songAnchor);
            String songPerson = joinPersons(getPerson(tdTag));
            String songHref = hrefList.get(0);

            System.out.println();
            saveSong(songName, songPerson, songHref);
        }
    }

    /** Returns every match of {@code pattern} in {@code text}, in order. */
    private static List<String> findAll(Pattern pattern, String text) {
        List<String> matches = new ArrayList<String>();
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }

    /** Drops the first {@code href="} and all double quotes from each entry. */
    private static List<String> stripHrefQuotes(List<String> matches) {
        List<String> cleaned = new ArrayList<String>(matches.size());
        for (String match : matches) {
            cleaned.add(match.replaceFirst("href=\"", "").replace("\"", ""));
        }
        return cleaned;
    }

    /**
     * Builds the performer text for one song; "无" marks a song without any.
     * NOTE(review): the original loop body was lost; concatenating the
     * tag-stripped entries is an assumption. Verify the expected format.
     */
    private String joinPersons(List<String> personList) {
        if (personList.isEmpty()) {
            return "无";
        }
        StringBuilder songPerson = new StringBuilder();
        for (String person : personList) {
            songPerson.append(outTag(person));
        }
        return songPerson.toString();
    }

    /**
     * Inserts one song row through the database helper.
     * The value list stays a raw {@code ArrayList} because the parameter type
     * of {@code DBTools.update} is not visible from this file.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void saveSong(String songName, String songPerson, String songHref)
            throws Throwable {
        String sql = "insert into song(songName,songPerson,songHref) values(?,?,?)";
        ArrayList listValues = new ArrayList();
        listValues.add(songName);
        listValues.add(songPerson);
        listValues.add(songHref);
        dbTools.update(sql, listValues);
    }
}