//pom.xml
<dependencies>
<dependency>
<!-- jsoup HTML parser library @ https://jsoup.org/ -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
</dependencies>
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Scrapes the link list on the bitauto brand index page, follows each link,
 * collects the entry names found on the linked page, and writes everything to
 * a tab-separated UTF-8 text file laid out column-wise (one column per link).
 */
public class Test {

    /** Index page whose links are followed. */
    private static final String BRAND_INDEX_URL = "http://car.bitauto.com/qichepinpai/";

    /** CSS selector for the links on the index page. */
    private static final String BRAND_LINK_SELECTOR = "ul.list_pic li a:first-child";

    /** CSS selector for the entry names on each linked page. */
    private static final String MODEL_NAME_SELECTOR = "#data_table_MasterSerialList_0 li.name a";

    /** File the table is written to, relative to the working directory. */
    private static final String OUTPUT_FILE = "cattype.txt";

    /** Timeout applied to every page fetch, in milliseconds. */
    private static final int FETCH_TIMEOUT_MILLIS = (int) TimeUnit.SECONDS.toMillis(60);

    /**
     * Fetches all pages first, then writes the output file, so a failed fetch
     * never leaves a partially written file behind.
     *
     * @param args ignored
     * @throws Exception if a page cannot be fetched or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        List<List<String>> columns = fetchColumns();
        writeColumns(columns, OUTPUT_FILE);
    }

    /**
     * Builds one column per link on the index page: the link's {@code title}
     * attribute, a {@code null} placeholder (rendered as an empty cell), then
     * the text of every element matched by {@link #MODEL_NAME_SELECTOR} on the
     * linked page.
     */
    private static List<List<String>> fetchColumns() throws IOException {
        Document index = Jsoup.parse(new URL(BRAND_INDEX_URL), FETCH_TIMEOUT_MILLIS);
        Elements brandLinks = index.select(BRAND_LINK_SELECTOR);

        List<List<String>> columns = new ArrayList<>();
        for (Element brandLink : brandLinks) {
            List<String> column = new ArrayList<>();
            column.add(brandLink.attr("title"));
            // Placeholder that keeps the second output row blank between the
            // header cell and the entries below it.
            column.add(null);

            // absUrl resolves relative and protocol-relative hrefs against the
            // index page's URL; the raw attribute would make new URL(...) throw
            // MalformedURLException for anything that is not already absolute.
            String href = brandLink.absUrl("href");
            Document brandPage = Jsoup.parse(new URL(href), FETCH_TIMEOUT_MILLIS);
            for (Element model : brandPage.select(MODEL_NAME_SELECTOR)) {
                column.add(model.text());
            }
            columns.add(column);
        }
        return columns;
    }

    /**
     * Writes the columns side by side as tab-separated rows. Every cell is
     * followed by a tab (including the last one in a row), and missing or
     * {@code null} cells are written as an empty cell, so all rows have the
     * same number of tabs.
     */
    private static void writeColumns(List<List<String>> columns, String fileName) throws IOException {
        int rowCount = 0;
        for (List<String> column : columns) {
            rowCount = Math.max(rowCount, column.size());
        }

        // try-with-resources closes the file even when a write fails.
        try (FileOutputStream out = new FileOutputStream(fileName);
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
            for (int row = 0; row < rowCount; row++) {
                for (List<String> column : columns) {
                    String cell = row < column.size() ? column.get(row) : null;
                    if (cell != null) {
                        writer.write(cell);
                    }
                    writer.write('\t');
                }
                writer.newLine();
            }
        }
    }
}