话不多说直接上代码:
#coding=utf-8
from bs4 import BeautifulSoup
import urllib2
# Listing pages to crawl: the first page is index.html, the remaining pages
# are named by their page number (2.html through 18.html).
url = 'http://www.pythontab.com/html/pythonhexinbiancheng/index.html'
url_list = [url]
for i in range(2, 19):
    url_list.append('http://www.pythontab.com/html/pythonhexinbiancheng/%s.html' % i)

# Collect one {"title", "link"} record per anchor found in the #catlist list
# of every listing page.
source_list = []
for j in url_list:
    request = urllib2.urlopen(j)
    try:
        html = request.read()
    finally:
        # Release the connection even if reading the body fails.
        request.close()
    suop = BeautifulSoup(html, 'lxml')
    # The same anchor carries both the title text and the href, so a single
    # selector query is enough (no need to select twice and zip).
    for anchor in suop.select('#catlist > li > a'):
        data = {
            "title": anchor.get_text(),
            "link": anchor.get('href'),
        }
        source_list.append(data)
for l in source_list:
request = urllib2.urlopen(l['link'])
html = request.read()
suop = Bea