"""Scrape the first `a.link-01` link text from 25 paginated result pages.

Posts a `Page=<n>` form body to `url` for pages 1..25 and prints the
xpath-extracted link text of each response.
"""
import urllib.request
import urllib.parse

from lxml import etree

# Target URL (placeholder in the original post).
url = 'http://www'

# Loop over the 25 result pages.
for i in range(1, 26):
    # Page-number form field.
    # Bug fix: the original sent the literal string 'i' ({'Page': 'i'}),
    # so every iteration fetched the same page. Use the loop variable.
    query = {'Page': i}
    data = urllib.parse.urlencode(query).encode(encoding='utf8')
    # POST the form body (passing `data` makes this a POST request).
    post = urllib.request.Request(url, data)
    response = urllib.request.urlopen(post)
    # Raw page source (bytes); etree.HTML handles decoding.
    page = response.read()
    s = etree.HTML(page)
    # Extract the text of the first anchor with class "link-01".
    file = s.xpath('//a[@class="link-01"][1]/text()')
    print(file)
走了很多坑：Python 3.x 和 Python 2.x 有些不同 —— 2.x 有 urllib 和 urllib2 两个模块，而 3.x 只有一个 urllib 包（请求相关功能在 urllib.request 和 urllib.parse 子模块中）。