写了一个简单的爬虫工具。
用 Python 抓取 HTML 页面中的指定内容。注意:需要在 Python 3 环境下运行。
# -*- coding: utf-8-*-
import urllib.request
import re
# Base URL of the paginated listing; the page number is appended directly.
LISTING_URL = "http://www.ynshangji.com/shen-huangye/"

# The original loop incremented the counter before building the URL, so the
# crawl covers pages 2..1915 inclusive (page 1 is never requested).
FIRST_PAGE = 2
LAST_PAGE = 1915

# Anchors whose href is a single 7-character path segment and that open in a
# new tab; group 1 captures the link text. re.S lets the text span newlines.
LINK_PATTERN = re.compile(
    r'<a href="/......./" target="_blank">(.+?)</a>', re.S
)


def extract_names(html: str) -> list:
    """Return the stripped link texts selected from one listing page.

    Every second match is taken, starting at index 1 and stopping before
    index 21 (at most ten entries).  Pages with fewer matches yield a
    shorter list instead of raising IndexError.

    NOTE(review): presumably each listing produces two matching anchors and
    only the second is wanted -- confirm against the page markup.
    """
    matches = LINK_PATTERN.findall(html)
    return [text.strip() for text in matches[1:21:2]]


def fetch_page(page: int) -> str:
    """Download listing page *page* and return its body decoded as GBK."""
    url = LISTING_URL + str(page)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('gbk')


def main() -> None:
    """Crawl every listing page, printing the page number then its entries."""
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        names = extract_names(fetch_page(page))
        print(page)
        for name in names:
            print(name)


if __name__ == "__main__":
    main()
运行方式:python test.py