#coding=utf-8
import re
import urllib
def fangwen(url, pages=10):
    """Build the list of page URLs to crawl.

    Each page number from 1 to ``pages`` is appended to ``url``; the list is
    then finished with the fixed index address
    ``http://www.xici.net.co/nn/``.

    A new list is created on every call, so repeated calls never see each
    other's entries and no module-level state (in particular no global that
    would shadow the ``list`` builtin) is needed.

    Args:
        url: Base address that each page number is appended to.
        pages: Number of numbered pages to generate. Defaults to 10.

    Returns:
        A new list holding ``pages`` numbered URLs followed by the index URL.
    """
    urls = [url + str(page) for page in range(1, pages + 1)]
    # NOTE(review): the final entry is always this literal address, even when
    # ``url`` points somewhere else -- confirm that this is intended.
    urls.append('http://www.xici.net.co/nn/')
    # Echo the crawl plan to stdout.
    print(urls)
    return urls
def getHtml(url):
    """Download ``url`` and return its body decoded as UTF-8 text.

    Args:
        url: Address to fetch.

    Returns:
        The response body decoded with the "utf-8" codec.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Any error raised while opening or reading the URL propagates
        unchanged.
    """
    # urlopen lives in urllib.request on Python 3 and directly in urllib on
    # Python 2; resolve it locally so the function works on either.
    try:
        from urllib.request import urlopen
    except ImportError:
        urlopen = urllib.urlopen
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        # Always release the underlying handle, even if read() fails.
        page.close()
    return html.decode("utf-8")
def jiexi(html):
    """Print and return the contents of every ``<td>`` cell in ``html``.

    Only cells opened by a bare ``<td>`` tag (no attributes) are matched.
    Each cell's content is printed UTF-8 encoded, one per line.

    Args:
        html: Page markup as text.

    Returns:
        The matched cell contents in document order; an empty list when the
        markup contains no matching cell.
    """
    # re.S lets "." match newlines, so cells spanning several lines are found;
    # the non-greedy group stops at the first closing tag.
    cells = re.findall(r'<td>(.*?)</td>', html, re.S)
    for cell in cells:
        print(cell.encode('utf-8'))
    return cells
if __name__ == '__main__':
    # Script entry point: fetch each listing page in turn and dump the
    # contents of its table cells.
    for page_url in fangwen('http://www.xici.net.co/nn/'):
        jiexi(getHtml(page_url))
# Python 抓取代理（目前没有进行解析）
# 最新推荐文章于 2024-05-27 08:52:59 发布