# -*- coding: cp936 -*-
import urllib2
from BeautifulSoup import BeautifulSoup
import re
# Fetch the Baidu home page, locate the <p id="lk"> paragraph and print the
# raw href value of every <a> tag found inside it, followed by the whole list.
response = urllib2.urlopen('http://www.baidu.com')
try:
    content = response.read()
finally:
    # Release the connection even if the read fails.
    response.close()

p = re.compile(r'<p\sid="lk">(.*?)</p>')
# The page is decoded as UTF-8 and re-encoded as cp936 (the encoding declared
# at the top of this file) before matching. "replace" keeps a malformed byte
# or a character with no cp936 mapping from aborting the script.
m = p.search(unicode(content, "utf-8", "replace").encode("cp936", "replace"))

# search() returns None when the paragraph is absent; report no links in that
# case instead of failing on m.group(1).
if m is None:
    urls = []
else:
    urls = re.findall(r'<a\shref=(.*?)>', m.group(1))

# A parenthesised single argument prints exactly like the bare Python 2
# print statement.
for url in urls:
    print(url)
print(urls)
python 抓取网页网址信息
最新推荐文章于 2024-07-21 21:39:32 发布