# -*- coding:utf8 -*-
import urllib2
import bs4
from bs4 import BeautifulSoup
import sys
def main():
    """Fetch one fixed web page and print its HTML to standard output.

    The request is sent with a browser-like ``User-Agent`` header. The
    response body is parsed with BeautifulSoup, rendered back to text and
    encoded with the encoding reported by ``sys.getfilesystemencoding()``
    before being printed.

    This is Python 2 code: it relies on ``urllib2`` and on ``str`` being a
    byte string.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
    }
    req = urllib2.Request('http://websvr03.qd.lucent.com/index.php', headers=headers)

    response = urllib2.urlopen(req)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(content, 'html.parser')

    # getfilesystemencoding() may return None on Python 2; fall back to UTF-8.
    output_encoding = sys.getfilesystemencoding() or 'utf-8'

    # decode() already yields the document as Unicode text; its first
    # positional parameter is not an encoding name, so none is passed.
    # 'replace' keeps one unencodable character from aborting the output.
    html = soup.decode().encode(output_encoding, 'replace')
    print(html)
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Python 抓取网页的 HTML
最新推荐文章于 2024-04-26 15:09:14 发布