#-*- coding:utf8 -*-
import urllib.parse
import urllib.request
def getCharSet(response, default='utf-8'):
    """Return the character encoding declared in the response's Content-Type.

    The header value is split on ``;`` and the first ``charset=...``
    parameter found is returned with surrounding whitespace and quotes
    removed.

    Args:
        response: Object whose ``headers`` attribute offers a mapping-style
            ``get`` (for example the object returned by
            ``urllib.request.urlopen``).
        default: Encoding returned when the header is missing or carries
            no usable ``charset`` parameter.

    Returns:
        The declared charset name, or ``default``.
    """
    contype = response.headers.get('Content-Type') or ''
    # Scan every segment: the media type itself has no '=' and is skipped,
    # while a bare "charset=..." header without a media type still works.
    for segment in contype.split(';'):
        key, sep, value = segment.partition('=')
        if sep and key.strip().lower() == 'charset':
            charset = value.strip().strip('"\'')
            if charset:
                return charset
    # Returning the raw media type (e.g. "text/html") here would later be
    # used as a codec name and fail, so fall back to a real encoding.
    return default
def getData(url, *params):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to request.
        *params: Optional query parameters. Each positional argument is
            either a mapping of names to values or a single
            ``(name, value)`` pair. They are URL-encoded and appended to
            ``url`` as a query string.

    Returns:
        The body decoded with the charset reported by ``getCharSet``;
        bytes that cannot be decoded are dropped.

    Raises:
        urllib.error.URLError: Propagated from ``urllib.request.urlopen``
            when the request fails.
    """
    theurl = url
    if params:
        pairs = []
        for param in params:
            if hasattr(param, 'items'):
                pairs.extend(param.items())
            else:
                pairs.append(param)
        # Extend an existing query string instead of adding a second '?'.
        separator = '&' if '?' in url else '?'
        theurl = url + separator + urllib.parse.urlencode(pairs)
    req = urllib.request.Request(theurl)
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(req) as response:
        charset = getCharSet(response)
        body = response.read()
    try:
        return body.decode(charset, 'ignore')
    except LookupError:
        # The reported charset is not a codec Python knows; use UTF-8.
        return body.decode('utf-8', 'ignore')
if __name__ == '__main__':
    # Demo run: download one listing page and print its text to stdout.
    page_url = 'http://list.taobao.com/browse/cat-0.htm'
    print(getData(page_url))
# python抓取网页
# 最新推荐文章于 2024-05-17 08:30:00 发布