Using HTTP Request Libraries
The urllib library
- urllib is Python's built-in URL handling package; it bundles several modules for working with URLs.
- urllib.request opens and reads URLs;
- urllib.error contains the exceptions raised by urllib.request;
- urllib.parse parses and builds URLs (urllib.parse and urllib.error are shown in a short sketch after the basic example below);
- urllib.robotparser parses robots.txt files.
- Basic usage of urllib
import urllib.request

url = 'http://www.baidu.com/s?wd=ip'
headers = {
    'user-agent': 'xxx',
    'cookie': 'xxx'
}
# Build a Request carrying custom headers, then open it through an
# explicitly constructed opener (HTTPHandler + build_opener).
req = urllib.request.Request(url=url, headers=headers)
handler = urllib.request.HTTPHandler()
opener = urllib.request.build_opener(handler)
resp = opener.open(req)
content = resp.read().decode('utf-8')
print(content)
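- A minimal sketch of urllib.parse and urllib.error (mentioned above): urlparse splits a URL into its components, urlencode builds a query string, and HTTPError/URLError are the exceptions urlopen raises.
import urllib.error
import urllib.parse
import urllib.request
# Split a URL into its components and rebuild the query string from a dict.
parts = urllib.parse.urlparse('http://www.baidu.com/s?wd=ip')
print(parts.scheme, parts.netloc, parts.path, parts.query)
query = urllib.parse.urlencode({'wd': 'ip', 'pn': 10})
url = 'http://www.baidu.com/s?' + query
# HTTPError covers non-2xx responses; URLError covers network-level failures.
try:
    resp = urllib.request.urlopen(url, timeout=5)
    print(resp.status)
except urllib.error.HTTPError as e:
    print('HTTP error:', e.code, e.reason)
except urllib.error.URLError as e:
    print('URL error:', e.reason)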
- Fetching the covers of popular movies from Douban
import urllib.request
import json
import csv

url = 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&page_limit=50&page_start=0'
headers = {'User-agent': 'xxx'}
req = urllib.request.Request(url=url, headers=headers)
resp = urllib.request.urlopen(req)
content = resp.read().decode('utf-8')
jsonObj = json.loads(content)
movies = jsonObj.get("subjects")

csv_items = []
items = []
for movie in movies:
    items.append(movie.get("title"))
    cover = movie.get("cover")
    items.append(cover)
    # Download the cover image, naming it after the last path segment of the URL.
    urllib.request.urlretrieve(cover, cover.split("/")[-1])
    csv_items.append(items)
    items = []

# Write one row per movie: title, cover URL.
with open('豆瓣热门电影.csv', 'a', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(csv_items)
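- urlretrieve sends no custom headers on its own, so image hosts that require a User-Agent may reject the download. One workaround (a sketch; the cover URL below is a made-up placeholder and 'xxx' stands for a real User-Agent) is to install a global opener whose headers apply to every later urlopen/urlretrieve call:
import urllib.request
# Headers set on the installed opener are sent by every subsequent
# urlopen/urlretrieve call in this process.
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'xxx')]
urllib.request.install_opener(opener)
# Hypothetical cover URL; inside the loop above this would be the cover variable.
cover = 'https://img1.doubanio.com/view/photo/s_ratio_poster/public/example.jpg'
urllib.request.urlretrieve(cover, cover.split('/')[-1])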
- Translating a word with Baidu Translate
import urllib.request
import urllib.parse
import json

# Suggestion endpoint: a plain POST with a single form field.
url = 'https://fanyi.baidu.com/sug'
headers = {'User-agent': 'xxx'}
data = {'kw': 'apple'}
data = urllib.parse.urlencode(data).encode('utf-8')   # the POST body must be bytes
req = urllib.request.Request(url=url, data=data, headers=headers)
resp = urllib.request.urlopen(req)
content = resp.read().decode('utf-8')
obj = json.loads(content)
print(obj)

# Full translation endpoint: needs the session Cookie plus sign and token
# values that the site generates per query; the values below were copied
# from a browser request for 'spider'.
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
headers = {
    'Cookie': 'xxx'
}
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'spider',
    'transtype': 'realtime',
    'simple_means_flag': '3',
    'sign': '63766.268839',
    'token': 'xxx',
    'domain': 'common'
}
data = urllib.parse.urlencode(data).encode('utf-8')
req = urllib.request.Request(url=url, data=data, headers=headers)
resp = urllib.request.urlopen(req)
content = resp.read().decode('utf-8')
obj = json.loads(content)
print(obj)
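- The sug request can be wrapped into a small helper; baidu_sug below is a hypothetical name, and the body simply repeats the POST above with urllib.error handling added:
import urllib.error
import urllib.parse
import urllib.request
import json
def baidu_sug(word, user_agent='xxx'):
    # POST the keyword as a form field and return the parsed JSON, or None on failure.
    body = urllib.parse.urlencode({'kw': word}).encode('utf-8')
    req = urllib.request.Request(url='https://fanyi.baidu.com/sug',
                                 data=body,
                                 headers={'User-agent': user_agent})
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            return json.loads(resp.read().decode('utf-8'))
    except urllib.error.URLError as e:   # HTTPError is a subclass of URLError
        print('request failed:', e.reason)
        return None
print(baidu_sug('apple'))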
The requests library
- requests is an HTTP library implemented in Python.
- Basic usage of requests
import requests

url = 'http://www.baidu.com'
resp = requests.get(url)
print(type(resp))          # <class 'requests.models.Response'>
resp.encoding = 'utf-8'    # encoding used when decoding resp.text
print(resp.text)           # response body decoded to str
print(resp.url)            # final URL after any redirects
print(resp.content)        # raw response body as bytes
print(resp.status_code)    # HTTP status code
print(resp.headers)        # response headers
# General form: query parameters go in params, custom headers in headers.
requests.get(url=url, params=data, headers=headers)
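- A concrete version of the last line above (a sketch reusing the Douban endpoint from the urllib example; 'xxx' is still a placeholder User-Agent and the list may be empty if the site blocks the request): params builds the query string and resp.json() parses the JSON body directly.
import requests
url = 'https://movie.douban.com/j/search_subjects'
params = {'type': 'movie', 'tag': '热门', 'page_limit': 50, 'page_start': 0}
headers = {'User-agent': 'xxx'}
resp = requests.get(url=url, params=params, headers=headers)
print(resp.url)                        # the URL actually sent, with the encoded query string
movies = resp.json().get('subjects')   # parse the JSON body without json.loads
for movie in (movies or [])[:5]:       # guard against a missing or blocked response
    print(movie.get('title'), movie.get('cover'))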
- Translating a word with Baidu Translate
import requests
import json

url = 'https://fanyi.baidu.com/sug'
headers = {'User-agent': 'xxx'}
data = {'kw': 'apple'}
# headers must be passed by keyword: the third positional argument of
# requests.post is json, not headers.
resp = requests.post(url, data=data, headers=headers)
content = resp.text
obj = json.loads(content)
print(obj)
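- For the v2transapi endpoint shown in the urllib section, a requests.Session keeps the same headers and cookies across calls. A sketch under the same assumptions as before: the 'xxx' values are placeholders and the sign was copied from a browser request for 'spider', so it only works for that exact query.
import requests
# Every request made through the session reuses these headers (and any cookies it collects).
session = requests.Session()
session.headers.update({'User-agent': 'xxx', 'Cookie': 'xxx'})
url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
data = {
    'from': 'en',
    'to': 'zh',
    'query': 'spider',
    'transtype': 'realtime',
    'simple_means_flag': '3',
    'sign': '63766.268839',   # tied to the query text; copied from the browser
    'token': 'xxx',
    'domain': 'common'
}
resp = session.post(url, data=data)
print(resp.json())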