Python-爬虫之requests
一、requests返回的对象
# Inspect the common attributes of a requests Response object.
# NOTE(review): `response` is assumed to come from a prior
# requests.get()/post() call shown in the sections below.
print(type(response))                    # <class 'requests.models.Response'>
response.encoding = 'utf-8'              # fixed: was 'utf=8' — an unknown codec name,
                                         # which makes `.text` raise LookupError
print(response.text)                     # body decoded using response.encoding
print(response.url)                      # final URL (after any redirects)
print(response.content)                  # raw body bytes
print(response.content.decode('utf-8'))  # manual decode of the raw bytes
print(response.status_code)              # HTTP status code, e.g. 200
print(response.headers)                  # response headers (case-insensitive dict)
二、通用的get请求
import requests
from requests import RequestException

# Generic GET request example.
try:
    url = 'https://www.bilibili.com'
    # Fixed: GET query arguments must be passed via `params=`, not `data=`.
    # `data=` puts them in the request body, which servers ignore for GET;
    # `params=` appends them to the URL as a query string.
    params = {
    }
    headers = {
        # Pretend to be a regular browser so the site does not reject us.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
    }
    response = requests.get(url=url, params=params, headers=headers)
    response.encoding = 'utf-8'  # decode the body as UTF-8
    con = response.text
    print(con)
except RequestException:
    # Catch only requests' own errors (connection, timeout, HTTP, ...).
    print('错误!!!')
三、通用的post请求
import json
import requests
from requests import RequestException

# Generic POST request example: query Baidu Translate's suggestion
# endpoint for the keyword "love" and decode its JSON reply.
try:
    post_url = 'https://fanyi.baidu.com/sug'
    # Form payload sent as the POST body.
    data = {
        'kw': 'love'
    }
    headers = {
        # Browser-like User-Agent so the endpoint serves us normally.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
    }
    response = requests.post(url=post_url, data=data, headers=headers)
    response.encoding = 'utf-8'
    raw_text = response.text
    print(raw_text)
    # Parse the JSON text into a Python object and show it.
    parsed = json.loads(raw_text)
    print(parsed)
except RequestException:
    # Any requests-level failure (network, timeout, ...) lands here.
    print('错误!!!')
四、通用的get请求使用代理池
import requests
from random import choice
from requests import RequestException

# GET request routed through a randomly chosen proxy from a small pool.
try:
    url = 'https://www.bilibili.com'
    # Fixed: GET query arguments go in `params=`, not `data=` (with
    # `data=` they are sent as a request body, which GET servers ignore).
    params = {
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
    }
    # Pool of HTTP proxies; `choice` picks one at random per run.
    # NOTE(review): modern requests expects a scheme in the proxy URL,
    # e.g. 'http://47.242.66.236:4818' — confirm against the requests
    # version in use.
    proxy_pool = [
        {'http': '47.242.66.236:4818'},
        {'http': '47.242.190.60:11573'},
    ]
    response = requests.get(url=url, params=params, headers=headers, proxies=choice(proxy_pool))
    response.encoding = 'utf-8'
    con = response.text
    print(con)
except RequestException:
    print('错误!!!')
五、通用的post请求使用代理池
import json
import requests
from random import choice
from requests import RequestException

# POST request through a randomly selected proxy: ask Baidu Translate's
# suggestion endpoint about "love" and print the decoded JSON reply.
try:
    post_url = 'https://fanyi.baidu.com/sug'
    # Form payload carried in the POST body.
    data = {
        'kw': 'love'
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
    }
    # Candidate HTTP proxies; one is drawn at random for this request.
    proxy_pool = [
        {'http': '47.242.66.236:4818'},
        {'http': '47.242.190.60:11573'},
    ]
    picked_proxy = choice(proxy_pool)
    response = requests.post(url=post_url, data=data, headers=headers, proxies=picked_proxy)
    response.encoding = 'utf-8'
    raw_text = response.text
    print(raw_text)
    # Turn the JSON text into a Python object before printing it.
    parsed = json.loads(raw_text)
    print(parsed)
except RequestException:
    # Network/timeout/HTTP errors from requests all end up here.
    print('错误!!!')
六、会话保持
- requests.session():维持会话,可以让我们在跨请求时保存某些参数
# Create one Session object so state (e.g. cookies) persists across
# multiple requests made through it.
session = requests.session()
# Illustrative snippet only: the URL (and other arguments) are omitted
# here — session.get()/session.post() take the same arguments as
# requests.get()/requests.post().
response = session.get()
response = session.post()