# 用法一:requests
案例一
import requests

# Case 1: fetch the HTML source of Baidu's homepage.
target_url = "http://www.baidu.com"
response = requests.get(target_url)
# Force utf-8 decoding so the Chinese text is not garbled.
response.encoding = "utf-8"
# Print the raw page source returned by Baidu.
print(response.text)
上篇案例是调用 urllib 库来爬取百度网页信息,与本次使用 requests 的写法有所差异,其余从略……
案例二
import requests

# Case 2: search Sogou for a user-supplied keyword, spoofing a browser
# User-Agent to get past the simplest anti-crawling check.
content = input("请输入你要检索的内容:")
url = f"https://www.sogou.com/web?query={content}"
headers = {
    # A browser User-Agent; without it Sogou serves a verification page.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}
resp = requests.get(url, headers=headers)
# BUG FIX: the Sogou search result is an HTML page, not JSON, so
# resp.json() raises requests.exceptions.JSONDecodeError.
# Use .text to get the raw HTML instead.
print(resp.text)
print(resp.request.headers)  # inspect the request headers actually sent
resp.close()  # release the connection explicitly
图片说明:
案例三
import requests
import json

# Case 3: POST a word to Baidu Fanyi's suggestion endpoint and print
# the parsed suggestions.
word = input("请输入一个单词:")
payload = {"kw": word}
response = requests.post("https://fanyi.baidu.com/sug", data=payload)
# .text would give the raw JSON string; .json() parses it into a dict,
# whose 'data' entry holds the suggestion list.
print(response.json()['data'])
运行结果:
和网页上的信息对比
案例四
import requests

# Case 4: fetch the Douban movie chart page with query parameters and a
# spoofed browser User-Agent.
url = "https://movie.douban.com/chart"
params = {
    "channel": "notification:user:253044743",
    "auth": "253044743_1643091007:ce03f9d9f2e9f941f6052a7ca79b098dbeea74b6",
}
headers = {
    # Browser User-Agent to get past Douban's basic anti-crawling check.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}
# `params` is appended to the URL as a query string by requests.
resp = requests.get(url, params=params, headers=headers)
# BUG FIX: /chart returns an HTML page, not JSON — resp.json() would raise
# requests.exceptions.JSONDecodeError. Use .text for the HTML. If structured
# data is needed, switch to the JSON endpoint
# https://movie.douban.com/j/chart/top_list (params: type, interval_id,
# action, start, limit). NOTE: that endpoint bans IPs on frequent requests —
# use a proxy when crawling repeatedly.
print(resp.text)
print(resp.request.url)  # the final URL including the encoded query string
resp.close()  # release the connection explicitly