import json
import requests
from requests.exceptions import RequestException
import faker
from multiprocessing import Pool
# Faker instance used to generate a plausible browser User-Agent string.
fake = faker.Factory.create()
# Default request headers shared by get_html(); the randomized User-Agent
# helps avoid trivial bot detection.
headers = {
'Connection': 'keep-alive',
'User-Agent': fake.user_agent()
}
# Generic helper for fetching a web page's HTML.
def get_html(url, timeout=10):
    """Fetch *url* and return its decoded HTML text, or ``None`` on failure.

    Uses the module-level ``headers`` for the request.  A non-200 status
    code or any network error (``RequestException``) yields ``None``.

    :param url: the URL to fetch
    :param timeout: seconds to wait before giving up (new, defaults to 10;
        the original had no timeout and could block forever)
    :return: page text as ``str``, or ``None``
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            # Let requests sniff the encoding from the body so non-UTF-8
            # (e.g. GBK) Chinese pages decode correctly.
            response.encoding = response.apparent_encoding
            return response.text
        # Explicit: the original silently fell through to an implicit None.
        return None
    except RequestException:
        return None
if __name__ == '__main__':
    # Study notes for the requests library: exercise each of the 13
    # control keyword arguments (**kwargs) against httpbin.
    url = 'http://www.httpbin.org/'
    content = get_html(url)
    # --- Important kwargs ---
    # params: dict or byte sequence, appended to the URL as the query string.
    payload = {'key1': 'value1', 'key2': 'value2'}
    r_get = requests.get(url='http://www.httpbin.org/get', params=payload)
    # data: dict, byte sequence, or file object sent as the request body.
    kv = {'key1': 'value1', 'key2': 'value2'}
    r_post = requests.post(url='http://www.httpbin.org/post', data=kv)
    # BUGFIX: this assignment was commented out, so the next line raised
    # NameError: name 'body' is not defined.
    body = '主题内容'
    r_post = requests.post(url='http://www.httpbin.org/post', data=body)
    # json: JSON-serializable object sent as the request body.
    kv = {'key1': 'value1', 'key2': 'value2'}
    r_post = requests.post(url='http://www.httpbin.org/post', json=kv)
    # headers: dict of custom HTTP headers (shadows the module-level dict).
    fake = faker.Factory.create()
    headers = {
        'Connection': 'keep-alive',
        'User-Agent': fake.user_agent()
    }
    r_get = requests.get(url='http://www.httpbin.org/post', headers=headers)
    # cookies: dict or CookieJar attached to the request.
    # auth: tuple enabling HTTP basic authentication.
    # files: dict for multipart file uploads.
    # timeout: request timeout in seconds.
    # proxies: dict mapping scheme -> proxy server; credentials may be
    # embedded in the proxy URL.
    pxs = {'http': 'http://user:pass@10.10.10.1:1234',
           'https': 'https://10.10.10.1:1234'
           }
    r = requests.get('http://www.baidu.com', proxies=pxs)
    # --- Advanced switches ---
    # allow_redirects: True/False, default True — follow redirects.
    # stream: True/False, default True — download the body immediately.
    # verify: True/False, default True — verify the SSL certificate.
    # cert: path to a local SSL client certificate.
    # GET request demo: inspect the main Response attributes.
    payload = {'key1': 'value1', 'key2': 'value2'}
    r_get = requests.get(url + 'get', headers=headers, params=payload)
    print(r_get.url)
    print(r_get.status_code)
    print(r_get.headers)
    print(r_get.text)
    print(r_get.json())
    print(r_get.content)
    print(r_get.cookies)
    print(r_get.apparent_encoding)
    # POST request demo.
    payload = {'key1': 'value1', 'key2': 'value2'}
    # NOTE(review): params= puts the JSON string in the query string, not the
    # body — compare with the plain-dict request below; data= is the usual
    # way to send a body.  Kept as-is to preserve the experiment.
    r_post = requests.post(url=url + 'post', headers=headers, params=json.dumps(payload))
    print(r_post.url)
    comp = requests.post(url=url + 'post', headers=headers, params=payload)
    print(comp.url)
    print(r_post.status_code)
    print(r_post.headers)
    print(r_post.text)
    print(r_post.json())
    print(r_post.content)
    print(r_post.apparent_encoding)
    # Search-engine keyword submission interfaces:
    #   Google: https://www.google.com/search?q=keyword
    #   Baidu:  http://www.baidu.com/s?wd=keyword
    url1 = 'https://www.ip138.com/iplookup.asp?ip=113.247.70.95&action=2'
    html = get_html(url1)
    print(html)
# 爬虫学习--requests库 (blog-post title accidentally pasted into the script;
# commented out so the file remains valid Python)
# 最新推荐文章于 2024-01-24 13:37:53 发布