## 爬取“个人中心”
1、添加cookie
登录成功 手动登录
点击Network抓包 复制粘贴 登录成功后的cookie
headers = {'cookie':''}
Request(headers=)
"""
爬取“个人中心”
1、添加cookie
登陆成功 手动登录
点击Network抓包 复制粘贴 登录成功后的cookie
headers = {'cookie':''}
Request(headers=)
"""
import urllib.request

# Fetch a login-protected page by replaying a cookie copied from a browser
# session, then save the raw response body to disk for inspection.

# 1. URL of the page to fetch.
url = "http://i.chaoxing.com/base?t=1615027880850"

# 2. Request headers.
# NOTE(review): the Cookie value is a hard-coded browser session. It is a
# secret and will presumably stop working once the session expires --
# consider loading it from outside the source file.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
    'Cookie': 'lv=1; fid=503; _uid=102913687; uf=569b376a64ccf0319dbf35210ecb9145d898ba29a96cda28224255f355a068ba013809f6f4d9043e52afb3131a181a68c49d67c0c30ca5047c5a963e85f11099478c43796c177fc4ce71fc6e59483dd3ab844373f99cf9c30c5132526d0c9906bf567a51442a09c4; _d=1619181597552; UID=102913687; vc=999ABC6281CE2CC4C21D18C2FEFCA239; vc2=486AA56C7CF895ACB5BCF665E8674965; vc3=a64h8zf28TQsQsQrh594%2F8M%2Fo0wA5TE4da4NEKCkjKTeYQ5nOcq73ajCG%2FLvkrFzrF2Z53l%2FJ9dPkdsTLxLwU9DkA%2B%2FzgX7cVplff3FnOlHG0dp28JRR%2F5S71mWzUisruP4ZgXknY58niOs8W5stuB7VGCy2iPnGJC3M8nOO7Ig%3D9e00dbc50c688478e9c3beb3772ccb59; xxtenc=9059e9987e91f4b3db82100ec0af5978; DSSTASH_LOG=C_38-UN_328-US_102913687-T_1619181597553; JSESSIONID=051D4234D31BDBCEA569BEBA14EBC1AB; spaceFid=503; spaceRoleId=""; route=0d3ee366e02d727b7b6bb10aae7f99cf; rt=-2; tl=0'
}

# 3. Build the request object carrying the headers above.
request = urllib.request.Request(url, headers=headers)

# 4-5. Send the request and read the body. The context manager closes the
# connection even if reading fails; the timeout keeps a stalled server from
# hanging the script forever.
with urllib.request.urlopen(request, timeout=10) as response:
    data = response.read()

# Save the raw bytes to a file so the result can be checked in a browser.
with open('01cookie.html', 'wb') as f:
    f.write(data)
2、第二种方式
代码登录 登录成功 cookie(有效)
自动带着cookie 去请求个人中心
"""
获取个人中心的页面
2、第二种方式
代码登录 登录成功 cookie(有效)
自动带着cookie 去请求个人中心
"""
import urllib.request
from http import cookiejar
from urllib import parse

# Log in from code, let a CookieJar capture the session cookie, then reuse the
# same opener to fetch the member page and save it to disk.

# Login page (before logging in): https://www.yaozh.com/login/
# Per the original notes, the backend decides by HTTP method: a GET returns
# the login page, a POST returns the login result.

# 1. Log in from code.
# 1.1 Login URL.
login_url = 'https://www.yaozh.com/login'

# 1.2 Login form fields.
# NOTE(review): credentials are hard-coded in source; move them to
# configuration or the environment before sharing this file.
# NOTE(review): "formhash" looks like a per-session token -- confirm whether a
# fixed value is accepted by the server.
login_form_data = {
    "username": "xiaomaoera12",
    "pwd": "lina081012",
    "formhash": "CE3ADF28C5",
    # Raw URL on purpose: urlencode() below does the percent-encoding, so a
    # pre-encoded value would be encoded twice ("%" -> "%25").
    "backurl": "https://www.yaozh.com/"
}

# 1.3 Send the login request as a POST.
# The jar stores whatever cookies the server sets.
cook_jar = cookiejar.CookieJar()
# Handler that reads and writes cookies through the jar.
cook_handler = urllib.request.HTTPCookieProcessor(cook_jar)
# Opener built on that handler; every request made through it shares the jar.
opener = urllib.request.build_opener(cook_handler)

# Request headers used for both requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}

# The form must be URL-encoded, and POST data must be bytes.
login_str = parse.urlencode(login_form_data).encode('utf-8')
login_request = urllib.request.Request(login_url, headers=headers, data=login_str)

# If the login succeeds the jar keeps the session cookie; the response body
# itself is not needed, so the connection is closed right away.
opener.open(login_request, timeout=10).close()

# 2. Fetch the member page with the cookies collected above.
center_url = 'https://www.yaozh.com/member/'
center_request = urllib.request.Request(center_url, headers=headers)

# Open the Request object (not the bare URL) so the User-Agent header is sent.
with opener.open(center_request, timeout=10) as response:
    data = response.read()

# The file is opened in binary mode, so the body is written as bytes without
# decoding it first.
with open('02cook.html', 'wb') as f:
    f.write(data)
# 一个用户 在不同的地点(IP(福建,上海, 杭州, 河南)) 不同浏览器 上面 不停的登录 非人为操作
# 封你的账号
# N 个 账号
## requests 模块
requests网址 https://docs.python-requests.org/en/master/
1、基础
#一、基础
# import requests
#
# url = "http://www.baidu.com"
# response = requests.get(url)
# print(response)
# #content属性 返回的类型 是bytes
# data = response.content.decode('utf-8')
# print(data)
2、requests方法
#二、requests方法
"""
import requests


class RequestSpider(object):
    # Small demo of the metadata available on a requests response:
    # request/response headers, status code, and request/response cookies.

    def __init__(self):
        # Perform the GET once at construction time and keep the response.
        url = "http://www.baidu.com"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
        }
        # The timeout keeps an unresponsive server from blocking forever.
        self.response = requests.get(url, headers=headers, timeout=10)

    def run(self):
        # Print each piece of metadata of the stored response in turn.

        # 1. Headers that were sent with the request.
        request_headers = self.response.request.headers
        print(request_headers)

        # 2. Headers returned by the server.
        response_headers = self.response.headers
        print(response_headers)

        # 3. HTTP status code.
        code = self.response.status_code
        print(code)

        # 4. Cookies attached to the request.
        # NOTE(review): `_cookies` is a private attribute of the prepared
        # request and may change between requests versions.
        request_cookie = self.response.request._cookies
        print(request_cookie)

        # 5. Cookies set by the response.
        response_cookie = self.response.cookies
        print(response_cookie)


RequestSpider().run()
"""
3、 参数 自动转译
# 3、 参数 自动转译
"""
import requests

# Search Baidu with a non-ASCII keyword and save the result page to disk.

# Hand-written form of the same query, kept for comparison:
# url = 'https://www.baidu.com/s?ie=UTF-8&wd=美女'
url = 'https://www.baidu.com/s'
params = {
    'wd': "美女"
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}

# Query parameters passed as a dict via `params` are encoded automatically.
# response = requests.get(url, headers=headers)
# The timeout keeps an unresponsive server from blocking forever.
response = requests.get(url, headers=headers, params=params, timeout=10)
data = response.content.decode()

# Pass encoding='utf-8' explicitly; per the original note, omitting it leads
# to a 'gbk' codec error (presumably where the platform default is GBK).
with open('baidu.html', 'w', encoding='utf-8') as f:
    f.write(data)

# Sending a POST with parameters:
# requests.post(url, data={...form fields...}, json={...json body...})
"""
#json
# JSON responses
import requests
import json

# This endpoint returns JSON rather than HTML.
url = 'https://api.github.com/user'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}
# The timeout keeps an unresponsive server from blocking forever.
response = requests.get(url, headers=headers, timeout=10)

# Manual alternative, kept for reference:
# data = response.content.decode()   # bytes -> str
# print(data)
# data_dict = json.loads(data)       # str -> dict

# response.json() parses the JSON body straight into Python dict/list objects.
data_dict = response.json()
# NOTE(review): assumes the payload has a 'message' key (presumably the error
# body of an unauthenticated call) -- this raises KeyError otherwise.
print(data_dict['message'])