爬虫DAY5

免费代理
#encoding:utf-8
import requests

# 1. Target URL of the request.
url = "http://www.baidu.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
# A free proxy is written as {"scheme": "IP:port"}; the "http" entry is the
# one applied to http:// URLs such as the one above.
free_proxy = {"http": "27.17.45.90:43411"}
# requests applies no timeout by default, and free proxies are frequently
# dead or stalled, so bound the wait instead of letting the script hang.
response = requests.get(url, headers=headers, proxies=free_proxy, timeout=10)
print(response.status_code)
忽略证书认证SSL的操作
#encoding:utf-8
import requests

# 1. Target URL of the request. Certificate verification only happens on
#    HTTPS connections, so the example has to use an https:// address.
url = "https://www.baidu.com"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
}
# verify=False tells requests to skip TLS certificate verification (the
# default is verify=True). This lets the request succeed against a site whose
# certificate cannot be validated, at the cost of the protection the check
# provides; urllib3 emits an InsecureRequestWarning for such requests.
response = requests.get(url, headers=headers, verify=False)
print(response.status_code)
requests的cookie请求

手动抓包获取cookie后,可以把cookie字符串直接放在请求头(headers)的"Cookie"字段里,也可以在发送get请求时通过cookies参数传入。注意:cookies参数接受的是字典类型,因此需要先把cookie字符串转换为字典——手动拼写、按分隔符拆分或用正则提取都可以。

方法一:cookie放在header里面

#encoding:utf-8
import requests

# Page to fetch with the cookie captured from the browser.
member_url = 'https://www.yaozh.com/member/'

# Way 1: send the captured cookie string verbatim as the "Cookie" request
# header. (The alternative, the cookies= parameter, needs a dict instead,
# which can be written out by hand or extracted with a regex.)
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
cookie_header = "_ga=GA1.2.453819300.1605698635; _gid=GA1.2.1545737704.1605698635; _gat=1; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1605699991; yaozh_uidhas=1; acw_tc=2f624a2c16056986318106268e7b5c5780592359df428fea4543999c74f920; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1605698635%2C1605699868%2C1605699919; UtzD_f52b_saltkey=TuAyAS9a; UtzD_f52b_lastvisit=1605697511; _ga=GA1.1.1391703708.1605701117; _gid=GA1.1.363878918.1605701117; UtzD_f52b_ulastactivity=1605699987%7C0; UtzD_f52b_creditrule=%E6%AF%8F%E5%A4%A9%E7%99%BB%E5%BD%95; PHPSESSID=vjeitstu34vqng73oargrnm487; yaozh_mylogin=1605744810; UtzD_f52b_creditnotice=0D0D2D0D0D0D0D0D0D833314; UtzD_f52b_creditbase=0D0D0D0D0D0D0D0D0; yaozh_userId=1008154; UtzD_f52b_lastact=1605760658%09uc.php%09; _gat=1"
headers = {
    "User-Agent": user_agent,
    "Cookie": cookie_header,
}

# Fetch the page and decode the raw response bytes as UTF-8 text.
response = requests.get(member_url, headers=headers)
page_bytes = response.content
data = page_bytes.decode("utf-8")

# Save the HTML so the result can be inspected in a browser.
with open('04cook.html', mode='w', encoding='utf-8') as f:
    f.write(data)

方法二:手动转化为字典形式

#encoding:utf-8
import requests

# Page to fetch with the cookie captured from the browser.
member_url = 'https://www.yaozh.com/member/'
# Raw cookie string as copied from the browser (kept for reference; the dict
# below was typed out from it by hand).
#cookies = '_ga=GA1.2.453819300.1605698635; _gid=GA1.2.1545737704.1605698635; _gat=1; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1605699991; yaozh_uidhas=1; acw_tc=2f624a2c16056986318106268e7b5c5780592359df428fea4543999c74f920; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1605698635%2C1605699868%2C1605699919; UtzD_f52b_saltkey=TuAyAS9a; UtzD_f52b_lastvisit=1605697511; _ga=GA1.1.1391703708.1605701117; _gid=GA1.1.363878918.1605701117; UtzD_f52b_ulastactivity=1605699987%7C0; UtzD_f52b_creditrule=%E6%AF%8F%E5%A4%A9%E7%99%BB%E5%BD%95; PHPSESSID=vjeitstu34vqng73oargrnm487; yaozh_mylogin=1605744810; UtzD_f52b_creditnotice=0D0D2D0D0D0D0D0D0D833314; UtzD_f52b_creditbase=0D0D0D0D0D0D0D0D0; yaozh_userId=1008154; UtzD_f52b_lastact=1605760658%09uc.php%09; _gat=1'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
}

# Way 2: pass the cookie through the cookies= parameter, which needs a dict.
# The raw string repeats the names _ga, _gid and _gat. A dict holds each key
# once and a repeated key in a literal keeps only its LAST value, so each name
# is written a single time here with the value that occurrence carries.
cook_dict = {
    "_ga": "GA1.1.1391703708.1605701117",
    "_gid": "GA1.1.363878918.1605701117",
    "_gat": "1",
    "Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94": "1605699991",
    "yaozh_uidhas": "1",
    "acw_tc": "2f624a2c16056986318106268e7b5c5780592359df428fea4543999c74f920",
    "Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94": "1605698635%2C1605699868%2C1605699919",
    "UtzD_f52b_saltkey": "TuAyAS9a",
    "UtzD_f52b_lastvisit": "1605697511",
    "UtzD_f52b_ulastactivity": "1605699987%7C0",
    "UtzD_f52b_creditrule": "%E6%AF%8F%E5%A4%A9%E7%99%BB%E5%BD%95",
    "PHPSESSID": "vjeitstu34vqng73oargrnm487",
    "yaozh_mylogin": "1605744810",
    "UtzD_f52b_creditnotice": "0D0D2D0D0D0D0D0D0D833314",
    "UtzD_f52b_creditbase": "0D0D0D0D0D0D0D0D0",
    "yaozh_userId": "1008154",
    "UtzD_f52b_lastact": "1605760658%09uc.php%09",
}

# Fetch the page with the cookie dict attached and decode the bytes as UTF-8.
response = requests.get(member_url, headers=headers, cookies=cook_dict)
data = response.content.decode("utf-8")
# Save the HTML so the result can be inspected in a browser.
with open('04cook.html', 'w', encoding='utf-8') as f:
    f.write(data)

获取cookie字典类型方法三:分隔+for循环

# The cookies= parameter needs a dict, so convert the raw "k=v; k=v" string.
cook_dict = {}
cookies_list = cookies.split('; ')
for cookie in cookies_list:
    # Split on the FIRST '=' only: a cookie value may itself contain '='
    # (e.g. base64 padding), and split('=')[1] would silently truncate it.
    name, _, value = cookie.partition('=')
    # Skip empty fragments (e.g. when the cookie string itself is empty).
    if name:
        cook_dict[name] = value

获取cookie字典类型方法四:字典推导式

# The cookies= parameter needs a dict; build it from the raw "k=v; k=v"
# string in one dict comprehension. partition('=') splits on the FIRST '='
# only, so a value that itself contains '=' is kept whole, and fragments with
# an empty name (e.g. from an empty cookie string) are skipped.
cook_dict = {
    name: value
    for name, _, value in (cookie.partition('=') for cookie in cookies.split('; '))
    if name
}

自动带着cookie去请求

#encoding:utf-8
import requests

# Page to fetch once the session is logged in.
member_url = 'https://www.yaozh.com/member/'

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
}

# 1. Log in from code.
login_url = 'https://www.yaozh.com/login'
# NOTE(review): the account name and password are hard-coded here; load them
# from the environment or a config file before sharing or committing this.
# NOTE(review): "formhash" looks like a token copied from a captured login
# request -- confirm it is still accepted by the site.
login_form_data = {
    "username": "wsssssang",
    "pwd": "wss7622",
    "formhash": "1F3337010D",
    # Plain (un-encoded) URL: requests form-encodes the values of a data=
    # dict itself, so a pre-encoded "https%3A%2F%2F..." would be encoded a
    # second time ("%" -> "%25") and reach the server as the wrong value.
    "backurl": "https://www.yaozh.com/",
}

# A Session keeps the cookies set by earlier responses and sends them on
# later requests; the with-block closes its connections when done.
with requests.Session() as session:
    login_response = session.post(login_url, data=login_form_data, headers=headers)
    print(login_response.content.decode())
    # 2. After logging in, request the target page through the same session
    #    so the cookies obtained at login are sent along.
    data = session.get(member_url, headers=headers).content.decode(encoding="utf-8")

# Save the HTML so the result can be inspected in a browser.
with open('05-cookie2.html', 'w', encoding="utf-8") as f:
    f.write(data)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值