第四天 常见反爬

登录反爬操作

import os

from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.keys import Keys

# 1. Drive a browser to the login page.
options = ChromeOptions()
# Ask Chrome to start without its 'enable-automation' switch.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)
b.get('https://www.taobao.com')

# 2. Block here until the operator has logged in by hand and pressed Enter.
input('是否:')

# 3. Save the post-login cookies to a local file.
# The list is stored as its Python literal text (str() of a list of dicts).
cookie_path = 'files/taobao.txt'
# Create the target directory first so a missing folder does not throw away
# the session that was just established manually.
os.makedirs(os.path.dirname(cookie_path), exist_ok=True)
# Use a context manager so the file is flushed and closed deterministically.
with open(cookie_path, 'w') as cookie_file:
    cookie_file.write(str(b.get_cookies()))
import ast

from selenium.webdriver import Chrome, ChromeOptions

# 1. Open the site that the saved cookies are meant for.
options = ChromeOptions()
# Ask Chrome to start without its 'enable-automation' switch.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)
b.get('https://www.taobao.com')

# 2. Add the saved cookies.
# The file holds the text of a Python list of dicts. ast.literal_eval parses
# plain literals only, so a tampered file cannot execute code the way eval()
# would. The context manager closes the file handle.
with open('files/taobao.txt') as cookie_file:
    all_cookies = ast.literal_eval(cookie_file.read())
for cookie in all_cookies:
    # Only cookies flagged as secure are re-added; .get() tolerates a cookie
    # dict that lacks the 'secure' key instead of raising KeyError.
    if cookie.get('secure'):
        b.add_cookie(cookie)

# 3. Reload the page so the request is sent with the added cookies.
b.get('https://www.taobao.com')

使用代理

import requests


def get_html(url, proxies=None, timeout=10):
    """Fetch *url* through a proxy and return the response body as text.

    Args:
        url: Address to request.
        proxies: Optional requests-style proxy mapping (URL scheme -> proxy
            address). When omitted, the sample proxy addresses hard-coded
            below are used.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the timeout elapses.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    if proxies is None:
        # Sample proxy IPs; NOTE(review): these are presumably short-lived and
        # need replacing with working ones before running.
        proxies = {'http': '49.88.157.144:4560', 'https': '27.150.41.182:4512'}
    # Without a timeout an unresponsive proxy would block this call forever.
    r = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)

    # Decode using the encoding guessed from the body rather than the header.
    r.encoding = r.apparent_encoding
    return r.text


if __name__ == '__main__':
    # Fetch the Douban Top 250 page via get_html and dump the raw HTML.
    target_url = 'https://movie.douban.com/top250'
    result = get_html(target_url)
    print(result)

selenium使用代理

from selenium.webdriver import Chrome,ChromeOptions

# Values used below, named up front so they are easy to swap out.
proxy_flag = '--proxy-server=http://119.7.145.68:4578'
target_url = 'https://movie.douban.com/top250'

options = ChromeOptions()
# Add the proxy IP as a Chrome command-line argument.
options.add_argument(proxy_flag)
# Ask Chrome to start without its 'enable-automation' switch.
options.add_experimental_option('excludeSwitches', ['enable-automation'])

# Launch the browser with those options and open the page.
b = Chrome(options=options)
b.get(target_url)

zhihu登录反爬

import requests


def get_html(url, timeout=10):
    """Fetch *url* with a fixed cookie header and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the timeout elapses.
    """
    # Send a cookie along with a browser User-Agent.
    # NOTE(review): the cookie value is hard-coded; presumably it was copied
    # from a logged-in browser session and will need refreshing — confirm.
    headers = {
        'cookie': '_zap=30598a3d-5eef-4101-a497-4961b1f70a40; _xsrf=RF1zqKd7xnWMXdnnSXSeQiIXtf53kETs; d_c0="AHDRV3aWUBSPTv1_DvH6FbnoBGlKM69QDr0=|1641785535";',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    # Without a timeout a stalled connection would block this call forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    # Decode using the encoding guessed from the body rather than the header.
    r.encoding = r.apparent_encoding
    return r.text


if __name__ == '__main__':
    # Request the Zhihu home page via get_html and print what comes back.
    home_url = 'https://www.zhihu.com/'
    result = get_html(home_url)
    print(result)

字体反爬

import requests


def get_html(url, timeout=10):
    """Fetch *url* with a browser User-Agent and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the timeout elapses.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    # Without a timeout a stalled connection would block this call forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    # Decode using the encoding guessed from the body rather than the header.
    r.encoding = r.apparent_encoding
    return r.text


if __name__ == '__main__':
    # Download the Qidian "finish" listing page and print its HTML.
    page_html = get_html('https://www.qidian.com/finish/')
    print(page_html)

数据字体反爬

import requests


def get_html(url, timeout=10):
    """Fetch *url* with a browser User-Agent and return the parsed JSON body.

    Args:
        url: Address to request; the response is expected to be JSON.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        The object produced by parsing the response body as JSON.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the timeout elapses.
        ValueError: If the body is not valid JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    # Without a timeout a stalled connection would block this call forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    # Set the encoding from the body content before the payload is parsed.
    r.encoding = r.apparent_encoding
    return r.json()


def font():
    """Build, print, and return the entity-to-digit lookup table.

    The table below is keyed by hexadecimal code points. Each key is turned
    into the text ``'&#<decimal code point>'`` (no trailing semicolon) and
    mapped to the same digit string.

    Returns:
        dict mapping ``'&#<decimal>'`` strings to single-digit strings.
    """
    hex_to_digit = {
        'E1D0': '7', 'E325': '4', 'E41D': '1', 'E52E': '9', 'E630': '2', 'E76E': '8',
        'E891': '5', 'E9CE': '0', 'EAF2': '3', 'EC4C': '6', 'F88A': '7'
    }
    entity_to_digit = {
        f'&#{int(hex_code, 16)}': digit
        for hex_code, digit in hex_to_digit.items()
    }
    # Echo the converted table to stdout.
    print(entity_to_digit)
    return entity_to_digit


if __name__ == '__main__':
    # Fetch one page of the car list and print each title with its price
    # decoded through the lookup table returned by font().
    font_dict = font()
    url = 'https://mapi.guazi.com/car-source/carList/pcList?minor=&sourceType=&ec_buy_car_list_ab=&location_city=&district_id=&tag=-1&license_date=&auto_type=&driving_type=&gearbox=&road_haul=&air_displacement=&emission=&car_color=&guobie=&bright_spot_config=&seat=&fuel_type=&order=&priceRange=0,-1&tag_types=3&diff_city=&intention_options=&initialPriceRange=&monthlyPriceRange=&transfer_num=&car_year=&carid_qigangshu=&carid_jinqixingshi=&cheliangjibie=&page=1&pageSize=20&city_filter=12&city=12&guazi_city=12&qpres=&versionId=0.0.0.0&osv=IOS&platfromSource=wap'
    result = get_html(url)
    # Distinct names for the outer and inner loop variables: the inner loop
    # previously rebound the outer loop's variable.
    for car in result['data']['postList']:
        name = car['title']
        # The price text is split on ';' so each piece can be looked up in
        # font_dict, whose keys carry no trailing semicolon.
        decoded_parts = []
        for token in car['price'].split(';'):
            if token in font_dict:
                decoded_parts.append(font_dict[token])
            elif token[1:] in font_dict:
                # One extra leading character precedes the entity; a '.' is
                # emitted in its place.
                # NOTE(review): presumably that character is always the
                # decimal point — confirm against real API output.
                decoded_parts.append('.' + font_dict[token[1:]])
            else:
                # Anything that is not in the table is kept unchanged.
                decoded_parts.append(token)
        price = ''.join(decoded_parts)
        print(name, price)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值