from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.keys import Keys

# 1. Drive the browser to the login page.
options = ChromeOptions()
# Hide the "controlled by automated software" banner so the site is less
# likely to flag the session as automated.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)
b.get('https://www.taobao.com')

# 2. Block here until the human has finished logging in manually.
input('是否:')

# 3. Persist the post-login cookies to a local file.
#    A context manager closes the file deterministically — the original
#    open(...).write(...) leaked the handle.
with open('files/taobao.txt', 'w') as f:
    f.write(str(b.get_cookies()))
import ast

from selenium.webdriver import Chrome, ChromeOptions

# 1. Open the site that needs the previously saved cookies.
options = ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])
b = Chrome(options=options)
b.get('https://www.taobao.com')

# 2. Load the cookies dumped by the login script and attach them.
#    ast.literal_eval replaces eval(): it safely parses the str()-serialized
#    list of dicts without executing arbitrary code from the file.
with open('files/taobao.txt') as f:
    all_cookies = ast.literal_eval(f.read())
for cookie in all_cookies:
    # NOTE(review): only cookies flagged 'secure' are re-added — behavior
    # preserved from the original; confirm the others are really unneeded.
    if cookie['secure']:
        b.add_cookie(cookie)

# 3. Reload the page so the attached cookies take effect.
b.get('https://www.taobao.com')
# ---- 使用代理 (using a proxy with requests) ----
import requests


def get_html(url):
    """Fetch *url* through HTTP/HTTPS proxies and return the decoded body.

    :param url: page URL to download
    :return: response text, decoded with the sniffed (apparent) encoding
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
    # Route the request through proxy IPs so the target site sees the
    # proxy's address instead of ours (sample IPs — likely expired).
    proxies = {'http': '49.88.157.144:4560', 'https': '27.150.41.182:4512'}
    r = requests.get(url, headers=headers, proxies=proxies)
    r.encoding = r.apparent_encoding
    return r.text


if __name__ == '__main__':
    result = get_html('https://movie.douban.com/top250')
    print(result)
# ---- selenium使用代理 (using a proxy with selenium) ----
from selenium.webdriver import Chrome, ChromeOptions

# Configure Chrome to tunnel all traffic through a proxy IP and to hide the
# automation-control banner, then open the Douban Top 250 page.
chrome_options = ChromeOptions()
chrome_options.add_argument('--proxy-server=http://119.7.145.68:4578')
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
browser = Chrome(options=chrome_options)
browser.get('https://movie.douban.com/top250')
# ---- zhihu登录反爬 (bypassing Zhihu's login wall with a session cookie) ----
import requests


def get_html(url):
    """Fetch *url* using a logged-in session cookie to get past Zhihu's
    login wall, and return the decoded HTML.

    :param url: page URL to download
    :return: response text, decoded with the sniffed (apparent) encoding
    """
    headers = {
        # Session cookie copied from a logged-in browser — expires over time.
        'cookie': '_zap=30598a3d-5eef-4101-a497-4961b1f70a40; _xsrf=RF1zqKd7xnWMXdnnSXSeQiIXtf53kETs; d_c0="AHDRV3aWUBSPTv1_DvH6FbnoBGlKM69QDr0=|1641785535";',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    return r.text


if __name__ == '__main__':
    result = get_html('https://www.zhihu.com/')
    print(result)
# ---- 字体反爬 (font-based anti-crawl) ----
import requests


def get_html(url):
    """Download *url* and return the body decoded with the sniffed encoding.

    :param url: page URL to download
    :return: response text
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    return r.text


if __name__ == '__main__':
    print(get_html('https://www.qidian.com/finish/'))
# ---- 数据字体反爬 (decoding font-obfuscated data) ----
def get_html(url):
    """GET *url* with a desktop User-Agent and return the parsed JSON body.

    :param url: API URL to fetch
    :return: decoded JSON (dict)
    """
    # Third-party dependency imported lazily so the pure helpers below
    # (font, decode_price) remain importable without requests installed.
    import requests
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
    r = requests.get(url, headers=headers)
    r.encoding = r.apparent_encoding
    return r.json()


def font():
    """Build the obfuscated-font lookup table.

    :return: dict mapping decimal HTML character references (e.g. '&#57808')
             to the digit characters they render as on the page
    """
    # Hex code point -> rendered digit, taken from the site's custom font.
    font_dict = {'E1D0': '7', 'E325': '4', 'E41D': '1', 'E52E': '9',
                 'E630': '2', 'E76E': '8', 'E891': '5', 'E9CE': '0',
                 'EAF2': '3', 'EC4C': '6', 'F88A': '7'}
    # The API's price strings use the decimal '&#NNNNN' entity form, so
    # convert each hex code point to that representation.
    new_font_dict = {'&#' + str(int(x, base=16)): font_dict[x] for x in font_dict}
    print(new_font_dict)
    return new_font_dict


def decode_price(price, font_dict):
    """Translate one obfuscated price string into plain digits.

    :param price: raw price field, digit entities separated by ';'
    :param font_dict: table produced by font()
    :return: decoded price string
    """
    decoded = ''
    for token in price.split(';'):
        if token in font_dict:
            decoded += font_dict[token]
        elif token[1:] in font_dict:
            # A one-character prefix before the entity is rendered as a
            # decimal point (behavior preserved from the original loop).
            decoded += '.' + font_dict[token[1:]]
        else:
            # Pass through anything that is not an obfuscated digit.
            decoded += token
    return decoded


if __name__ == '__main__':
    font_dict = font()
    url = 'https://mapi.guazi.com/car-source/carList/pcList?minor=&sourceType=&ec_buy_car_list_ab=&location_city=&district_id=&tag=-1&license_date=&auto_type=&driving_type=&gearbox=&road_haul=&air_displacement=&emission=&car_color=&guobie=&bright_spot_config=&seat=&fuel_type=&order=&priceRange=0,-1&tag_types=3&diff_city=&intention_options=&initialPriceRange=&monthlyPriceRange=&transfer_num=&car_year=&carid_qigangshu=&carid_jinqixingshi=&cheliangjibie=&page=1&pageSize=20&city_filter=12&city=12&guazi_city=12&qpres=&versionId=0.0.0.0&osv=IOS&platfromSource=wap'
    result = get_html(url)
    for item in result['data']['postList']:
        print(item['title'], decode_price(item['price'], font_dict))