Python爬虫IP代理
1.注册网络代理IP
2.在项目中获取代理
import urllib.request as ur

# Fetch a fresh proxy address from the data5u proxy provider's API.
# The endpoint returns a plain-text "host:port" string; strip trailing whitespace.
proxy_address = ur.urlopen(
    'http://api.ip.data5u.com/dynamic/get.html?order=3fae62193cd99e193417e4c4a53801d9&sep=4'
).read().decode('utf-8').strip()

# Create a ProxyHandler that routes plain-http traffic through the proxy.
proxy_handler = ur.ProxyHandler({
    'http': proxy_address,
})

# Build an opener object that uses the proxy handler.
proxy_opener = ur.build_opener(proxy_handler)

# BUG FIX: the original line was Request('url='https://edu.csdn.net/'') —
# mismatched quotes, a syntax error. Pass the URL as a keyword argument.
request = ur.Request(url='https://edu.csdn.net/')

# Open the request through the proxy and read the raw response bytes.
# BUG FIX: the original bound `respone` but printed `repone` (NameError);
# use one consistent name.
response = proxy_opener.open(request).read()
print(response)
反爬策略之模拟登录
import urllib.request as ur
import user_agent
import lxml.etree as le

# Anti-anti-scraping via simulated login: replay a real browser's
# User-Agent plus the Cookie captured from an authenticated browser
# session, so the server treats this request as the logged-in user.
request = ur.Request(
    url='https://edu.csdn.net/mycollege',
    headers={
        'User-Agent': user_agent.get_user_agent_pc(),
        # NOTE(review): this session cookie was copied from a live login
        # and will expire — refresh it when requests start failing.
        'Cookie': 'uuid_tt_dd=10_37384859980-1570443653891-141995; dc_session_id=10_1570443653891.997904; ADHOC_MEMBERSHIP_CLIENT_ID1.0=d0c1d53f-96c3-2fdd-4e76-cf3be594f894; UN=taochaocaj; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_37384859980-1570443653891-141995!5744*1*taochaocaj; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1581313892,1581328400,1581661884,1581668472; SESSION=2890d0c2-f987-448e-9803-fe79e6e0dafd; UserName=taochaocaj; UserInfo=f0d12962427d4b45b9f41612e61f41f5; UserToken=f0d12962427d4b45b9f41612e61f41f5; UserNick=%E5%87%89%E6%8B%8C%E8%B1%86%E8%85%90%E5%B9%B2; AU=782; BT=1581668510444; p_uid=U000000; announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Fblog.csdn.net%252Fblogdevteam%252Farticle%252Fdetails%252F103603408%2522%252C%2522announcementCount%2522%253A0%252C%2522announcementExpire%2522%253A3600000%257D; TY_SESSION_ID=8ac5d3e7-70d4-4d11-bdf2-0b75539b0c34; cname15568=1; dc_tos=q5on8r; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1581668524'
    }
)

# Fetch the page and decode the response body as UTF-8 text.
response = ur.urlopen(request).read().decode('utf-8')

# Optionally dump the raw HTML for offline inspection.
# (Fixed: the original commented-out code wrote the misspelled `respone`.)
# with open('mycollege.html', 'w', encoding='utf-8') as f:
#     f.write(response)

# Parse the HTML and extract course titles: the text of <a> elements
# under <h1> inside any <li> whose class attribute contains "item_box".
html_x = le.HTML(response)
title_s = html_x.xpath('//li[contains(@class,"item_box")]//h1/a/text()')
for title in title_s:
    print(title)