IP池搭建
一 ip搭建背景
cmd使用
chrome --proxy-server=代理IP
用代理IP
python爬虫使用代理如下
import requests
# Minimal example of sending one request through a proxy with requests.
url = "http://httpbin.org/ip"  # a service that answers with the IP it sees
proxy = {
    # Placeholder: either the proxy string exactly as obtained,
    # or the same string prefixed with "http://".
    'http': '代理',
}
proxies = [
    {'http': '代理'},
    {'http': '代理'},
]  # several proxies
# Without a timeout, requests waits forever on a dead or stalled proxy.
res = requests.get(url=url, proxies=proxy, timeout=10)
1-1 证权流(认证、权限、限流)
1-1-1 登录
# 你是谁
1-1-2 权限
# 有没有会员
1-1-3 限流
# 访问频率
1-2 代理IP
# custom:客户端;middler:中间代理商;server:服务端
二 获取IP流程分析
代理ip网址域名:www.zmhttp.com
2-1 登录流程分析
login_do里看不到响应数据,是因为登录时页面跳转过快,函数还没return就跳转到另一个页面了;此时可以用抓包工具Fiddler去抓包(如下图),成功获取响应数据
import requests
# Replay the site's login request directly against the login_do endpoint.
url = "https://uwapi.http.linkudp.com/index/users/login_do"
headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
}
# NOTE(review): the account phone/password are hard-coded in the form body;
# consider loading them from the environment or a config file instead.
data = "phone=18302843233&password=wjj18340.wjj&remember=1"
# Without a timeout, requests can block indefinitely if the server stalls.
res = requests.post(url=url, headers=headers, data=data, timeout=10)
res
成功获取json响应数据如上图
2-2 领取ip流程分析
- 找到生成api链接的get_new_ip的包
- 如上图发现只有具有session-id字段才能正常获取响应
- 发现用登录请求返回的响应数据中“ret_data”的值替换session-id的值,便能够成功获得响应
-
获取IP链接
import requests

# Seconds to wait on each HTTP call; without a timeout requests can hang
# indefinitely when the server never answers.
REQUEST_TIMEOUT = 10


def login():
    """Send the login request and return the ``ret_data`` field of the reply.

    Returns:
        The value stored under ``"ret_data"`` in the JSON response (it is
        used as the ``session-id`` header by ``new_get_ip``), or ``None``
        when the response has no such key.
    """
    # NOTE(review): the account phone/password are hard-coded in the form
    # body; consider loading them from the environment or a config file.
    url = "https://uwapi.http.linkudp.com/index/users/login_do"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    }
    data = "phone=18302843233&password=wjj18340.wjj&remember=1"
    res = requests.post(url=url, headers=headers, data=data,
                        timeout=REQUEST_TIMEOUT)
    return res.json().get("ret_data")


def new_get_ip():
    """Call the ``new_get_ips`` endpoint and return its ``ret_data`` field.

    A fresh login is performed on every call so that the request carries a
    ``session-id`` header.

    Returns:
        The value stored under ``"ret_data"`` in the JSON response, or
        ``None`` when the response has no such key.
    """
    url = "https://owapi.http.linkudp.com/index/api/new_get_ips"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "session-id": login(),
    }
    # "&region_type" had been mangled into "®ion_type" (the "&reg" prefix
    # was decoded as an HTML entity), which corrupted the form body.
    data = (
        "num=1&package_id=276876&type=1&pro_id=&port_type=1&city_id=&yys=0"
        "&time_show=false&city_show=false&yys_show=false"
        "&manyregions=&region_type=1&line_break=1&special_break="
        "&port_bit=4&m_repeat=1&pack_type=pack&long_city="
    )
    res = requests.post(url=url, headers=headers, data=data,
                        timeout=REQUEST_TIMEOUT)
    return res.json().get("ret_data")


new_get_ip()
-
提取和测试IP
import requests

# Seconds to wait on each HTTP call; without a timeout a dead proxy or a
# stalled server blocks the whole run indefinitely.
REQUEST_TIMEOUT = 10


def login():
    """Send the login request and return the ``ret_data`` field of the reply.

    Returns:
        The value stored under ``"ret_data"`` in the JSON response (it is
        used as the ``session-id`` header by ``get_ip_urls``), or ``None``
        when the response has no such key.
    """
    # NOTE(review): the account phone/password are hard-coded in the form
    # body; consider loading them from the environment or a config file.
    url = "https://uwapi.http.linkudp.com/index/users/login_do"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    }
    data = "phone=18302843233&password=wjj18340.wjj&remember=1"
    res = requests.post(url=url, headers=headers, data=data,
                        timeout=REQUEST_TIMEOUT)
    return res.json().get("ret_data")


def get_ip_urls():
    """Return the values of the ``ret_data`` mapping from ``new_get_ips``.

    Each value is later fetched with a GET request by ``get_ips``.

    Returns:
        list: the values of the ``ret_data`` object in the JSON response.
    """
    url = "https://owapi.http.linkudp.com/index/api/new_get_ips"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "session-id": login(),
    }
    # "&region_type" had been mangled into "®ion_type" (the "&reg" prefix
    # was decoded as an HTML entity), which corrupted the form body.
    data = (
        "num=1&package_id=276876&type=1&pro_id=&port_type=1&city_id=&yys=0"
        "&time_show=false&city_show=false&yys_show=false"
        "&manyregions=&region_type=1&line_break=1&special_break="
        "&port_bit=4&m_repeat=1&pack_type=pack&long_city="
    )
    res = requests.post(url=url, headers=headers, data=data,
                        timeout=REQUEST_TIMEOUT)
    return list(res.json().get("ret_data").values())


def get_ips():
    """Fetch every extraction URL and yield the response text.

    Yields:
        str: the body of each response with leading/trailing CR and LF
        characters removed (presumably an ``ip:port`` string - confirm
        against the real API output).
    """
    for url in get_ip_urls():
        res = requests.get(url, timeout=REQUEST_TIMEOUT)
        yield res.text.strip("\r\n")


def test_ip():
    """Probe each extracted proxy and report whether it works.

    Yields:
        bool: ``True`` when a request sent through the proxy returns a 2xx
        status code, ``False`` otherwise (including when the request fails).
    """
    url = 'https://www.baidu.com'
    for ip in get_ips():
        # requests picks the proxy by the scheme of the target URL. The
        # target is https, so an 'https' entry is required - with only an
        # 'http' entry the proxy is bypassed and every IP looks valid.
        proxy_url = 'http://' + ip
        proxies = {
            'http': proxy_url,
            'https': proxy_url,
        }
        try:
            status = requests.get(url=url, proxies=proxies,
                                  timeout=REQUEST_TIMEOUT).status_code
        except requests.RequestException:
            # An unreachable or broken proxy counts as invalid instead of
            # aborting the whole test run.
            yield False
        else:
            yield 200 <= status < 300


res = list(test_ip())
# Keep only the successful probes.
valid_res = [ok for ok in res if ok]
if res:
    # Share of extracted IPs that are usable.
    print(len(valid_res) / len(res))
else:
    # Avoid ZeroDivisionError when the API returned no IPs at all.
    print("no proxy IPs were returned, nothing to test")