#!C:\Python3.7
# -*- coding:utf-8 -*-
"""Build a pool of working proxies from the kuaidaili free proxy list.

The script walks the paginated listing under ``base_url``, probes every
listed ``ip:port`` by issuing a request through it, and writes the
proxies that answered to ``proxy.txt`` in the current working directory.
"""
import optparse
import os

import requests
from lxml import etree

base_url = "https://www.kuaidaili.com/free/inha/"

# Seconds to wait on any single HTTP request. Free proxies are frequently
# dead, so without a timeout one unresponsive host stalls the whole run.
REQUEST_TIMEOUT = 10

# Give up after this many consecutive pages produced no working proxy, so
# the crawl cannot loop forever when the site is unreachable or exhausted.
MAX_EMPTY_PAGES = 20


def get_proxy_IP_port(url):
    """Scrape one listing page and return the proxies that pass test_proxy.

    Args:
        url: Address of a listing page (``base_url`` plus a page number).

    Returns:
        A list of ``"ip:port"`` strings. The list is empty when the page
        cannot be fetched or parsed, or when no listed proxy works; the
        function never returns ``None``, so callers can concatenate the
        result unconditionally.
    """
    print("抓取网页:", url)
    try:
        resp = requests.get(url, timeout=REQUEST_TIMEOUT)
        html = etree.HTML(resp.text)
    except (requests.RequestException, ValueError, etree.LxmlError) as e:
        # Best effort: report the failure and let the caller try the next page.
        print(e)
        return []
    if html is None:
        # The parser produced no document tree (e.g. an empty response body).
        return []

    working = []
    for tr in html.xpath('//*[@id="list"]//tbody/tr'):
        ip_cells = tr.xpath('./td[@data-title="IP"]/text()')
        port_cells = tr.xpath('./td[@data-title="PORT"]/text()')
        if not ip_cells or not port_cells:
            # Skip a malformed row instead of losing the whole page.
            continue
        ip = ip_cells[0].strip()
        port = port_cells[0].strip()
        if test_proxy(ip, port):
            working.append(ip + ":" + port)
    return working


def test_proxy(ip, port):
    """Return True if a request routed through ``ip:port`` gets HTTP 200.

    Args:
        ip: Proxy host address.
        port: Proxy port (string or int).

    Returns:
        True when the probe request succeeds with status 200; False on any
        other status, on timeout, or on any connection/proxy error.
    """
    proxy_url = "http://" + str(ip) + ":" + str(port)
    # The probe URL is https, and requests picks the proxy by the scheme of
    # the target URL, so the "https" entry is what actually routes the probe
    # through the proxy. With only an "http" entry the proxy is bypassed.
    proxies = {"http": proxy_url, "https": proxy_url}
    try:
        resp = requests.get(
            url="https://www.baidu.com/",
            proxies=proxies,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        # A dead or misbehaving proxy is an expected outcome, not an error.
        return False
    return resp.status_code == 200


def save_prox(filename, proxy_list):
    """Write the proxies to ``filename`` under the current directory.

    Args:
        filename: Name of the output file; a relative name is resolved
            against the current working directory.
        proxy_list: Iterable of ``"ip:port"`` strings, written one per line.
    """
    path = os.path.join(os.getcwd(), filename)
    # Open the same path that is reported below so the message is accurate.
    with open(path, "w", encoding="utf-8") as fw:
        fw.writelines(proxy + "\n" for proxy in proxy_list)
    print("proxy save file :", path)


def main():
    """Prompt for a proxy count, collect that many, and save them."""
    num = input("输入抓取代理个数:")
    try:
        wanted = int(num)
    except ValueError:
        raise SystemExit("invalid proxy count: {!r}".format(num))

    proxy_list = []
    page = 1
    empty_pages = 0
    while len(proxy_list) < wanted and empty_pages < MAX_EMPTY_PAGES:
        found = get_proxy_IP_port(base_url + str(page))
        # Count only consecutive empty pages; any hit resets the counter.
        empty_pages = 0 if found else empty_pages + 1
        proxy_list.extend(found)
        page += 1
        print(len(proxy_list))
    save_prox("proxy.txt", proxy_list)


if __name__ == '__main__':
    main()
从国内免费代理中筛选有效地址,制作代理地址池
最新推荐文章于 2024-08-01 11:53:57 发布