from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.common.exceptions import TimeoutException
# Candidate proxies to probe for availability.
proxies = [
    '223.111.131.100:8888',
    '47.94.234.50:8888',
    '111.29.3.187:8888',
]

# Target page used to test each proxy.
u = 'http://mobile.cfda.gov.cn/datasearch/QueryRecord?tableId=43&searchF=ID&searchK=3'

# BUG FIX: `p` must be initialised ONCE, before the loop. The original reset
# it on every iteration, so it could only ever hold the last proxy tried.
p = []
for proxy in proxies:
    option = ChromeOptions()
    option.add_argument('--proxy-server={0}'.format(proxy))
    # Hide the "Chrome is being controlled by automated software" banner.
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    browser = webdriver.Chrome(options=option)
    browser.set_page_load_timeout(10)
    try:
        browser.get(u)
        # Explicit parser avoids the "no parser specified" warning and
        # platform-dependent parser selection; html.parser is stdlib.
        soup = BeautifulSoup(browser.page_source, "html.parser")
        p.append(proxy)
    except Exception:
        # Narrowed from bare `except:` — still catches TimeoutException and
        # WebDriverException, but no longer swallows KeyboardInterrupt.
        print(proxy + '打开网页失败')
    finally:
        # Always release the browser, success or failure (original had
        # duplicated quit() calls in both branches).
        browser.quit()
# 无用ip自动删除 (automatically remove unusable IPs):
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.common.exceptions import TimeoutException
def ip_judge(url, proxy):
    """Check whether *proxy* can load *url* within the page-load timeout.

    Parameters
    ----------
    url : str
        Page used to probe the proxy.
    proxy : str
        "host:port" string passed to Chrome's ``--proxy-server`` switch.

    Returns
    -------
    str
        ``'ok'`` if the page loaded, ``'no'`` otherwise. String results are
        kept (rather than booleans) for compatibility with existing callers
        that compare against ``'no'``.
    """
    option = ChromeOptions()
    option.add_argument('--proxy-server={0}'.format(proxy))
    # Hide the "controlled by automated software" banner.
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    browser = webdriver.Chrome(options=option)
    browser.set_page_load_timeout(10)
    try:
        browser.get(url)
        # Parse once to confirm we actually received usable HTML.
        BeautifulSoup(browser.page_source, "lxml")
        return 'ok'
    except Exception:
        # Narrowed from bare `except:` — still covers TimeoutException and
        # WebDriverException without swallowing KeyboardInterrupt/SystemExit.
        return 'no'
    finally:
        # Always release the browser (original duplicated quit() in both
        # the success and failure paths).
        browser.quit()
# Re-test every proxy stored in the file and rewrite it in place, keeping
# only the entries that still work.
with open("E:\\testdata\\ipchi.txt", 'r', encoding='utf-8-sig') as r:
    lines = r.readlines()

# u is the probe page; baidu is used as a fast, reliable target.
u = 'http://baidu.com'

with open("E:\\testdata\\ipchi.txt", 'w', encoding='utf-8') as w:
    for line in lines:
        proxy = line.strip()
        if not proxy:
            # Skip blank lines instead of probing an empty proxy string
            # (the original would have launched a browser for "").
            continue
        if ip_judge(u, proxy) == 'no':
            continue
        w.write(proxy + '\n')