import time
import csv
from urllib.parse import urlparse
from lxml import etree
from selenium import webdriver
from selenium. webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as bs
# Address of the global-ping lookup site opened by the scraper.
base_url = 'https://www.wepcc.com'
# Shared Chrome WebDriver instance, started from the local chromedriver binary.
driver = webdriver.Chrome('E:/Anaconda/chromedriver')
def ping_(url_list):
    """
    Look up each domain on the global-ping site and report CDN usage.

    Every entry of ``url_list`` is typed into the site's ``host`` input box
    and submitted. The resulting page is parsed and the text of all
    ``td.r_ip`` cells (except the ``loading`` placeholder) is collected.
    More than one distinct IP is reported as "uses a CDN", otherwise as
    "no CDN"; one line is printed per domain.

    The module-level ``driver`` is always quit before this function returns
    or raises, so the function can be called only once per driver instance.

    Args:
        url_list: iterable of domain names to query.

    Returns:
        dict mapping each queried domain to the set of distinct IP strings
        found for it (an empty set when no result cell was filled in).
    """
    results = {}
    try:
        # Set the implicit wait up front so it also covers the very first
        # element lookup instead of only taking effect after a query.
        driver.implicitly_wait(10)  # seconds
        driver.maximize_window()
        driver.get(base_url)  # open the global-ping site
        for url in url_list:
            # Re-locate the input on every iteration: the previous element
            # reference is not reused after the form has been submitted.
            # find_element(By.NAME, ...) replaces find_element_by_name,
            # which newer Selenium releases no longer provide.
            element = driver.find_element(By.NAME, "host")
            element.clear()
            element.send_keys(url)
            element.send_keys(Keys.RETURN)  # submit the query
            # Fixed pause to give the result cells time to fill in.
            time.sleep(10)
            soup = bs(driver.page_source, "html.parser")
            # Distinct IPs reported so far; cells still showing the
            # 'loading' placeholder are ignored.
            ips = {
                cell.text
                for cell in soup.find_all('td', class_='r_ip')
                if cell.text != 'loading'
            }
            if len(ips) > 1:
                print('{}的ip结果为{},使用了cdn'.format(url, ips))
            else:
                print('{}的ip结果为{},未使用cdn'.format(url, ips))
            results[url] = ips
    finally:
        # Always release the browser, even if a lookup fails midway.
        driver.quit()
    return results
# Simulated-browser scraping: global ping results
# (Source page note: latest recommended article published 2022-12-06 14:20:57)