import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver import ActionChains
# Module-level accumulator: login() appends every result link it reads to this
# list and later writes the whole list out as a CSV file.
a = list()
def _find_by_xpath(driver, xpath):
    """Return the first element matching *xpath*.

    Uses ``find_element(By.XPATH, ...)``, which is available in both
    Selenium 3 and Selenium 4; the ``find_element_by_xpath`` shortcut was
    removed in Selenium 4.3.
    """
    # Imported locally so this block needs no extra top-of-file import.
    from selenium.webdriver.common.by import By

    return driver.find_element(By.XPATH, xpath)


def _sign_in(driver, username, password):
    """Open the qcc.com login page, fill in the password form and submit it."""
    form = '/html/body/div[1]/div[3]/div/div[2]/div[3]/form'

    driver.delete_all_cookies()
    driver.get("https://www.qcc.com/weblogin?back=%2F")
    time.sleep(10)  # fixed pause after loading the login page

    # Click the "password login" link.
    _find_by_xpath(driver, '/html/body/div[1]/div[3]/div/div[2]/div[1]/div[2]/a').click()
    time.sleep(1)

    # Enter account name and password.
    _find_by_xpath(driver, form + '/div[1]/input').send_keys(username)
    _find_by_xpath(driver, form + '/div[2]/input').send_keys(password)

    # Drag the slider: press on the handle, move 308 px to the right, release.
    slider = _find_by_xpath(driver, form + '/div[3]/div/div/div[1]/span')
    ActionChains(driver).click_and_hold(slider).perform()
    ActionChains(driver).move_by_offset(xoffset=308, yoffset=0).perform()
    ActionChains(driver).release().perform()
    time.sleep(2)

    # Click the login button.
    _find_by_xpath(driver, form + '/div[4]/button/strong').click()
    time.sleep(0.5)


def _collect_result_links(driver):
    """Walk the search-result pages and append each result link to ``a``.

    For every search URL, pages 1-5 are loaded and table rows 1-19 are probed.
    Each link found is printed with a running counter and appended to the
    module-level list ``a``.
    """
    from selenium.common.exceptions import NoSuchElementException

    search_urls = [
        'https://www.qcc.com/web/search?key=%E6%B7%98%E5%AE%9D&p={}',  # 淘宝 (Taobao)
        'https://www.qcc.com/web/search?key=%E5%A9%9A%E5%BA%86&p={}',  # 婚庆 (wedding services)
        'https://www.qcc.com/web/search?key=%E6%8A%A5%E7%A4%BE&p={}',  # 报社 (newspaper office)
        'https://www.qcc.com/web/search?key=%E7%A7%91%E6%8A%80&p={}',  # 科技 (technology)
        'https://www.qcc.com/web/search?key=%E7%94%B5%E5%AD%90&p={}',  # 电子 (electronics)
        'https://www.qcc.com/web/search?key=%E7%94%9F%E6%B4%BB&p={}',  # 生活 (daily life)
    ]
    row_xpath = '/html/body/div[1]/div[2]/div[2]/div[4]/div/div[2]/div/table/tr[{}]/td[3]/div/a[1]'

    count = 1
    for template in search_urls:
        for page in range(1, 6):
            driver.get(template.format(page))
            for row in range(1, 20):
                try:
                    link = _find_by_xpath(driver, row_xpath.format(row))
                except NoSuchElementException:
                    # This row is not present on the page; skip it. Only the
                    # "element missing" case is ignored, so Ctrl-C and real
                    # failures are no longer silently swallowed.
                    continue
                href = link.get_attribute("href")
                print('第{}条----->>>'.format(count), href)
                count += 1
                a.append(href)
            time.sleep(5)  # pause between result pages


def login(driver, username="199......", password="13.......",
          csv_path=r'E:\movieinfo3.csv'):
    """Log in to qcc.com, collect search-result links and save them as CSV.

    Side effects: appends every collected link to the module-level list ``a``,
    prints each link, writes ``a`` to ``csv_path`` via pandas, and closes the
    browser window.

    Args:
        driver: A Selenium WebDriver instance to drive.
        username: Account name typed into the login form. Defaults to the
            placeholder value that was previously hard-coded.
        password: Password typed into the login form. Defaults to the
            placeholder value that was previously hard-coded.
        csv_path: Destination of the CSV file. Defaults to the previously
            hard-coded location (now a raw string, same value).

    Raises:
        Any Selenium error raised while logging in or loading a page is
        propagated; the browser window is closed first.
    """
    try:
        _sign_in(driver, username, password)
        _collect_result_links(driver)
        pd.DataFrame(a).to_csv(csv_path)
    finally:
        # Close the window even when a step above fails, so an error does not
        # leave the browser open.
        driver.close()
def main():
    """Start Chrome with anti-detection options and run ``login`` in a loop.

    Each pass builds a fresh ``ChromeOptions``, starts a new Chrome driver,
    sets a 15-second page-load timeout and hands the driver to ``login``.
    The driver is always quit afterwards so no browser/chromedriver process is
    left behind, including when ``login`` raises.

    NOTE(review): the ``while True`` loop has no exit condition, so after a
    pass finishes a new browser is started and everything runs again —
    confirm this is intended.
    """
    while True:
        # ChromeOptions holds the browser's start-up configuration.
        option = webdriver.ChromeOptions()
        # Anti-detection: drop the "enable-automation" switch.
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        # Launch argument intended to hide the webdriver flag from pages.
        option.add_argument("--disable-blink-features=AutomationControlled")
        # Works around the "DevToolsActivePort file doesn't exist" error.
        option.add_argument("--no-sandbox")
        # NOTE(review): "--disable-dev-usage" does not look like a documented
        # Chrome switch; "--disable-dev-shm-usage" may have been intended —
        # confirm before changing.
        option.add_argument("--disable-dev-usage")
        # Content-settings preference for images (presumably 2 = block; confirm).
        option.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
        driver = webdriver.Chrome(executable_path=r"E:\chromedriver\chromedriver.exe", options=option)
        try:
            driver.set_page_load_timeout(15)  # page-load timeout in seconds
            login(driver)
        finally:
            # quit() ends the whole driver session; without it every loop pass
            # would leave another chromedriver process running.
            driver.quit()
if __name__ == "__main__":
    # Script entry point. NOTE: `headers` is assigned here but nothing visible
    # in this file reads it.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/88.0.4324.182 Safari/537.36'
        ),
    }
    main()
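# 爬取企查查公司URL (scrape company URLs from Qichacha, qcc.com)
# 最新推荐文章于 2023-10-07 11:30:20 发布 (web-page footer text captured with the paste; not part of the script)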