import time
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
def getChrome():
    """Open the announcement search page in Chrome and collect image URLs.

    Starts a Chrome session configured to hide the usual automation
    markers, loads the hard-coded search URL, then calls ``getNext`` up to
    50 times (one second apart) and accumulates the image links it returns.
    The de-duplicated links and their count are printed at the end.

    Returns:
        list: the unique, non-empty image URLs gathered, sorted for stable
        output.
    """
    chrome_options = Options()
    # chrome_options.add_argument("--headless")
    # Extra options that hide the webdriver/automation fingerprint.
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36')
    # NOTE(review): this looks like an HTTP header rather than a Chrome
    # command-line switch, so it presumably has no effect -- confirm.
    chrome_options.add_argument("Pragma=no-cache")

    driver = Chrome(options=chrome_options)
    links = set()  # a set de-duplicates as we go
    try:
        # Redefine navigator.webdriver before any page script runs.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
            })
            """
        })
        # implicitly_wait only sets the element-lookup timeout; it does not
        # pause, so configuring it once is enough.
        driver.implicitly_wait(20)
        driver.get("http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearch.html?annNum=1739")
        # NOTE(review): getNext clicks "next page" before reading images, so
        # images on the initially loaded page are never collected -- confirm
        # whether that is intended.
        for i in range(50):
            print(i)
            time.sleep(1)
            try:
                links.update(src for src in getNext(driver) if src)
            except Exception as e:
                # Best effort: report the failure and keep trying the
                # remaining iterations, as the original loop did.
                print(e)
    finally:
        # Always shut down the browser and driver process, even on error.
        driver.quit()

    all_links = sorted(links)
    print("全部图片链接:")
    print(all_links)
    print(len(all_links))
    return all_links
def getNext(driver):
    """Turn to the next page and return the image links found on it.

    Clicks the link whose text contains '下一页' (next page), then reads the
    ``src`` attribute of every ``<img>`` element on the page.

    Args:
        driver: a Selenium WebDriver (any object offering ``find_element``,
            ``find_elements`` and ``implicitly_wait`` works).

    Returns:
        list: the non-empty ``src`` values, in document order.

    Raises:
        Whatever the driver raises if the next-page link cannot be located
        or clicked.
    """
    # The find_element_by_* helpers were removed in Selenium 4.3; the generic
    # find_element(by, value) form works on both Selenium 3 and 4. The
    # strategy strings are the values of By.PARTIAL_LINK_TEXT / By.TAG_NAME.
    driver.find_element("partial link text", '下一页').click()
    # Sets the element-lookup timeout (not a sleep); one call is sufficient.
    driver.implicitly_wait(10)
    images = driver.find_elements("tag name", 'img')
    # Skip <img> elements without a src so callers never receive None.
    links = [src for src in (img.get_attribute("src") for img in images) if src]
    print(links)
    return links
# Script entry point: start the scraper only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    getChrome()
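# NOTE(review): stray non-code text from the original paste, kept as comments
# so the file parses:
# 08-04
# 1638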