Python Scraping 4

最新推荐文章于 2024-04-07 09:59:09 发布

the 8th dwarf

最新推荐文章于 2024-04-07 09:59:09 发布

阅读量73

点赞数

文章标签： python chrome 开发语言

本文链接：https://blog.csdn.net/azenlijing/article/details/125404725

版权

Python 专栏收录该内容

16 篇文章 0 订阅

订阅专栏

"""
Scrape city codes from zhipin.com and write them to zhipincitycode.csv.
"""
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import time
from lxml import etree
import csv

if __name__ == '__main__':
    # Walk the city picker on zhipin.com province by province and dump every
    # city's (group title, data-val code, display name) into zhipincitycode.csv.
    #
    # The script attaches to an already-running Chrome rather than launching
    # one. Start it first from a Windows command prompt with:
    #   chrome.exe --remote-debugging-port=50000 --user-data-dir="C:\chromeremotedebug"
    opts = webdriver.ChromeOptions()
    # The switch value must be the bare UA string; the previous value carried a
    # literal "User-Agent: " header-name prefix, which would have become part
    # of the UA itself.
    # NOTE(review): presumably ignored when attaching via debuggerAddress,
    # since the browser is already running -- confirm.
    opts.add_argument(
        "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0")
    opts.add_experimental_option("debuggerAddress", "127.0.0.1:50000")
    browser = webdriver.Chrome(options=opts)
    browser.implicitly_wait(5)
    browser.get("https://www.zhipin.com/job_detail/?query=&city=100010000&industry=&position=")

    wait = WebDriverWait(browser, 60)
    # Block (up to 60 s) until the header user image is present.
    # NOTE(review): this looks like a "user is logged in" gate -- confirm.
    wait.until(ec.presence_of_element_located(("xpath", "//a[@ka='header-username']/img")))
    # Open the city picker; the wait returns the located element, so click it
    # directly instead of looking it up a second time.
    city_selector_xpath = "//div[contains(@class, 'city-sel')]/span"
    wait.until(ec.presence_of_element_located(("xpath", city_selector_xpath))).click()

    provinces = browser.find_elements("xpath", "//li[contains(@ka, 'sel-province')]")

    # `with` guarantees the CSV is flushed and closed even if a click or an
    # XPath lookup raises part-way through the scrape.
    with open("zhipincitycode.csv", 'w', newline='', encoding='utf-8') as fp:
        csvw = csv.writer(fp)
        csvw.writerow(["Letter", "Code", "Name"])
        for province in provinces:
            province.click()
            # Snapshot the city list's HTML and parse it offline with lxml.
            section_html = browser.find_element(
                "xpath", "//ul[@class='section-city']").get_attribute("outerHTML")
            dom = etree.HTML(section_html)
            for group in dom.xpath("//li[@class='classify-city']"):
                # XPaths below must stay relative (".//") so they search only
                # inside the current group, not the whole document.
                group_title = group.xpath(".//div[@class='city-title']")[0].text
                for span in group.xpath(".//span"):
                    row = [group_title, span.get("data-val"), span.text]
                    csvw.writerow(row)
                    print(*row)
            # Pause between provinces to pace the UI interaction.
            time.sleep(1)

    # The file is already safely closed; now dismiss the city picker.
    browser.find_element("xpath", "//i[@class='icon-close']").click()