# Python Scraping 4

"""
Scrape city codes on zhipin.
"""
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import time
from lxml import etree
import csv

if __name__ == '__main__':
    # Scrape the letter/code/name table of cities from zhipin's city-selector
    # widget and write it to zhipincitycode.csv.
    opts = webdriver.ChromeOptions()
    opts.add_argument(
        "--user-agent=User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0")
    # Attach to an already-running Chrome started with:
    #   chrome.exe --remote-debugging-port=50000 --user-data-dir="C:\chromeremotedebug"
    opts.add_experimental_option("debuggerAddress", "127.0.0.1:50000")
    browser = webdriver.Chrome(options=opts)
    browser.implicitly_wait(5)
    browser.get("https://www.zhipin.com/job_detail/?query=&city=100010000&industry=&position=")
    # Wait until the page shows a logged-in user avatar and the city selector
    # before interacting with it (up to 60 s each).
    WebDriverWait(browser, 60).until(ec.presence_of_element_located(("xpath", "//a[@ka='header-username']/img")))
    WebDriverWait(browser, 60).until(ec.presence_of_element_located(("xpath", "//div[contains(@class, 'city-sel')]/span")))
    # Open the city-selection dialog and enumerate the province tabs.
    browser.find_element("xpath", "//div[contains(@class, 'city-sel')]/span").click()
    provinces = browser.find_elements("xpath", "//li[contains(@ka, 'sel-province')]")
    # 'with' guarantees the CSV file is closed even if scraping raises midway.
    with open("zhipincitycode.csv", 'w', newline='', encoding='utf-8') as fp:
        csvw = csv.writer(fp)
        csvw.writerow(["Letter", "Code", "Name"])
        for province in provinces:
            province.click()
            # Grab the city panel's HTML once and parse it with lxml instead
            # of issuing one Selenium call per node.
            dom = etree.HTML(browser.find_element("xpath", "//ul[@class='section-city']").get_attribute("outerHTML"))
            for letter_group in dom.xpath("//li[@class='classify-city']"):
                # Relative xpath (.//) keeps the search inside this letter group.
                letter = letter_group.xpath(".//div[@class='city-title']")[0].text
                for span in letter_group.xpath(".//span"):
                    csvw.writerow([letter, span.get("data-val"), span.text])
                    print(letter, span.get("data-val"), span.text)
            # Be polite to the server between province clicks.
            time.sleep(1)
    # Close the city-selection dialog.
    browser.find_element("xpath", "//i[@class='icon-close']").click()
