"""
Scrape city codes on zhipin.
"""
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import time
from lxml import etree
import csv
if __name__ == '__main__':
    # Attach to an already-running Chrome instead of launching a new one.
    # Start that Chrome first from a Windows command prompt with:
    #   chrome.exe --remote-debugging-port=50000 --user-data-dir="C:\chromeremotedebug"
    opts = webdriver.ChromeOptions()
    # The switch takes the bare user-agent string; it must not be prefixed
    # with the "User-Agent: " header name.
    opts.add_argument(
        "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0")
    opts.add_experimental_option("debuggerAddress", "127.0.0.1:50000")
    browser = webdriver.Chrome(options=opts)
    browser.implicitly_wait(5)
    browser.get("https://www.zhipin.com/job_detail/?query=&city=100010000&industry=&position=")

    # Block (up to 60 s each) until the header user image and the city
    # selector are present in the DOM.
    # NOTE(review): the header image presumably only appears once a user is
    # logged in, which would give time for a manual login - confirm.
    WebDriverWait(browser, 60).until(
        ec.presence_of_element_located(("xpath", "//a[@ka='header-username']/img")))
    city_selector_xpath = "//div[contains(@class, 'city-sel')]/span"
    city_selector = WebDriverWait(browser, 60).until(
        ec.presence_of_element_located(("xpath", city_selector_xpath)))

    # Open the city dialog and collect the province entries to click through.
    city_selector.click()
    provinces = browser.find_elements("xpath", "//li[contains(@ka, 'sel-province')]")

    # The context manager guarantees the CSV is flushed and closed even if a
    # Selenium or parsing call raises part-way through the scrape.
    with open("zhipincitycode.csv", 'w', newline='', encoding='utf-8') as fp:
        csvw = csv.writer(fp)
        csvw.writerow(["Letter", "Code", "Name"])
        for province in provinces:
            province.click()
            # Snapshot the city list's HTML and parse it offline with lxml.
            city_list_html = browser.find_element(
                "xpath", "//ul[@class='section-city']").get_attribute("outerHTML")
            dom = etree.HTML(city_list_html)
            for group in dom.xpath("//li[@class='classify-city']"):
                # Relative paths (".//") keep the lookups scoped to this group
                # instead of searching the whole parsed document again.
                group_letter = group.xpath(".//div[@class='city-title']")[0].text
                for span in group.xpath(".//span"):
                    row = [group_letter, span.get("data-val"), span.text]
                    csvw.writerow(row)
                    print(*row)
            # Pause between province clicks.
            time.sleep(1)

    # Dismiss the city dialog once every province has been processed.
    browser.find_element("xpath", "//i[@class='icon-close']").click()
# Python Scraping 4
# (Web-page footer residue: "Latest recommended article published on 2024-04-07 09:59:09".)