from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os, json, time
from urllib import parse
from lxml import etree
from fake_useragent import UserAgent
from pwd import username,password
from pymongo import MongoClient
# Single fake_useragent.UserAgent instance, built once when the module loads.
ua = UserAgent()

# Keywords to search for.
keywords_ls = [
    'python',
    'java',
    'web',
    'c',
]

# Popular cities to search:
citys_ls =['北京','上海','深圳','广州','杭州','成都','南京','武汉','西安','厦门','长沙','苏州','天津']classLaGouselenium():def__init__(self,keywords_ls=keywords_ls,citys_ls=citys_ls):
self.keywords_ls=keywords_ls
self.citys_ls=citys_ls
self.crawl_city=[]#已经爬取的城市#断点续传
crawledCityPath='./lagou_crawled_city.json'if os.path.exists(crawledCityPath):withopen(crawledCityPath,'r',encoding='utf8')as f:
ls = json.load(f)
self.crawled_city=ls
self.col = MongoClient()['selenium']['LGW']#抓取条数print('已抓取',s