环境搭建
selenium-3.8.1+python2.7+chromedriver
具体的搭建方式请百度
媒体基础信息爬取实例
app-spider.py
# coding: UTF-8
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import os
import sys
# Python 2 only: reloading sys brings back sys.setdefaultencoding, which is
# removed from the module during interpreter start-up.
reload(sys)
# Make UTF-8 the default codec for implicit str/unicode conversions.
sys.setdefaultencoding('utf8')
# Chrome WebDriver session created at import time; getAppName() drives it and
# main() quits it.
driver = webdriver.Chrome()
def getAppName(key):
    """Search qimai.cn for ``key`` and return the text of the ``appname`` element.

    The module-level ``driver`` is used for every step. A key consisting only
    of digits selects the dropdown entry found next to the ``icon-ios`` icon;
    any other key selects the entry wrapping the ``icon-anzhuo`` icon
    (presumably numeric App Store ids vs. Android package names -- confirm
    against the site).

    :param key: byte string holding the app id or package name to search for.
    :return: text of the first ``div`` with class ``appname`` on the page
        reached after submitting the search.
    """
    driver.get("https://www.qimai.cn/")
    driver.set_window_size(1000, 1000)

    # Hover over the dropdown before clicking one of its entries.
    dropdown = driver.find_element_by_class_name("dropdown-box")
    ActionChains(driver).move_to_element(dropdown).perform()
    time.sleep(1)

    # Choose which dropdown entry to click based on the shape of the key.
    if key.isdigit():
        entry_xpath = "//i[@class='iconfont icon-ios']/../../li[1]"
    else:
        entry_xpath = "//i[@class='iconfont icon-anzhuo']/.."
    entry = driver.find_element_by_xpath(entry_xpath)
    ActionChains(driver).click(entry).perform()

    # Type the key into the search box and submit it with RETURN.
    search_box = driver.find_element_by_xpath(
        "//div[@class='search-wrap']/div[1]/input[@class='ivu-input']")
    search_box.send_keys(key.decode('utf-8'))
    search_box.send_keys(Keys.RETURN)
    time.sleep(3)  # fixed pause before reading the resulting page

    print(driver.current_url)
    return driver.find_element_by_xpath("//div[@class='appname']").text
def main():
    """Resolve every id listed in the ``appid`` file and write ``appname``.

    ``appid`` is read from the current directory, one key per line (for
    example ``414478124`` or ``com.tencent.mm``). For each key the name
    returned by ``getAppName`` is written to ``appname`` as a
    ``<id>,<name>,`` line and echoed to stdout. ``appname`` is created if
    missing and overwritten otherwise.

    A key whose lookup raises is reported on stderr and skipped, so one bad
    entry does not abort the run. The shared ``driver`` is always quit and
    both files are always closed, even when the run is interrupted.

    :raises IOError: if ``appid`` cannot be opened or ``appname`` cannot be
        written.
    """
    print('--start--')
    try:
        # "appid" is opened first so that a missing input file leaves any
        # existing "appname" untouched. Mode "w" creates or truncates the
        # output, replacing the old remove + append + truncate sequence that
        # failed when the file did not exist yet.
        with open("appid") as id_file, open("appname", "w") as out_file:
            for line in id_file:
                # strip() also drops "\r" and stray spaces; otherwise a digit
                # id from a CRLF file would fail isdigit() in getAppName.
                appid = line.strip()
                if not appid:
                    continue  # ignore blank lines instead of searching for ""
                try:
                    id_name = appid + "," + getAppName(appid) + ","
                except Exception as exc:
                    # Best effort per key, but say which one failed.
                    # KeyboardInterrupt is no longer swallowed.
                    sys.stderr.write('skip %s: %r\n' % (appid, exc))
                    continue
                out_file.write(id_name + '\n')
                print(id_name)
                time.sleep(3)  # pause between consecutive lookups
    finally:
        driver.quit()
    print('--end--')


if __name__ == "__main__":
    main()
参考文章: