# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import os
import time
import urllib2
import random
def write_log(text):
    """Clear the console, echo ``text`` and append it to ``log_generic.txt``.

    The log file is opened in append mode relative to the current working
    directory, and a newline is written after every message.

    Args:
        text: Message string to display and record.
    """
    # 'cls' only exists on Windows; POSIX shells use 'clear' instead.
    os.system('cls' if os.name == 'nt' else 'clear')
    # Single-argument print() behaves the same under Python 2 and 3.
    print(text)
    # The context manager closes the log even if a write fails.
    with open('log_generic.txt', 'a') as log:
        log.write(text)
        log.write('\n')
def read_label_file_en_ch(txt_file):
    """Read paired labels from a text file.

    Every non-blank line is stripped and split on the ``<---->`` separator
    into two parts; going by the function name these are the English and
    the Chinese label respectively.

    Args:
        txt_file: Path of the label file to read.

    Returns:
        A tuple ``(label_list_en, label_list_ch)`` of two equally long
        lists, in file order.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            parts on ``<---->``.
    """
    label_list_en = []
    label_list_ch = []
    # The context manager closes the file even when a line fails to parse.
    with open(txt_file, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline at end of file)
                # cannot be unpacked into two labels, so skip them.
                continue
            label_en, label_ch = line.split('<---->')
            label_list_en.append(label_en)
            label_list_ch.append(label_ch)
    return label_list_en, label_list_ch
def getImg_main(basePathDir, keyWords_list):
    """Run a Google Play app search for every keyword in a label file.

    A single PhantomJS driver is created, reused for all keywords and shut
    down when the function exits, whether it finishes normally or raises.

    Args:
        basePathDir: Base directory; joined with each keyword to build the
            per-keyword directory path handed to ``getApps`` (the directory
            itself is not created here).
        keyWords_list: Path of the label file read with
            ``read_label_file_en_ch``; only the first (English) list is
            used as search keywords.
    """
    # Function-scope import so the block works on Python 2 and Python 3.
    try:
        from urllib import quote_plus  # Python 2
    except ImportError:
        from urllib.parse import quote_plus  # Python 3

    # Start from the default PhantomJS capabilities and set the user agent.
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    # Adjust this string to the browser identity that is needed.
    dcap['phantomjs.page.settings.userAgent'] = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 ')
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    try:
        driver.maximize_window()
        label_list_en, label_list_ch = read_label_file_en_ch(keyWords_list)
        for index, subjectName in enumerate(label_list_en):
            # Same text the original multi-argument print statement emitted.
            print('keyWord :  %s %s' % (index, subjectName))
            subjectDir = os.path.join(basePathDir, subjectName)
            # Encode the keyword so spaces, '&', '+', '#' etc. stay inside
            # the ``q`` query parameter instead of corrupting the URL.
            cur_url = ('https://play.google.com/store/search?c=apps&q='
                       + quote_plus(subjectName))
            getApps(subjectDir, subjectName, cur_url, driver)
        print('end apps')
    finally:
        # quit() ends the whole driver session; close() only closes the
        # current window and would leave the browser process behind.
        driver.quit()
def getApps(subjectDir, subjectName, cur_url, driver,
            scroll_rounds=6, pause_seconds=2, expected_count=248,
            out_path='data_.txt'):
    """Load a search page, expand it and save the linked app ids.

    The page is scrolled to the bottom and its "show more" button clicked
    ``scroll_rounds`` times. All ``//div[@class='details']/a[2]`` links are
    then collected and their ``href`` values, minus the Play Store details
    URL prefix, are written one per line to ``out_path``.

    Args:
        subjectDir: Unused; kept for interface compatibility.
        subjectName: Unused; kept for interface compatibility.
        cur_url: URL of the search result page to open.
        driver: Selenium WebDriver used to load and query the page.
        scroll_rounds: Number of scroll-and-click rounds (default 6).
        pause_seconds: Seconds to sleep after each round so new results
            can load (default 2).
        expected_count: The file is written only when exactly this many
            links were found (default 248, the original hard-coded value;
            NOTE(review): presumably the size of a fully expanded result
            page - confirm). Pass ``None`` to always write.
        out_path: Output file, overwritten on every write
            (default ``'data_.txt'``).
    """
    driver.get(cur_url)
    scroll_js = "window.scrollTo(0,document.body.scrollHeight);"
    # Guard the click: once no further results exist the button may be
    # missing, and an unguarded .click() would raise inside the browser.
    click_js = ("var btn = document.getElementById('show-more-button');"
                "if (btn) { btn.click(); }")
    for i in range(scroll_rounds):
        print(i)
        driver.execute_script(scroll_js)
        driver.execute_script(click_js)
        time.sleep(pause_seconds)

    hrefs = driver.find_elements_by_xpath("//div[@class='details']/a[2]")
    found = len(hrefs)
    print(found)
    if expected_count is not None and found != expected_count:
        return

    prefix = 'https://play.google.com/store/apps/details?id='
    # The context manager closes the file even if reading a link fails.
    with open(out_path, 'w') as fp:
        for element in hrefs:
            href = element.get_attribute('href')
            if not href:
                # An anchor without an href has no id to record.
                continue
            fp.write(href.replace(prefix, '') + '\n')
if __name__ == '__main__':
    # Script entry point: search every keyword listed in the label file.
    # Path of the keyword/label file handed to getImg_main.
    keyWords_list = './keys.txt'
    # Base output directory; created up front if it does not exist yet.
    savePath = './apk'
    if not os.path.exists(savePath):
        os.makedirs(savePath)
    getImg_main(savePath, keyWords_list)
# Using Python + PhantomJS + Selenium to fetch search results
# Latest recommended article published on 2021-08-16 22:13:23