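# Simulate searching the app store for an application by name, then scrape the
# application's name and icon and download the icon to the local disk.
# The application names to search for can also be read from an Excel sheet.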
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
from lxml import etree
import urllib3
import re
import urllib
import urllib.request
import xlrd
# Batch-download app icons: read app names from an Excel sheet, search each one
# on the app store page with a Chrome session driven by Selenium, and save the
# icon of the first search result as "<result title>.jpg" in the working
# directory.

# Characters that Windows does not allow in file names; scraped titles may
# contain them, so they are replaced before the title is used as a file name.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

_SEARCH_PAGE_URL = "https://sj.qq.com/myapp/"
# Icon <img> and title <a> of the first entry in the search result list.
_FIRST_LOGO_XPATH = '//*[@id="J_SearchDefaultListBox"]/li[1]/div[1]/div[1]/div[1]/a/img'
_FIRST_TITLE_XPATH = '//*[@id="J_SearchDefaultListBox"]/li[1]/div[1]/div[2]/div[1]/div[1]/a'


def _save_logo(logo_url, app_title):
    """Download *logo_url* to ``<app_title>.jpg`` and report success."""
    filename = '%s.jpg' % _INVALID_FILENAME_CHARS.sub('_', app_title)
    urllib.request.urlretrieve(logo_url, filename)
    print("搜索结果 ", app_title, " 存储成功")


def _first_search_result(driver, app_name):
    """Search for *app_name* and return ``(logo_url, title)`` of the first hit.

    Selenium's lookup errors (e.g. when the result list is empty) are not
    caught here and propagate to the caller.
    """
    driver.get(_SEARCH_PAGE_URL)
    sleep(1)  # fixed pause so the page can finish loading
    search_box = driver.find_element(By.ID, "J_MainInput")
    search_box.clear()
    search_box.send_keys(app_name)
    driver.find_element(By.ID, "J_SearchBtn").click()
    sleep(1)  # fixed pause so the result list can render
    logo_url = driver.find_element(By.XPATH, _FIRST_LOGO_XPATH).get_attribute('src')
    title = driver.find_element(By.XPATH, _FIRST_TITLE_XPATH).text
    return logo_url, title


def _process_app(app_name):
    """Look up one app name and save its icon if the first result matches.

    An exact title match is saved immediately. If the searched name is only
    contained in the result title, the user is asked to confirm with ``Y``.
    Anything else is reported for manual handling. A fresh Chrome session is
    used per app; it stays open while the user is prompted and is always shut
    down afterwards, even when the lookup raises.
    """
    driver = webdriver.Chrome()
    try:
        logo_url, title = _first_search_result(driver, app_name)
        if title == app_name:
            _save_logo(logo_url, title)
        elif app_name in title:
            print("搜索名字与结果不符,但是包容,当前搜索结果为 ", title, " 要存储吗 Y/N")
            if input('') == 'Y':
                _save_logo(logo_url, title)
            else:
                print("请手动搜索", app_name)
        else:
            print("搜索名字与结果不符,请手动搜索", app_name)
    finally:
        # quit() ends the whole browser session, not just the current window.
        driver.quit()


wb = xlrd.open_workbook(r'C:/Users/Administrator/PycharmProjects/untitled/gxd/search_logo/search_app_logo.xls')
sh = wb.sheet_by_index(0)  # first worksheet
rowName = sh.row_values(0)  # values of the first row
colName = sh.col_values(4)  # values of column index 4: the names that get searched
row = len(colName)  # number of rows
col = len(rowName)  # number of columns
print(col, row)

# Iterate over the cells directly; indexing up to row + 1 would run past the
# end of the column.
for app_name in colName:
    if app_name == '':
        print()  # empty cell: emit a blank line and move on
    else:
        _process_app(app_name)