前提:手机上各种 USB 调试相关的选项都已经开启。症状:连接设备时黑屏、画面一闪就断开连接,或者运行代码时提示 minicap timeout 之类的错误。
解决方法:在 AirtestIDE 中连接设备时,勾选连接选项里的 javacap。
如果是用纯代码连接设备,则在设备连接字符串后面加上以下连接参数:
cap_method=javacap&touch_method=adb
下面是一个完整的 Python 项目例程,里面用到了大部分 Poco 操作以及设备连接的初始化,通读一遍基本就能掌握。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : 电商爬虫.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2021-01-10
from airtest.core.api import *
from airtest.aircv import *
from airtest.core.android.adb import *
from airtest.core.android.android import *
import os
import requests
import base64
from poco.drivers.android.uiautomation import AndroidUiautomationPoco
from html.parser import HTMLParser
from db_connect.sqlite_driver import *
from pprint import pprint
html_parser = HTMLParser()
import datetime
# Database handle returned by init_db (imported from db_connect.sqlite_driver);
# get_now_page stores every scraped product through sql3.save().
sql3 = init_db('result_datas/美菜网商品.db')
PACKAGE = "com.meicai.mall" # Package name of the Meicai app
# APK that setup_function installs when PACKAGE is not present on the device.
INSTALL_PATH = "app/美菜商城.apk"
def re_text(text):
    """Decode HTML entities in *text* back to plain characters.

    Args:
        text: String read from the UI that may contain HTML entities such
            as ``&amp;`` or ``&lt;``.

    Returns:
        The text with every HTML entity replaced by its character.
    """
    # Imported locally so the function does not depend on the module-level
    # HTMLParser instance: HTMLParser.unescape() was removed in Python 3.9,
    # html.unescape() is its documented replacement.
    import html

    # Yen-sign replacement kept exactly as in the original code.
    text = text.replace('\xa5','¥')
    return html.unescape(text)
def ocr_login(api_key='KbfUHNoabG8Slos64ugqnff4',sdk_key='28GQ5PNNnwKAXy0BxoBvEHF5xtZs1Alf'):
    """Request an OAuth access token from the Baidu AI platform.

    Args:
        api_key: Value sent as ``client_id``.
        sdk_key: Value sent as ``client_secret``.

    Returns:
        The access token string, or ``None`` when the request fails, the
        server answers with an error status, or the reply carries no token.
    """
    # NOTE(review): the default credentials are hard-coded in source; they are
    # kept so existing callers keep working, but should be moved to
    # configuration or environment variables.
    host = 'https://aip.baidubce.com/oauth/2.0/token'
    query = {
        'grant_type': 'client_credentials',
        'client_id': api_key,
        'client_secret': sdk_key,
    }
    try:
        # params= lets requests URL-encode the credentials; the timeout keeps a
        # stalled connection from blocking the script forever.
        response = requests.get(host, params=query, timeout=10)
    except requests.RequestException as exc:
        print('登录失败:', exc)
        return None
    if not response:  # a Response is falsy for 4xx/5xx status codes
        return None
    try:
        rep_json = response.json()
    except ValueError as exc:  # body was not valid JSON
        print('登录失败:', exc)
        return None
    access_token = rep_json.get('access_token')
    if access_token:
        # Only announce success when a token was actually returned.
        print('登录成功,获得token:',access_token)
    return access_token
def setup_function():
    """Connect Airtest to the default Android device and make sure the app is installed.

    Prints every device reported by ADB, resolves the default device,
    registers it through ``auto_setup`` with the javacap capture method and
    adb touch method, then installs ``INSTALL_PATH`` (without overwriting)
    when the ``PACKAGE`` check raises ``AirtestError``.
    """
    # Device notes recorded by the author:
    #   HUAWEI honor 10  resolution 2280*1080
    #   OPPO R15         resolution 2280*1080
    #   Vivo x21         resolution 2280*1080
    #   onePlus 5T       resolution 2160x1080, poco service starts unreliably
    #   xiaomi mix2      elements cannot be recognised
    #   onePlus          pocoserver fails to start
    # device = init_device("Android")  # obtain the device
    adb_client = ADB()
    phone = Android()  # obtain the device
    attached = adb_client.devices()  # every device known to adb
    print('所有设备列表:', attached)
    # connect_device("android:///" + devicesList[1][0])  # switch to another phone
    serial = phone.get_default_device()
    print("现在连接的测试设备:", serial)

    # Alternative connection string that uses minicap/minitouch instead:
    # "android://127.0.0.1:5037/{currentDevice}?cap_method=MINICAP_STREAM&&ori_method=MINICAPORI&&touch_method=MINITOUCH".format(currentDevice=currentDevice),
    uri_template = "android://127.0.0.1:5037/{currentDevice}?cap_method=javacap&touch_method=adb"
    device_uri = uri_template.format(currentDevice=serial)
    auto_setup(__file__, logdir=True, devices=[device_uri])

    try:
        phone.check_app(PACKAGE)  # raises AirtestError when the apk is absent
    except AirtestError:
        print('检测到未安装指定的apk,正在为你安装,请耐心等待...')
        phone.install_app(INSTALL_PATH, False)  # do not overwrite an existing install
        print('apk安装完毕')
    else:
        print('apk已存在,正在尝试打开...')
    # clear_app(PACKAGE)     # clear app data
    # uninstall(PACKAGE)     # uninstall the app
    # install(INSTALL_PATH)  # install the app
    # stop_app(PACKAGE)      # stop the app
# Module-level bootstrap: the statements below run at import time and must
# stay in this order (the device has to be connected before Poco attaches
# and before the screen size is read).
setup_function() # connect to the device
# api_url = "http://127.0.0.1:8000"
# api_url and headers are only referenced by a commented-out upload call in
# the code visible here.
api_url = "http://qianyuan.iask.in:9090"
headers = {'content-type': 'application/x-www-form-urlencoded'}
poco = AndroidUiautomationPoco(use_airtest_input=True, screenshot_each_action=False)
# Screen size in pixels; get_price multiplies the bounds reported by Poco by
# these values to build its crop rectangle.
height = G.DEVICE.display_info['height']
width = G.DEVICE.display_info['width']
pixes = [height, width] # resolution
# The OCR token is required for price recognition; abort early without it.
access_token = ocr_login()
if not access_token:
    raise UserWarning('登录失败,程序退出')
def get_nowtime_hs():
    """Return the current local date-time as ``YYYY-mm-dd_HH_MM_SS_ffffff``.

    The trailing field is the microsecond part, which keeps file names built
    from this stamp distinct within the same second.
    """
    stamp_format = '%Y-%m-%d_%H_%M_%S_%f'
    return datetime.datetime.now().strftime(stamp_format)
def ocr_price(base_jpg, access_token,is_high):
    """Send a base64-encoded image to the Baidu OCR API and return the first recognised line.

    Args:
        base_jpg: Base64-encoded image content.
        access_token: Token obtained from ``ocr_login``.
        is_high: When truthy use the ``accurate_basic`` endpoint, otherwise
            ``general_basic``.

    Returns:
        The text of the first recognised line; the string ``'价格获取失败'``
        when nothing was recognised; ``0`` when the API reports an error or
        the HTTP status is 4xx/5xx.
    """
    if is_high:
        request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
    else:
        request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
    # with open('2815.png.png', 'rb') as f:
    #     img = f.read()
    #     base_jpg = base64.encodebytes(img)
    response = requests.post(
        request_url,
        params={"access_token": access_token},
        data={"image": base_jpg},
        headers={'content-type': 'application/x-www-form-urlencoded'},
        # Without a timeout a stalled connection would block the crawl forever.
        timeout=30,
    )
    if not response:  # a Response is falsy for 4xx/5xx status codes
        return 0
    ret = response.json()
    error_msg = ret.get('error_msg')
    if error_msg:
        # The API's own message is printed too, because errors other than
        # quota exhaustion also end up in this branch.
        print('百度api接口已达上限,统一返回0', error_msg)
        return 0
    result = ret.get('words_result') or []
    if result:
        return result[0]['words']
    return '价格获取失败'
def get_price(price_ui,save_name):
    """Crop the price widget out of a screenshot, save it and OCR it.

    Args:
        price_ui: Poco UI proxy of the price element.
        save_name: Path of the image file to write; missing parent
            directories are created.

    Returns:
        A tuple ``(encode_jpg, price_unit)``: the base64 text of the saved
        image and whatever ``ocr_price`` returned for it.
    """
    # os.path.dirname also copes with a path that starts with a separator,
    # where the previous split/join produced '' and os.makedirs('') raised.
    save_dir = os.path.dirname(save_name)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Poco documents get_bounds() as (top, right, bottom, left) in normalised
    # coordinates; scale to pixels and reorder to (left, top, right, bottom).
    # The unused position/size computations (two extra device round-trips,
    # one of which multiplied a height by the screen width) were removed.
    top, right, bottom, left = price_ui.get_bounds()
    crop_rect = [left * width, top * height, right * width, bottom * height]

    screen = G.DEVICE.snapshot()
    local_screen = aircv.crop_image(screen, crop_rect)
    pil_image = cv2_2_pil(local_screen)
    pil_image.save(save_name, quality=99, optimize=True)

    # Re-read the saved file so the OCR request sees exactly what is on disk.
    with open(save_name, 'rb') as f:
        base_jpg = base64.b64encode(f.read())
    encode_jpg = base_jpg.decode()
    print('开始识别价格图片:', encode_jpg[-20:-12])
    price_unit = ocr_price(base_jpg,access_token,False)  # Baidu OCR, general endpoint
    return encode_jpg,price_unit
def get_category(first=0,second=0,wait_time=3):
    """Select a first-level and a second-level category and return their labels.

    Args:
        first: Index of the first-level category to click.
        second: Index of the second-level category to click.
        wait_time: Seconds to wait for each category list to appear after
            the click.

    Returns:
        A tuple ``(first_cate_text, second_cate_text)`` with the text of the
        two selected categories.
    """
    # All first-level categories in the horizontal scroll bar.
    top_bar = poco(name='com.meicai.mall:id/ll_container').child('com.meicai.mall:id/horizontalScrollView')
    top_level = top_bar.child()
    top_level[first].click()
    top_label = top_level[first].get_text()
    top_level.wait_for_appearance(wait_time)

    # Entries of the second-level category list.
    side_list = poco(name='com.meicai.mall:id/lv_second_category')
    sub_level = side_list.offspring('com.meicai.mall:id/cate_tv')
    sub_label = sub_level[second].get_text()
    sub_level[second].click()
    sub_level.wait_for_appearance(wait_time)

    return (top_label, sub_label)
def swip_ui(ui_ret):
    """Swipe the given node up by its own height and report where to resume.

    Args:
        ui_ret: Poco UI proxy of the node whose scraping failed.
    """
    global index
    position = ui_ret.get_position()
    dimensions = ui_ret.get_size()
    print(position, dimensions)
    # pos_detla = ui_pos[1] - 0.27395833333333336 + ui_size[1]*0.5
    shift = dimensions[1]
    print('需要移动:', shift)
    # ret.swipe('up')
    ui_ret.swipe([0, -shift])
    ui_ret.wait(3)
    print(f"下次从{index}开始爬.本节点爬取失败:{ui_ret.get_name()}")
def swip_up_one(ui_ret):
    """Swipe the given node up by one times its own height, then pause one second.

    Args:
        ui_ret: Poco UI proxy used both as the swipe origin and as the
            measure of one item height.
    """
    # The unused get_position() call was dropped: it cost a device round-trip
    # and its result was never read.
    ui_size = ui_ret.get_size()
    ui_ret.swipe([0, -ui_size[1]])
    time.sleep(1)
def swip_up2(ui_ret):
    """Swipe the given node up by twice its own height, then pause one second.

    Args:
        ui_ret: Poco UI proxy used as the swipe origin and height reference.
    """
    item_height = ui_ret.get_size()[1]
    upward = [0, -2 * item_height]
    ui_ret.swipe(upward)
    time.sleep(1)
def swip_down_one(ui_ret):
    """Swipe the given node down by its own height, then pause two seconds.

    Args:
        ui_ret: Poco UI proxy used as the swipe origin and height reference.
    """
    item_height = ui_ret.get_size()[1]
    downward = [0, item_height]
    ui_ret.swipe(downward)
    time.sleep(2)  # pull-down refresh
def get_now_page(cate_name):
    """Scrape the product nodes at the top of the visible goods list and scroll on.

    For each selected node the name, spec labels, gauge text, OCR-recognised
    price, discount text and sales tags are collected, stored with
    ``sql3.save`` and printed, and the global ``index`` is incremented. When
    anything raises while handling a node, the failure is printed and the
    list is swiped up by one item. After the loop the list is swiped up by
    two item heights using the last node.

    Args:
        cate_name: Category label stored with every product record.
    """
    # NOTE(review): the nesting below was reconstructed from a paste that had
    # lost its indentation — confirm against the original file.
    global index,count,size_h
    ret_list = poco(name='com.meicai.mall:id/lv_goods_list').offspring('android:id/list').offspring(name='com.meicai.mall:id/ll_container')
    # Whether the first container holds a product name, i.e. is a product row.
    is_goods = ret_list[0].offspring('com.meicai.mall:id/tv_goods_name').exists()
    if len(ret_list) > 2: # needs at least three nodes
        # Work on two nodes per call: skip the first one when it is not a product.
        if not is_goods:
            ret_list = [ret_list[1], ret_list[2]]
        else:
            ret_list = [ret_list[0], ret_list[1]]
    # With two or fewer nodes ret_list is still the Poco proxy and is iterated directly.
    for ret in ret_list:
        try:
            is_goods = ret.offspring('com.meicai.mall:id/tv_goods_name').exists()
            if not is_goods:
                print(f"跳过非商品的节点:{ret}")
                continue
            # True when the node exposes the alternative "ssu" price widget.
            not_onsale = ret.offspring('com.meicai.mall:id/tv_goods_ssu_price').exists()
            name = ret.offspring('com.meicai.mall:id/tv_goods_name') # product name
            ggs = ret.offspring("com.meicai.mall:id/specLabels").children() # spec labels (plural)
            ggs_texts = [gg.child().get_text() for gg in ggs] # spec label texts
            guage = ret.offspring("com.meicai.mall:id/tv_goods_multi_gauge") # gauge
            # Fall back to the unit-price widget when the multi-gauge widget is absent.
            guage = guage if guage.exists() else ret.offspring('com.meicai.mall:id/tv_goods_ssu_unitprice')
            guage_text = guage.get_text() if guage.exists() else "" # gauge text
            guage_text = re_text(guage_text)
            price = ret.offspring('com.meicai.mall:id/tv_goods_price') if not not_onsale else ret.offspring('com.meicai.mall:id/tv_goods_ssu_price') # price widget, read via screenshot + OCR
            price_text = get_price(price, f'images/{get_nowtime_hs()}.png') # (base64 image text, recognised price text)
            discount = ret.offspring('com.meicai.mall:id/tvDiscountsPrices') # discount
            discount_text = re_text(discount.get_text()) if discount.exists() else "" # discount text
            # Sales info: self-operated, coupons, threshold discounts
            if not_onsale:
                sale_info = ret.offspring('com.meicai.mall:id/rl_price_container').child('com.meicai.mall:id/ll_goods_promote_tag').child('android.widget.LinearLayout')
            else:
                sale_info = ret.offspring('com.meicai.mall:id/ll_price_container').child('com.meicai.mall:id/ll_goods_promote_tag').child('android.widget.LinearLayout')
            sale_texts = [sale.child().get_text() for sale in sale_info] # sales info texts
            goods_info = {
                "seq": f'{index}',
                'cate_name':cate_name,
                "name": name.get_text(),
                "gg": '&'.join(ggs_texts),
                'guage': guage_text,
                'discount': discount_text,
                "price": price_text[1],
                'sale_info': '&'.join(sale_texts),
            }
            sql3.save(goods_info=goods_info) # save to the database
            index += 1
            print("-----------一条完整商品信息为--------------")
            print(goods_info) # print the assembled record
            # req = requests.post(api_url, data={"mc": goods_info['name'], "gg": goods_info['gg'], "tp": goods_info['price'][1]},headers=headers)
            # print(req.text)
        except Exception as e:
            # Best effort: report which node failed, then scroll it away.
            name = ret.offspring('com.meicai.mall:id/tv_goods_name')
            if name.exists():
                print(name.get_text(),f"爬取失败,开始向上滑动\n{e}")
            else:
                print(ret, f"爬取失败,开始向上滑动\n{e}")
            swip_up_one(ret)
    swip_up2(ret_list[-1]) # swipe up by two items, measured on the last node in the list
def main(number=300):
    """Scrape the goods list page by page.

    Resets the global counters, records the height of a goods container in
    the global ``size_h`` and then calls ``get_now_page`` repeatedly.

    Args:
        number: How many times ``get_now_page`` is invoked.
    """
    global index, count, size_h
    index = 1
    count = 1
    # first, second = get_category()
    # cate_name = f'{first}/{second}'
    cate_name = f'未定义'
    print(f'正在爬取 {cate_name} 下所有商品...')
    goods_list = poco(name='com.meicai.mall:id/lv_goods_list').offspring('android:id/list')
    ui_root = goods_list.offspring(name='com.meicai.mall:id/ll_container')
    # Global: height of one product panel.
    size_h = ui_root.get_size()[1]
    # swip_down_one(ui_root)  # pull-down refresh
    remaining = number
    while remaining > 0:
        get_now_page(cate_name)  # scrape the products currently on screen
        remaining -= 1
# Entry point: open the app, make sure the second bottom tab ("all goods") is
# selected, then start scraping.
try:
    start_app(PACKAGE)  # open the Meicai app
    poco('com.meicai.mall:id/tabLayout').wait_for_appearance(5)  # wait for the home tab bar
    is_goods_home = poco('androidx.appcompat.app.ActionBar$Tab')[1].attr('selected')  # is "all goods" selected?
    if not is_goods_home:
        poco('androidx.appcompat.app.ActionBar$Tab')[1].click()  # select "all goods" when it is not
except Exception as exc:
    # Catch Exception rather than everything so Ctrl-C / SystemExit still
    # work, and chain the cause so the real failure stays in the traceback.
    raise AttributeError('美菜网 app打开失败,请检查并手动登录apk') from exc
main()