Appium 爬取微信朋友圈
1、环境
Appium Desktop 1.13.0
android 模拟器 逍遥安卓 sdk 5.1.1
wechat 7.0.4
2、CODE
from appium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import pymongo
import time
import re
# Desired capabilities for the emulator setup; launches WeChat's LauncherUI.
# "noReset" is a real boolean (Appium documents it as a boolean capability),
# so app data is kept between sessions.
desired_caps_xiaoyao = {
    "platformName": "Android",
    "deviceName": "vivo_X9Plus",
    "appPackage": "com.tencent.mm",
    "appActivity": ".ui.LauncherUI",
    "noReset": True,
}

# Desired capabilities for the physical device with deviceName "PBFM00".
desired_caps_oppo = {
    "platformName": "Android",
    "deviceName": "PBFM00",
    "appPackage": "com.tencent.mm",
    "appActivity": ".ui.LauncherUI",
    "noReset": True,
}

# Appium server endpoint.
server = 'http://192.168.43.196:4723/wd/hub'

# MongoDB connection string, database name and collection name.
MONGODB_CLIENT = 'mongodb://192.168.10.3:27017/'
MONGODB_DB = 'moments'
MONGODB_COLLECTION = 'moments'

# Swipe gesture: start point and distance, in pixels.
FLICK_START_X = 300
FLICK_START_Y = 300
FLICK_DISTANCE = 300

# Position of WeChat's "Discover" button as a fraction of the screen
# width (X) and height (Y); multiplied by the window size before tapping.
X = 0.60125
Y = 0.94765625
class Moment(object):
    """Drive WeChat through Appium and scrape posts from the Moments feed.

    The constructor opens an Appium session against ``server`` using
    ``desired_caps_oppo`` and prepares a MongoDB collection handle.
    The resource ids used below are hard-coded; presumably they match the
    WeChat build this script was written against -- verify them with the
    Appium inspector before running against another version.
    """

    # "<N>分钟前" / "<N>小时前" / "<N>天前" -> seconds per unit.
    _RELATIVE_TIME = re.compile(r'(\d+)(分钟前|小时前|天前)')
    _UNIT_SECONDS = {'分钟前': 60, '小时前': 60 * 60, '天前': 24 * 60 * 60}

    def __init__(self):
        self.driver = webdriver.Remote(server, desired_caps_oppo)
        # Every explicit wait below shares this 30 second timeout.
        self.wait = WebDriverWait(self.driver, 30)
        self.client = pymongo.MongoClient(MONGODB_CLIENT)
        self.db = self.client[MONGODB_DB]
        self.collection = self.db[MONGODB_COLLECTION]

    def _wait_for(self, by, value):
        """Block until the element located by (by, value) is present; return it."""
        return self.wait.until(EC.presence_of_element_located((by, value)))

    def login(self, phone="18980641111", password="12345678"):
        """Log in with a phone number and password.

        :param phone: phone number typed into the first input field
            (default: the value that used to be hard-coded here).
        :param password: password typed into the second input field
            (default: the value that used to be hard-coded here).
        :raises selenium.common.exceptions.TimeoutException: if one of the
            mandatory login widgets does not appear within the wait timeout.
        """
        from selenium.common.exceptions import WebDriverException

        self._wait_for(By.ID, 'com.tencent.mm:id/e80').click()
        self._wait_for(By.ID, 'com.tencent.mm:id/l3').send_keys(phone)
        self._wait_for(By.ID, 'com.tencent.mm:id/ay8').click()
        # The password field and the confirm button reuse the same ids.
        self._wait_for(By.ID, 'com.tencent.mm:id/l3').send_keys(password)
        self._wait_for(By.ID, 'com.tencent.mm:id/ay8').click()

        # Optional dialogs that may pop up after logging in (the
        # "see who in your contacts uses WeChat" hint, then the
        # "font size can be adjusted" hint). Both use the same button id.
        # Best effort: stop at the first one that is missing, but only
        # swallow WebDriver errors -- not KeyboardInterrupt or real bugs.
        for _ in range(2):
            try:
                self._wait_for(By.ID, 'com.tencent.mm:id/azz').click()
            except WebDriverException:
                break

    def enter(self):
        """Open the Discover tab and then the first 'android:id/title' entry."""
        # Wait until the Discover button exists. Its click() does not work,
        # so tap its screen position instead.
        self._wait_for(By.ID, 'com.tencent.mm:id/rq')
        size = self.driver.get_window_size()
        self.driver.tap([(size['width'] * X, size['height'] * Y)], 1)
        self._wait_for(By.ID, 'android:id/title').click()

    def craw(self, max_swipes=None, save=False):
        """Swipe through the feed and print every post found on screen.

        :param max_swipes: stop after this many swipes; ``None`` (default)
            keeps scrolling forever, as before.
        :param save: when true, upsert each post into ``self.collection``
            keyed on nickname + content. Defaults to ``False`` (print only).
        """
        swipes = 0
        while max_swipes is None or swipes < max_swipes:
            self.driver.swipe(FLICK_START_X, FLICK_START_Y + FLICK_DISTANCE,
                              FLICK_START_X, FLICK_START_Y)
            swipes += 1
            items = self.wait.until(EC.presence_of_all_elements_located(
                (By.XPATH, "//*[@resource-id='com.tencent.mm:id/emw']")))
            for item in items:
                try:
                    # Nickname, body text and raw timestamp of one post.
                    nickname = item.find_element(
                        By.ID, 'com.tencent.mm:id/b6e').get_attribute('text')
                    content = item.find_element(
                        By.ID, 'com.tencent.mm:id/en0').get_attribute('text')
                    raw_time = item.find_element(
                        By.ID, 'com.tencent.mm:id/ehz').get_attribute('text')
                except NoSuchElementException:
                    # A post only partly on screen lacks some fields; skip it.
                    continue
                date = self.date(raw_time)
                print("发表日期:%s(%s) 昵称:%s 正文:%s " % (date, raw_time, nickname, content))
                if save:
                    data = {
                        'nickname': nickname,
                        'content': content,
                        'date': date,
                    }
                    self.collection.update_one(
                        {'nickname': nickname, 'content': content},
                        {'$set': data},
                        upsert=True,
                    )

    def date(self, datetime, now=None):
        """Convert a relative timestamp label into a ``YYYY-MM-DD`` date.

        Recognised prefixes: ``<N>分钟前``, ``<N>小时前``, ``昨天`` and
        ``<N>天前``. Anything else (including empty/``None``) is returned
        unchanged.

        :param datetime: raw timestamp text.
        :param now: reference time as seconds since the epoch; defaults to
            ``time.time()``.
        :return: the local date as ``YYYY-MM-DD``, or the input unchanged.
        """
        if not datetime:
            return datetime
        if now is None:
            now = time.time()

        matched = self._RELATIVE_TIME.match(datetime)
        if matched:
            seconds_ago = int(matched.group(1)) * self._UNIT_SECONDS[matched.group(2)]
        elif datetime.startswith('昨天'):
            seconds_ago = 24 * 60 * 60
        else:
            return datetime
        return time.strftime('%Y-%m-%d', time.localtime(now - seconds_ago))
# Script entry point: log in, open the feed, then crawl.
if __name__ == '__main__':
    mt = Moment()
    try:
        mt.login()
        mt.enter()
        mt.craw()
    finally:
        # Always release the Appium session and the MongoDB connection,
        # including when the crawl is interrupted or a step raises.
        mt.driver.quit()
        mt.client.close()
3、问题
- 真实设备无法获取发表时间
- WeChat 的“发现”按钮可以获取到,但 click() 方法无法点击:通过 Appium 查看该元素 clickable = false,这种情况只能通过 tap() 点击特定坐标来实现。为适应不同的设备(手机、平板等)或屏幕大小,需要预先计算按钮在屏幕中的相对位置。
- 测试发现,存在记录丢失(昵称和发表信息不同时出现在一页)且信息有大量更新
- 对于真实设备,需要设置 noReset = true,每次启动不再清空数据。