mitmproxy+appium爬取抖音小视频
目标
自动化爬取抖音 APP 小视频:随机进入用户主页,每个用户下载前五个小视频,结果以用户 ID 为子目录保存至 video 目录
环境配置
见:
视频下载
dy_mitmdump.py
import urllib.request
import json
import os
# Root directory that receives one sub-directory per user id.
path = 'C:\\Users\\only\\Desktop\\video\\'
# Only responses whose request URL contains this fragment are processed.
url_key = '/aweme/v1/aweme/post'
# Maximum number of videos downloaded from one intercepted post list.
max_videos = 5
# Longest file-name stem kept from a video description.
max_name_length = 80
# Characters Windows rejects in file names, plus line breaks and tabs; each one
# is replaced with "_" before the description is used as a file name.
invalid_name_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})


def safe_file_name(name, fallback):
    """Return ``name`` reduced to a usable file-name stem.

    Invalid characters are replaced with ``_``, surrounding spaces and dots
    are stripped, and the result is cut to ``max_name_length`` characters.

    Args:
        name: Preferred name (the video description); may be empty.
        fallback: Value used when nothing usable is left of ``name``.

    Returns:
        The cleaned name, or ``str(fallback)`` if the cleaned name is empty.
    """
    cleaned = str(name).translate(invalid_name_chars).strip(' .')
    cleaned = cleaned[:max_name_length].rstrip(' .')
    return cleaned or str(fallback)


def response(flow):
    """Download the first videos of an intercepted user post list.

    Responses whose request URL does not contain ``url_key`` are ignored.
    For a matching response the body is parsed as JSON and up to
    ``max_videos`` entries of ``aweme_list`` are saved as ``.mp4`` files
    under ``path/<uid>/``. Files that already exist are skipped, and a
    failed download is reported without aborting the remaining ones.

    Args:
        flow: Object exposing ``request.url`` and ``response.text``.

    Returns:
        None.
    """
    if url_key not in flow.request.url:
        return

    payload = json.loads(flow.response.text)
    aweme_list = payload.get('aweme_list') or []
    if not aweme_list:
        # Nothing to download; indexing [0] below would raise otherwise.
        return

    # The uid of the first entry names the directory for the whole list.
    u_id = str(aweme_list[0]['author']['uid'])
    user_path = os.path.join(path, u_id)
    # Also creates the root directory when it is missing.
    os.makedirs(user_path, exist_ok=True)

    for aweme in aweme_list[:max_videos]:
        # Description, or the video id when the description is unusable.
        video_name = safe_file_name(aweme.get('desc') or '', aweme['aweme_id'])
        try:
            video_url = aweme['video']['play_addr']['url_list'][0]
        except (KeyError, IndexError, TypeError):
            # Entry carries no playable address; move on to the next one.
            continue

        filename = os.path.join(user_path, video_name)
        target = filename + '.mp4'
        # NOTE: two videos sharing one description map to the same file, so
        # only the first of them is kept.
        if os.path.exists(target):
            print('跳过已存在:' + filename)
            continue

        # Download to a temporary name first so that an interrupted transfer
        # is never mistaken for a finished file on the next run.
        partial = target + '.part'
        try:
            urllib.request.urlretrieve(video_url, filename=partial)
            os.replace(partial, target)
        except OSError as err:
            print('下载失败:' + filename + ' ' + str(err))
            continue
        print('下载完成:' + filename)
Appium自动化测试
dy_appium.py
from appium import webdriver
from appium.webdriver.common.touch_action import TouchAction
from time import sleep
# Address of the Appium server this script connects to.
server = 'http://localhost:4723/wd/hub'
# Capabilities sent when the session is created: target device and the
# package/activity of the app to launch.
desired_caps = {
    'platformName': 'Android',
    'deviceName': 'OPPO_A53m',
    'appPackage': 'com.ss.android.ugc.aweme',
    'appActivity': '.main.MainActivity'
}


def tap_at(driver, x, y):
    """Perform a single TouchAction tap at screen coordinates (x, y)."""
    TouchAction(driver).tap(x=x, y=y).perform()


def element_present(driver, resource_id):
    """Return True when ``driver.find_element_by_id(resource_id)`` succeeds.

    Any exception raised by the lookup is treated as "not present".
    """
    # NOTE(review): find_element_by_id is not available in newer
    # Selenium/Appium client releases - confirm the installed version.
    try:
        driver.find_element_by_id(resource_id)
    except Exception:
        return False
    return True


def return_to_feed(driver):
    """Leave whatever page the avatar tap opened and go back to the feed.

    The page is identified by probing two resource ids in turn; if neither
    is found the page is treated as a live stream.
    """
    if element_present(driver, 'com.ss.android.ugc.aweme:id/aae'):
        # Presumably the user profile page - TODO confirm the id's meaning.
        print('正常返回')
        tap_at(driver, 40, 100)
    elif element_present(driver, 'com.ss.android.ugc.aweme:id/dag'):
        # Presumably an advertisement page - TODO confirm the id's meaning.
        print('关闭广告')
        tap_at(driver, 45, 103)
    else:
        print("关闭直播")
        tap_at(driver, 374, 564)  # dismiss the prompt
        tap_at(driver, 675, 50)  # close the live stream


# Start the session.
driver = webdriver.Remote(server, desired_caps)
print('启动会话完成')
try:
    # Dismiss the start-up hints. All coordinates in this script are absolute
    # screen positions (presumably tuned for the device named above).
    sleep(5)
    tap_at(driver, 360, 1024)  # dismiss the prompt
    sleep(1)
    tap_at(driver, 374, 564)  # dismiss the swipe-up hint

    # Action loop: open the author's page, go back, move to the next video.
    while True:
        sleep(1)
        # Tap to enter the user's home page. driver.tap with a 500 ms duration
        # is used; per the original author's note, a TouchAction tap at
        # (653, 370) sometimes seemed to open the "following" page instead.
        driver.tap([(650, 360)], 500)
        sleep(2)
        return_to_feed(driver)
        sleep(1)
        # Swipe up to the next video. driver.swipe is used; per the original
        # author's note, a TouchAction press/move_to/release chain was
        # sometimes registered as a long press.
        driver.swipe(340, 918, 340, 230)
finally:
    # The loop only ends through an exception or Ctrl+C; always release the
    # Appium session so it does not stay open on the server.
    driver.quit()
Result
以此记录一下爬取过程,欢迎大家一起来学习交流。