# XXX公众号题库爬取程序 (question-bank scraper for the "XXX" WeChat official account)

#爬取题库
#导入模块
from appium import webdriver
from appium.webdriver.common.appiumby import By
from appium.webdriver.common.appiumby import AppiumBy
from appium.webdriver.common.touch_action import TouchAction
from time import sleep
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
import xlsxwriter
import requests
import os
from tkinter import *
import tkinter.ttk
import random
# ['NATIVE_APP', 'WEBVIEW_com.tencent.mm:tools']
#开始函数####################################################################################################################################
def start():
    """Button callback: hand the GUI inputs to the scraper.

    Reads the save path and the card count from the two entry widgets and
    forwards both, as the raw strings the widgets return, to start_write().
    """
    start_write(path_e.get(), num_e.get())
#开始写入####################################################################################################################################
def start_write(path, num):
    """Open an Appium session on the Android device and scrape ``num`` cards.

    Args:
        path: Directory that receives the ``.xlsx`` files; downloaded option
            images are stored in its ``pic`` sub-directory.
        num: Number of cards to scrape; any value ``int()`` accepts.

    Raises:
        ValueError: If ``num`` cannot be converted to an integer.
    """
    # Parse the count first so bad input fails before a device session is opened.
    cards = int(num)
    # Device / session capabilities
    desired_caps = {
        "platformName": "Android",
        "deviceName": "V0G-AL00",
        "platformVersion": "9.0.0",
        "appPackage": "com.tencent.mm",
        "appActivity": "com.tencent.mm.ui.LauncherUI",
        "automationName": "Uiautomator2",
        "noReset": True,
        "unicodeKeyboard": True,
        "resetKeyboard": True
    }
    # Make sure the image folder exists. os.remove() cannot delete a directory
    # and an already existing folder must not abort the run, so the folder is
    # created idempotently instead of "remove or create".
    os.makedirs(os.path.join(path, 'pic'), exist_ok=True)
    # Connect to the device
    driver = webdriver.Remote('http://127.0.0.1:4723/wd/hub', desired_caps)
    try:
        driver.implicitly_wait(15)
        # Tap the official account entry
        gzh_text = 'new UiSelector().text("公众号名称")'
        # AppiumBy is the locator class used everywhere else in this file.
        driver.find_element(AppiumBy.ANDROID_UIAUTOMATOR, gzh_text).click()
        sleep(10)
        write_card(cards, driver, path)
        sleep(1)
    finally:
        # Always release the session, even when scraping fails half-way.
        driver.quit()
#卡片写入####################################################################################################################################
def write_card(cards, driver, path):
    """Scrape every card into its own workbook and advance the progress bar.

    For each card the four question sets (all/choice, all/judge,
    featured/choice, featured/judge) are scraped in that order. The card page
    is re-entered through enter_card() before each set.

    Args:
        cards: Number of cards to process; cards are addressed 1..cards.
        driver: Active Appium driver.
        path: Output directory for the ``.xlsx`` files and the ``pic`` folder.
    """
    # One counter for the whole run: rate_refresh() scales by cards * 4, so a
    # counter that restarts for every card would never get past 1/cards.
    done = 0
    for card in range(1, cards + 1):
        card_name = enter_card(card, driver)
        # One workbook per card, named after the card title
        wb = xlsxwriter.Workbook(os.path.join(path, '{}.xlsx'.format(card_name)))
        steps = (
            lambda: all_choose(wb, driver, path, card_name),
            lambda: all_judge(wb, driver),
            lambda: some_choose(wb, driver, path, card_name),
            lambda: some_judge(wb, driver),
        )
        try:
            for index, step in enumerate(steps):
                if index:
                    # enter_card() above already opened the card for the
                    # first step; later steps have to open it again.
                    enter_card(card, driver)
                step()
                done += 1
                rate_refresh(done, cards)
        finally:
            # Close the workbook even on failure so it is not left open.
            wb.close()
#全部题型 选择题写入#########################################################################################################################
def all_choose(workbook, driver, path, card_name):
    """Scrape the multiple-choice questions of the "全部题型" set into a new sheet.

    Args:
        workbook: Workbook that receives the sheet "选择题_全部题型".
        driver: Active Appium driver, already inside the card's web page.
        path: Output directory, forwarded to write_choose().
        card_name: Card title, forwarded to write_choose().
    """
    page = '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view'
    start_button = page + '/uni-view[2]/uni-view[2]/uni-view[2]'
    wait = WebDriverWait(driver, 10)

    def tap(xpath):
        # Wait until the element is clickable, then click it
        wait.until(EC.element_to_be_clickable((AppiumBy.XPATH, xpath))).click()

    # Enter sequential reading
    tap(page + '/uni-view[2]/uni-view[4]/uni-label')
    # Read the question count; the first and last character of the label are
    # dropped (presumably surrounding brackets - confirm against the page)
    count_label = wait.until(EC.presence_of_element_located(
        (AppiumBy.XPATH, page + '/uni-view[2]/uni-view[2]/uni-view[1]/uni-label[2]')))
    total = int(count_label.text[1:-1])
    # Start reading
    tap(start_button)
    sleep(5)
    # Open the question booklet to adjust the question order
    tap(page + '/uni-view[3]/uni-view[2]')
    sleep(3)
    # Click the first sequence number
    tap(page + '/uni-view[3]/uni-view[2]/uni-scroll-view/div/div/div/uni-view/uni-view[1]')
    sleep(3)
    # Go back one step (key code 4), then enter again
    driver.press_keycode(4)
    sleep(5)
    tap(start_button)
    sleep(5)
    # Create the worksheet and fill it
    worksheet = workbook.add_worksheet("选择题_全部题型")
    write_choose(worksheet, total, driver, path, card_name, "全部题型")

#全部题型 判断题写入#########################################################################################################################
def all_judge(workbook, driver):
    """Scrape the true/false questions of the "全部题型" set into a new sheet.

    Args:
        workbook: Workbook that receives the sheet "判断题_全部题型".
        driver: Active Appium driver, already inside the card's web page.
    """
    page = '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view'
    start_button = page + '/uni-view[2]/uni-view[3]/uni-view[2]'
    wait = WebDriverWait(driver, 10)

    def tap(xpath):
        # Wait until the element is clickable, then click it
        wait.until(EC.element_to_be_clickable((AppiumBy.XPATH, xpath))).click()

    # Enter sequential reading
    tap(page + '/uni-view[2]/uni-view[4]/uni-label')
    # Read the question count; the first and last character of the label are
    # dropped (presumably surrounding brackets - confirm against the page)
    count_label = wait.until(EC.presence_of_element_located(
        (AppiumBy.XPATH, page + '/uni-view[2]/uni-view[3]/uni-view[1]/uni-label[2]')))
    total = int(count_label.text[1:-1])
    # Start reading
    tap(start_button)
    sleep(5)
    # Open the question booklet to adjust the question order
    tap(page + '/uni-view[3]/uni-view[2]')
    sleep(3)
    # Click the first sequence number
    tap(page + '/uni-view[3]/uni-view[2]/uni-scroll-view/div/div/div/uni-view/uni-view[1]')
    sleep(3)
    # Go back one step (key code 4), then enter again
    driver.press_keycode(4)
    sleep(5)
    tap(start_button)
    sleep(5)
    # Create the worksheet and start recording questions
    worksheet = workbook.add_worksheet("判断题_全部题型")
    write_judge(worksheet, total, driver)
#精选题型 选择题写入#########################################################################################################################
def some_choose(workbook, driver, path, card_name):
    """Scrape the multiple-choice questions of the "精选题型" set into a new sheet.

    Same flow as the "全部题型" variant, with one extra click that switches the
    page to the featured question set before the count is read.

    Args:
        workbook: Workbook that receives the sheet "选择题_精选题型".
        driver: Active Appium driver, already inside the card's web page.
        path: Output directory, forwarded to write_choose().
        card_name: Card title, forwarded to write_choose().
    """
    page = '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view'
    start_button = page + '/uni-view[2]/uni-view[2]/uni-view[2]'
    wait = WebDriverWait(driver, 10)

    def tap(xpath):
        # Wait until the element is clickable, then click it
        wait.until(EC.element_to_be_clickable((AppiumBy.XPATH, xpath))).click()

    # Enter sequential reading
    tap(page + '/uni-view[2]/uni-view[4]/uni-label')
    sleep(3)
    # Switch to the featured question set. The tab caption used to be read
    # into a variable that was overwritten before use; that dead read (one
    # extra round trip to the device) is gone.
    tap(page + '/uni-view[2]/uni-view[1]/uni-view[2]/uni-view/uni-text/span')
    sleep(1)
    # Read the question count; the first and last character of the label are
    # dropped (presumably surrounding brackets - confirm against the page)
    count_label = wait.until(EC.presence_of_element_located(
        (AppiumBy.XPATH, page + '/uni-view[2]/uni-view[2]/uni-view[1]/uni-label[3]')))
    total = int(count_label.text[1:-1])
    # Start reading
    tap(start_button)
    sleep(5)
    # Open the question booklet to adjust the question order
    tap(page + '/uni-view[3]/uni-view[2]')
    sleep(3)
    # Click the first sequence number
    tap(page + '/uni-view[3]/uni-view[2]/uni-scroll-view/div/div/div/uni-view/uni-view[1]')
    sleep(3)
    # Go back one step (key code 4), then enter again
    driver.press_keycode(4)
    sleep(5)
    tap(start_button)
    sleep(5)
    # Create the worksheet and fill it
    sheet = workbook.add_worksheet("选择题_精选题型")
    write_choose(sheet, total, driver, path, card_name, "精选题型")
#精选题型 判断题写入#########################################################################################################################
def some_judge(workbook, driver):
    """Scrape the true/false questions of the "精选题型" set into a new sheet.

    Same flow as the "全部题型" variant, with one extra click that switches the
    page to the featured question set before the count is read.

    Args:
        workbook: Workbook that receives the sheet "判断题_精选题型".
        driver: Active Appium driver, already inside the card's web page.
    """
    page = '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view'
    start_button = page + '/uni-view[2]/uni-view[3]/uni-view[2]'
    wait = WebDriverWait(driver, 10)

    def tap(xpath):
        # Wait until the element is clickable, then click it
        wait.until(EC.element_to_be_clickable((AppiumBy.XPATH, xpath))).click()

    # Enter sequential reading
    tap(page + '/uni-view[2]/uni-view[4]/uni-label')
    sleep(1)
    # Switch to the featured question set. The tab caption used to be read
    # into a local that was never used; that read (one extra round trip to
    # the device) is gone.
    tap(page + '/uni-view[2]/uni-view[1]/uni-view[2]/uni-view/uni-text/span')
    sleep(1)
    # Read the question count; the first and last character of the label are
    # dropped (presumably surrounding brackets - confirm against the page)
    count_label = wait.until(EC.presence_of_element_located(
        (AppiumBy.XPATH, page + '/uni-view[2]/uni-view[3]/uni-view[1]/uni-label[3]')))
    total = int(count_label.text[1:-1])
    # Start reading
    tap(start_button)
    sleep(5)
    # Open the question booklet to adjust the question order
    tap(page + '/uni-view[3]/uni-view[2]')
    sleep(3)
    # Click the first sequence number
    tap(page + '/uni-view[3]/uni-view[2]/uni-scroll-view/div/div/div/uni-view/uni-view[1]')
    sleep(3)
    # Go back one step (key code 4), then enter again
    driver.press_keycode(4)
    sleep(5)
    tap(start_button)
    sleep(5)
    # Create the worksheet and start recording questions
    sheet = workbook.add_worksheet("判断题_精选题型")
    write_judge(sheet, total, driver)
#写入选择题####################################################################################################################################
def write_choose(sheet, nums, driver, path, card_name, qbt):
    """Record ``nums`` multiple-choice questions, one per worksheet row.

    Row layout: column 0 running number, 1 question text, 2 explanation,
    3-5 options A-C (as text, and/or as an embedded image downloaded into
    ``<path>/pic``), 6 answer. After the last question the function presses
    the back key four times and switches the driver to the native context.

    Args:
        sheet: Worksheet to fill.
        nums: Number of questions to read.
        driver: Active Appium driver showing the first question.
        path: Output directory; images go to its ``pic`` sub-directory.
        card_name: Card title, used in the image file names.
        qbt: Question-set label, used in the image file names.
    """
    page = '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view/uni-view[2]'
    # Container of option N; "{}" takes the 1-based option index
    option = page + '/uni-view[2]/uni-radio-group/uni-label[{}]/uni-view[2]/uni-view/uni-view'
    letters = "ABC"
    wait = WebDriverWait(driver, 10)
    # Build the folder with os.path.join parts: a literal 'pic\...' is an
    # invalid escape sequence and only forms a path on Windows.
    pic_dir = os.path.join(path, 'pic')
    os.makedirs(pic_dir, exist_ok=True)

    for row in range(nums):
        # Running number
        sheet.write(row, 0, row + 1)
        # Question text
        tm = wait.until(EC.presence_of_element_located(
            (AppiumBy.XPATH, page + '/uni-view[1]/uni-view/uni-view/uni-text/span')))
        sheet.write(row, 1, tm.text)
        # Explanation (optional)
        try:
            jx = driver.find_element(AppiumBy.XPATH, page + '/uni-view[5]/uni-view[2]')
            sheet.write(row, 2, jx.text)
        except NoSuchElementException:
            pass
        # Text options: all three are located before anything is written, so
        # a question without three text options leaves columns 3-5 untouched.
        try:
            spans = [
                driver.find_element(AppiumBy.XPATH, option.format(index) + '/uni-text/span')
                for index in (1, 2, 3)
            ]
            for offset, (letter, span) in enumerate(zip(letters, spans)):
                sheet.write(row, 3 + offset, "{}: {}".format(letter, span.text))
        except NoSuchElementException:
            pass
        # Image options: download each picture and embed it in the sheet.
        # Processing stops at the first option that has no image.
        try:
            for offset, letter in enumerate(letters):
                img = driver.find_element(AppiumBy.XPATH, option.format(offset + 1) + '/img')
                url = str(img.get_attribute('src'))
                file_name = '{}_{}_{}_{}_{}.png'.format(card_name, qbt, "选择题", row + 1, letter)
                img_path = os.path.join(pic_dir, file_name)
                # A timeout keeps one stalled download from hanging the whole
                # run; raise_for_status() avoids saving an error page as .png.
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                with open(img_path, 'wb') as f:
                    f.write(response.content)
                sheet.insert_image(row, 3 + offset, img_path)
        except NoSuchElementException:
            # Expected for questions whose options are plain text
            pass
        except (requests.RequestException, OSError) as err:
            # Best effort: report the failure and continue with the next field
            print("Failed to save option image for question {}: {}".format(row + 1, err))
        # Answer
        daan = wait.until(EC.presence_of_element_located(
            (AppiumBy.XPATH, page + '/uni-view[5]/uni-view[1]/uni-text/span')))
        sheet.write(row, 6, "答案: " + daan.text)
        # Next question
        xyt = wait.until(EC.element_to_be_clickable(
            (AppiumBy.XPATH, page + '/uni-view[4]/uni-view[2]')))
        xyt.click()
        # Random 0.1-1.0 s pause between questions
        sleep(random.randint(1, 10) / 10)
    # Leave the web pages: back key (key code 4) four times
    for _ in range(4):
        driver.press_keycode(4)
        sleep(5)
    driver.switch_to.context('NATIVE_APP')
    sleep(5)
#写入判断题##################################################################################################################################
def write_judge(sheet, nums, driver):
    """Record ``nums`` true/false questions, one per worksheet row.

    Row layout: column 0 running number, 1 question text, 2 explanation,
    3-4 the two option texts, 5 answer. After the last question the function
    presses the back key four times and switches the driver to the native
    context.

    Args:
        sheet: Worksheet to fill.
        nums: Number of questions to read.
        driver: Active Appium driver showing the first question.
    """
    page = '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view/uni-view[2]'
    # Text node of option N; "{}" takes the 1-based option index
    option_text = (page + '/uni-view[2]/uni-radio-group/uni-label[{}]'
                   '/uni-view[2]/uni-view/uni-view/uni-text/span')
    wait = WebDriverWait(driver, 10)

    for row in range(nums):
        # Running number
        sheet.write(row, 0, row + 1)
        # Question text
        question = wait.until(EC.presence_of_element_located(
            (AppiumBy.XPATH, page + '/uni-view[1]/uni-view/uni-view/uni-text/span')))
        sheet.write(row, 1, question.text)
        # Explanation (optional)
        try:
            explanation = driver.find_element(AppiumBy.XPATH, page + '/uni-view[5]/uni-view[2]')
            sheet.write(row, 2, explanation.text)
        except NoSuchElementException:
            pass
        # Options: both are located before either one is written
        try:
            choices = [
                driver.find_element(AppiumBy.XPATH, option_text.format(index))
                for index in (1, 2)
            ]
            for column, choice in enumerate(choices, start=3):
                sheet.write(row, column, choice.text)
        except NoSuchElementException:
            pass
        # Answer
        answer = wait.until(EC.presence_of_element_located(
            (AppiumBy.XPATH, page + '/uni-view[5]/uni-view[1]/uni-text/span')))
        sheet.write(row, 5, "答案: " + answer.text)
        # Next question
        next_button = wait.until(EC.element_to_be_clickable(
            (AppiumBy.XPATH, page + '/uni-view[4]/uni-view[2]')))
        next_button.click()
        # Random 0.1-1.0 s pause between questions
        pause = random.randint(1, 10) / 10
        sleep(float(pause))
    # Leave the web pages: back key (key code 4) four times
    for _ in range(4):
        driver.press_keycode(4)
        sleep(5)
    driver.switch_to.context('NATIVE_APP')
    sleep(5)
#进度刷新####################################################################################################################################
def rate_refresh(now, number):
    """Show the progress for ``now`` finished steps out of ``number`` cards.

    Every card counts as four steps, so the bar value is the fraction
    now / (number * 4) expressed in percent. The window is redrawn right
    away and the call then pauses for 10 ms.
    """
    fraction = now / (number * 4)
    pg['value'] = fraction * 100
    top.update()
    sleep(0.01)
#从公众号进入卡片页############################################################################################################################
def enter_card(card, driver):
    """Open card number ``card`` from the official-account screen.

    Taps the "模拟考试" menu entry in the native UI, switches the driver to the
    WeChat web-view context and clicks the title of the requested card.

    Args:
        card: 1-based position of the card on the card page.
        driver: Active Appium driver, currently in the native context.

    Returns:
        The card title as shown on the page (read before the click).
    """
    # Tap the menu entry that leads to the card page. AppiumBy is the locator
    # class used everywhere else in this file and defines ANDROID_UIAUTOMATOR
    # itself.
    mn_text = 'new UiSelector().text("模拟考试")'
    driver.find_element(AppiumBy.ANDROID_UIAUTOMATOR, mn_text).click()
    sleep(20)
    # Switch the driver to the web view
    driver.switch_to.context('WEBVIEW_com.tencent.mm:tools')
    sleep(10)
    # Open the card
    kpmc = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((AppiumBy.XPATH, '/html/body/uni-app/uni-page/uni-page-wrapper/uni-page-body/uni-view/uni-view[2]/uni-view[{}]/uni-view[1]/uni-view[2]/uni-view'.format(card))))
    card_name = kpmc.text
    kpmc.click()
    sleep(3)
    return card_name

# Main window
top = Tk()
top.title("爬取题库")
top.geometry("500x300")

# Static captions: save path, card count, progress bar
for _caption, _row_y in (
    ("请输入保存路径:", 50),
    ("请输入卡片数量:", 100),
    ("下载显示进度条:", 200),
):
    Label(top, text=_caption).place(x=50, y=_row_y, width=100, height=30)

# Input fields read by start()
path_e = Entry(top)
path_e.place(x=170, y=50, width=280, height=30)
num_e = Entry(top)
num_e.place(x=170, y=100, width=280, height=30)

# Progress bar driven by rate_refresh(); the scale is 0-100
pg = tkinter.ttk.Progressbar(top, length=200, mode="determinate", orient=tkinter.HORIZONTAL)
pg.place(x=170, y=200, width=280, height=30)
pg.configure(maximum=100, value=0)

# Start button, then hand control to the Tk event loop
Button(top, text="开始爬取", command=start).place(x=200, y=150, width=100, height=30)
top.mainloop()

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值