appium自动爬取数据

爬取内容:推荐知识点中所有的题目

爬取方式:appium模拟操作获取前端数据

入门级简单实现,只提取文字内容;题目和答案不是文字内容(例如图片)的没有提取出来

适用场景:数据不多、参数加密、反爬严格等场景

python 复制代码
from appium import webdriver
import time
import xlwt
# NoSuchElementException is raised when an element lookup finds no match
from selenium.common.exceptions import NoSuchElementException

# Desired capabilities sent to the Appium server when the session is created.
desired_caps = {
    # Target mobile platform.
    'platformName': 'Android',
    # Platform OS version; per the original author the integer part is enough.
    # NOTE(review): '7.0.5.8' looks like an emulator build number rather than
    # an Android version - confirm against the actual device.
    'platformVersion': '7.0.5.8',
    # Device name; per the original author any value is accepted here.
    'deviceName': '夜神模拟器',
    # Package and entry activity of the app under test. Both can be read from
    # the running device with:
    #   adb shell dumpsys window | findstr mCurrentFocus
    'appPackage': 'me.hxyfj.rk',
    'appActivity': 'com.uzmap.pkg.EntranceActivity',
    # Keep the app's state between automation runs (do not reset the app).
    'noReset': True,
    'unicodeKeyboard': True,
    'resetKeyboard': True,
    # Session timeout in seconds (default 60 s): how long the server waits for
    # the next command before ending the session. The key was previously
    # misspelled ('newConnabdTineout'), so the default applied instead.
    'newCommandTimeout': 6000,
}

# Create the driver object that controls the device and launches the app
# under test.
# URL parts: network address of the machine running the Appium server, the
# port it listens on, and the fixed path /wd/hub.
driver = webdriver.Remote('http://localhost:4723/wd/hub', desired_caps)
# Let element lookups wait up to 10 seconds before giving up.
driver.implicitly_wait(10)
# Screen width and height, used later to compute swipe coordinates.
# Fetched with a single request instead of one request per dimension.
_window_size = driver.get_window_size()
width = _window_size['width']
height = _window_size['height']
# Option labels, in the order the options are read from the screen.
anwers = ["A","B","C","D"]
# Header row of every exported sheet.
topics = ["序号","题目","A","B","C","D","答案","解析"]

# Workbook that stores the scraped questions (one sheet per category).
workbook=xlwt.Workbook(encoding='utf-8')

# XPath of the container that holds one child view per question.
_QUESTION_LIST_XPATH = "//android.view.View[@resource-id='question_list']"


def _joined_text(xpath):
    """Return the concatenated ``text`` attribute of all elements matching *xpath*.

    Returns an empty string when nothing matches.
    """
    return "".join(
        element.get_attribute("text")
        for element in driver.find_elements_by_xpath(xpath)
    )


def get_questions(question_lists,content_name):
    """Export the questions of the currently displayed category to a new sheet.

    A sheet named *content_name* is added to the module-level ``workbook``,
    the header row from ``topics`` is written, and one row per question
    follows: running number, question text, the four option texts and the
    label of the correct option. The workbook is then saved to
    ``choice_question.xls``. The last header column ("解析") is written but
    never filled in by this function.

    Args:
        question_lists: The question elements of the current page. Only the
            length is used; each question is re-located by its ``@index``.
        content_name: Category name, used as the sheet name.

    Raises:
        NoSuchElementException: If an option has no ``android.widget.Image``
            child (the marker lookup uses a single-element find).
    """
    booksheet = workbook.add_sheet(content_name)
    for column, title in enumerate(topics):
        booksheet.write(0, column, title)

    for index in range(len(question_lists)):
        row = index + 1
        item_xpath = "{}/android.view.View[@index='{}']".format(
            _QUESTION_LIST_XPATH, index)

        # The question text appears in one of two layouts: inside the first
        # child view, or directly under the question view.
        question_text = _joined_text(
            item_xpath + "/android.view.View[1]/android.widget.TextView")
        if not question_text.strip():
            # Appended (not assigned) so the cell content is unchanged even
            # when the first layout yielded only whitespace.
            question_text += _joined_text(
                item_xpath + "/android.widget.TextView")

        booksheet.write(row, 0, row)
        booksheet.write(row, 1, question_text)

        correct_label = ""
        for position, label in enumerate(anwers, start=1):
            option_xpath = "{}/android.view.View[@clickable='true'][{}]".format(
                item_xpath, position)
            # The text of the option's image is compared against the
            # "ic_exam_answer_true" marker to find the correct option.
            marker = driver.find_element_by_xpath(
                option_xpath + "/android.widget.Image").text
            option_text = _joined_text(
                option_xpath + "/android.widget.TextView")
            if marker == "ic_exam_answer_true":
                correct_label = label
            # Options A-D occupy columns 2-5.
            booksheet.write(row, position + 1, option_text)

        # Column 6 holds the label of the correct option (empty if none found).
        booksheet.write(row, 6, correct_label)
    workbook.save('choice_question.xls')
# Outer loop over the subject categories: open each one, export its
# questions, then return to the category list.
_CATEGORY_XPATH = "//android.view.View[@resource-id='content-list']/android.view.View[@clickable='true']"

try:
    content_lists = driver.find_elements_by_xpath(_CATEGORY_XPATH)
    for i in range(len(content_lists)):
        time.sleep(1)
        # Categories are re-located by position on every pass instead of
        # reusing the elements found before the loop.
        category_xpath = "{}[{}]".format(_CATEGORY_XPATH, i + 1)
        # The category name is read from the sibling view directly before the
        # clickable entry.
        content_name = driver.find_element_by_xpath(category_xpath + "/preceding-sibling::android.view.View[1]/android.view.View[2]").text
        driver.find_element_by_xpath(category_xpath).click()
        time.sleep(1)
        # Click the second child of the 'mode' view.
        # NOTE(review): presumably selects the mode that shows the answers - confirm.
        driver.find_element_by_xpath("//android.view.View[@resource-id='mode']/android.view.View[2]").click()
        time.sleep(1)
        question_lists = driver.find_elements_by_xpath("//android.view.View[@resource-id='question_list']/android.view.View")
        get_questions(question_lists,content_name)
        # Key code 4 is Android's KEYCODE_BACK: go back to the category list.
        driver.keyevent(4)
        time.sleep(1)
        # Swipe up so the categories further down come into view. The index is
        # hard-coded by the original author.
        if i == 4:
            driver.swipe(width * 0.5, height * 0.9, width * 0.5, height * 0.1, 1000)
        time.sleep(1)
finally:
    # Always end the Appium session, even when a lookup above fails, so a
    # failed run does not leave the session open on the server.
    driver.quit()

结果:

相关推荐
Abadbeginning2 分钟前
FastSoyAdmin centos7云服务器+宝塔部署
vue.js·后端·python
kida_yuan21 分钟前
【从零开始】13. 数据增强(Data Augmentation)
数据结构·python·nlp
蜀中廖化42 分钟前
bash:trtexec:command not found
开发语言·bash
封奚泽优1 小时前
班级互动小程序(Python)
python·deepseek
堕落年代1 小时前
小红书JS SDK签名过程
开发语言·javascript·ecmascript
MediaTea1 小时前
Python:math 库函数手册(双曲函数)
开发语言·python
€8111 小时前
Java入门级教程16——JUC的安全并发包机制
java·开发语言·juc的安全并发包机制·栅栏机制·闭锁机制·信号量机制·无锁机制
枫叶V1 小时前
用 FastAPI 实现大文件分片上传与断点续传(含可运行示例与客户端脚本,仅供参考)
python·fastapi
神仙别闹1 小时前
基于 Python + redis + flask 的在线聊天室
redis·python·flask