appium自动爬取数据

爬取内容:推荐知识点中所有的题目

爬取方式:appium模拟操作获取前端数据

入门级简单实现,只提取文字内容;题目和答案不是文字内容(例如图片)的没有提取出来

适用场景:数据不多、参数加密、反爬严格等场景

python 复制代码
from appium import webdriver
import time
import xlwt
# 定义夜神模拟器的 IP 地址和端口号
from selenium.common.exceptions import NoSuchElementException

# Desired capabilities sent to the Appium server when the session is created.
desired_caps = {
    # Mobile platform under automation.
    'platformName': 'Android',
    # Platform OS version (original author's note: the integer part is enough).
    # NOTE(review): '7.0.5.8' looks like an emulator build number rather than
    # an Android release -- confirm against the device.
    'platformVersion': '7.0.5.8',
    # Device name; per the original author's note any value is accepted.
    'deviceName': '夜神模拟器',
    # Package and entry activity of the app under test. Both can be read with:
    #   adb shell dumpsys window | findstr mCurrentFocus
    'appPackage': 'me.hxyfj.rk',
    'appActivity': 'com.uzmap.pkg.EntranceActivity',
    # Do not reset the app state when the automation session starts.
    'noReset': True,
    'unicodeKeyboard': True,
    'resetKeyboard': True,
    # Session timeout in seconds (the default is 60 s).
    # The key used to be misspelled as 'newConnabdTineout', which is not a
    # capability name, so the intended 6000 s timeout was never applied.
    'newCommandTimeout': 6000,
}

# Connect to the Appium server and launch the app described by desired_caps.
# URL layout: host and port of the machine running appium-server; the path is
# fixed at /wd/hub.
driver = webdriver.Remote('http://localhost:4723/wd/hub', desired_caps)
# Element lookups wait up to 10 seconds before giving up.
driver.implicitly_wait(10)
# Ask for the window size once (each call is a round trip to the server) and
# keep the screen width/height for computing swipe coordinates.
window_size = driver.get_window_size()
width = window_size['width']
height = window_size['height']
# Option letters, in the order the options are read from each question.
anwers = ["A","B","C","D"]
# Header row written to every worksheet.
topics = ["序号","题目","A","B","C","D","答案","解析"]

# Workbook that collects one sheet per category.
workbook=xlwt.Workbook(encoding='utf-8')

def _joined_text(xpath):
    """Return the concatenated ``text`` attribute of every element matching *xpath*.

    Returns an empty string when nothing matches.
    """
    return "".join(
        element.get_attribute("text")
        for element in driver.find_elements_by_xpath(xpath)
    )


def get_questions(question_lists,content_name):
    """Scrape the questions of the currently open category into a new sheet.

    A worksheet named *content_name* is added to the module-level ``workbook``.
    Row 0 receives the ``topics`` header; each following row receives the
    1-based question number, the question text, the text of each option in
    ``anwers`` order and the letter of the option marked as correct. The last
    header column ("解析") is not filled in. The workbook is saved to
    ``choice_question.xls`` once all rows are written.

    Args:
        question_lists: Elements of the question list; only their count is
            used, the data itself is re-read from the screen by index.
        content_name: Category name, used as the worksheet name.

    Raises:
        NoSuchElementException: If an option has no ``android.widget.Image``
            child (raised by the single-element lookup).
    """
    first_option_col = 2
    answer_col = first_option_col + len(anwers)
    list_xpath = "//android.view.View[@resource-id='question_list']"

    # One sheet per category, starting with the header row.
    booksheet = workbook.add_sheet(content_name)
    for col, title in enumerate(topics):
        booksheet.write(0, col, title)

    # The loop index itself is needed: it is the @index of the question view.
    for i in range(len(question_lists)):
        row = i + 1
        item_xpath = list_xpath + "/android.view.View[@index='" + str(i) + "']"

        # The question text appears in one of two layouts: wrapped in a child
        # view, or directly under the question view. Try the wrapped layout
        # first and fall back when it yields no visible text.
        try:
            question_text = _joined_text(
                item_xpath + "/android.view.View[1]/android.widget.TextView")
        except NoSuchElementException:
            question_text = ""
        if not question_text.strip():
            question_text += _joined_text(item_xpath + "/android.widget.TextView")

        booksheet.write(row, 0, row)
        booksheet.write(row, 1, question_text)

        # Options: record each option's text and note which one is correct.
        correct_letter = ""
        for j, letter in enumerate(anwers):
            option_xpath = (item_xpath
                            + "/android.view.View[@clickable='true']["
                            + str(j + 1) + "]")
            marker = driver.find_element_by_xpath(
                option_xpath + "/android.widget.Image").text
            option_text = _joined_text(option_xpath + "/android.widget.TextView")
            # The option whose image text is this marker is the correct one.
            if marker == "ic_exam_answer_true":
                correct_letter = letter
            booksheet.write(row, first_option_col + j, option_text)

        booksheet.write(row, answer_col, correct_letter)
    workbook.save('choice_question.xls')
# driver.find_element_by_xpath("//android.view.View[@resource-id='content-list']/android.view.View[3]").click()
# time.sleep(2)

# Outer level: walk through every clickable subject category, open it, scrape
# its questions into the workbook and return to the category list.
category_xpath = "//android.view.View[@resource-id='content-list']/android.view.View[@clickable='true']"
try:
    content_lists = driver.find_elements_by_xpath(category_xpath)
    for i in range(len(content_lists)):
        time.sleep(1)
        # XPath positions are 1-based.
        entry_xpath = category_xpath + "[" + str(i + 1) + "]"
        # The category title is read from the sibling view just before the entry.
        content_name = driver.find_element_by_xpath(entry_xpath + "/preceding-sibling::android.view.View[1]/android.view.View[2]").text
        driver.find_element_by_xpath(entry_xpath).click()
        time.sleep(1)
        # Pick the second entry of the mode selector.
        driver.find_element_by_xpath("//android.view.View[@resource-id='mode']/android.view.View[2]").click()
        time.sleep(1)
        question_lists = driver.find_elements_by_xpath("//android.view.View[@resource-id='question_list']/android.view.View")
        get_questions(question_lists,content_name)
        # Key code 4 is the Android BACK key: return to the category list.
        driver.keyevent(4)
        time.sleep(1)
        # After the fifth category, swipe up so the lower categories show.
        if i == 4:
            driver.swipe(width * 0.5, height * 0.9, width * 0.5, height * 0.1, 1000)
        time.sleep(1)
finally:
    # End the Appium session even when a step above fails, so the server is
    # not left holding a dangling session.
    driver.quit()

结果:

相关推荐
方也_arkling1 小时前
【Java-Day08】static / final / 枚举
java·开发语言
风吹夏回1 小时前
Python 全局异常处理:从“满屏 try-except”到优雅兜底
开发语言·python
Chengbei112 小时前
一站式源码安全检测工具、云安全 / APP / 小程序源码敏感信息递归多层目录扫描AK、JWT、手机号、身份证等敏感信息
java·开发语言·安全·web安全·网络安全·系统安全·安全架构
llz_1122 小时前
web-第一次课后作业
java·开发语言·idea
小熊Coding2 小时前
Python爬取当当网二手图书项目实战!
开发语言·爬虫·python·beautifulsoup·requests·二手图书
秋92 小时前
Java项目运行5天左右自动宕机:系统性定位与解决方案
java·开发语言·python
小江的记录本2 小时前
【JVM虚拟机】垃圾回收GC:垃圾收集器:CMS:核心原理、回收流程、优缺点、废弃原因(附《思维导图》+《面试高频考点清单》)
java·jvm·后端·python·spring·面试·maven
xiaoshuaishuai82 小时前
C# 内存管理与资源泄漏
开发语言·c#
lsx2024063 小时前
SVN 检出操作
开发语言
田里的水稻3 小时前
OE_ubuntu26.04与宿主机之间复制粘贴内容
人工智能·python·机器人