基于uiautomation的自动化流程RPA开源开发演示

本文介绍了一个基于uiautomation的RPA开源开发框架，包含三大核心模块：

1、捕获控件模块，通过增强型控件捕捉工具实现可视化元素定位和属性提取；

2、运行代码模块，提供Python代码编辑器与执行环境，支持语法高亮和自动补全；

3、核心解析模块，通过JSON配置文件实现UI自动化操作。

该框架支持浏览器自动化、控件交互（点击/输入）、屏幕截图等功能，采用分层架构设计，各模块通过标准接口通信。关键技术包括Windows UI自动化、控件树解析、坐标定位和异常处理机制，适用于构建Windows桌面应用的自动化流程。

RPA一共分为3大模块：

1、捕获控件模块

2、运行代码模块

3、核心解析模块

代码如下：

复制代码

# Demo: drive a browser login through the RPA framework.
import os
import sys
import time

# Make modules in the current working directory importable. This has to run
# BEFORE the RPA import below; appending afterwards cannot help locate RPA.
sys.path.append(os.getcwd())

from RPA import RPA

rpa = RPA()

# Open the login page
rpa.app.chrome(url="xxxx")

# Type the account / phone number
rpa.get("账号").input(input_text="11111")
time.sleep(1)
# Type the password
rpa.get("密码").input(input_text="666666")
time.sleep(1)
# Click the login button
rpa.get("登录").click()

python 复制代码

# Demo: drive a browser login through the RPA framework.
import os
import sys
import time

# Make modules in the current working directory importable. This has to run
# BEFORE the RPA import below; appending afterwards cannot help locate RPA.
sys.path.append(os.getcwd())

from RPA import RPA

rpa = RPA()

# Open the login page
rpa.app.chrome(url="xxxx")

# Type the account / phone number
rpa.get("账号").input(input_text="11111")
time.sleep(1)
# Type the password
rpa.get("密码").input(input_text="666666")
time.sleep(1)
# Click the login button
rpa.get("登录").click()

捕捉到的控件：

控件捕捉器自身窗口的捕获结果：

复制代码

{
  "基本信息": {
    "控件类型": "PaneControl",
    "是否Chrome窗口": "否",
    "是否可见": "是",
    "深度": 5,
    "系统名称": "",
    "自动化ID": "",
    "子节点数量": 0,
    "捕获时间": "2025-08-15 09:32:48",
    "用户名称": "123",
    "功能描述": "",
    "备注信息": ""
  },
  "所属顶层窗口信息": {
    "窗口标题": "控件捕捉器",
    "窗口句柄": "0x51022",
    "窗口类名": "TkTopLevel",
    "窗口位置": [
      558,
      297
    ],
    "窗口尺寸": [
      1016,
      739
    ],
    "自动化ID": ""
  },
  "控件树结构": [
    {
      "id": 1755221568652,
      "parent_id": null,
      "depth": 5,
      "type": "PaneControl",
      "name": "",
      "handle": "N/A",
      "automation_id": "",
      "position": [
        576,
        338
      ],
      "size": [
        578,
        27
      ],
      "visible": "是",
      "enabled": "是",
      "is_target": true,
      "is_chrome_window": "否"
    }
  ]
}

百度网页搜索框的捕获结果：

复制代码

{
  "基本信息": {
    "控件类型": "EditControl",
    "是否Chrome窗口": "是",
    "是否可见": "是",
    "深度": 22,
    "系统名称": "某大厂CEO误喝隔夜水险丧命",
    "自动化ID": "chat-textarea",
    "子节点数量": 0,
    "捕获时间": "2025-08-15 09:35:17",
    "用户名称": "baidu",
    "功能描述": "",
    "备注信息": ""
  },
  "所属顶层窗口信息": {
    "窗口标题": "百度一下，你就知道 - Google Chrome",
    "窗口句柄": "0xe0fb4",
    "窗口类名": "Chrome_WidgetWin_1",
    "窗口位置": [
      100,
      163
    ],
    "窗口尺寸": [
      1567,
      727
    ],
    "自动化ID": ""
  },
  "控件树结构": [
    {
      "id": 1755221717257,
      "parent_id": null,
      "depth": 22,
      "type": "EditControl",
      "name": "某大厂CEO误喝隔夜水险丧命",
      "handle": "N/A",
      "automation_id": "chat-textarea",
      "position": [
        493,
        479
      ],
      "size": [
        771,
        26
      ],
      "visible": "是",
      "enabled": "是",
      "is_target": true,
      "is_chrome_window": "是"
    }
  ]
}

1、捕获控件模块：

python 复制代码

import uiautomation as auto
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import win32gui
import win32con
import json
import keyboard
from datetime import datetime
import os
import sys
import time


class EnhancedControlCaptureTool:
    def __init__(self, root):
        """Build the capture tool inside *root* and start the mouse-tracking loop.

        Args:
            root: The ``tk.Tk`` main window that hosts the tool.
        """
        self.root = root
        self.root.title("控件捕捉器")
        self.root.geometry("1000x700")
        self.root.resizable(True, True)

        # Capture state
        self.capture_active = False
        self.target_control = None
        self.save_dir = ""
        self.highlight_hwnd = None
        self.last_highlighted = None  # control currently outlined by the overlay
        self.highlight_update_delay = 0.05  # minimum seconds between highlight refreshes
        self.last_highlight_time = 0  # time of the last highlight refresh
        self.last_control_info = None  # identity tuple of the last processed control
        self.highlight_selected_active = False  # whether the selected control stays highlighted

        # Window type flags
        self.is_chrome_window = False
        self.is_chrome_render_window = False

        self.init_highlight_window()
        self.create_widgets()
        self.bind_hotkeys()

        # Handle of the main window, used by track_mouse to exclude the tool itself.
        # winfo_id() is the HWND of Tk's inner child window, while the top-level
        # window reported for a control is the wrapper that parents it. Flush idle
        # tasks so the wrapper exists, then take the parent; fall back to the inner
        # handle when no parent is reported.
        self.root.update_idletasks()
        inner_hwnd = self.root.winfo_id()
        self.root_hwnd = win32gui.GetParent(inner_hwnd) or inner_hwnd

        self.root.after(100, self.track_mouse)

    def init_highlight_window(self):
        """Create the hidden, borderless, always-on-top overlay used to outline controls.

        The overlay holds a white canvas (white is the transparent colour) and has
        ``WS_EX_TRANSPARENT`` added to its extended window style.
        """
        self.highlight_window = tk.Toplevel(self.root)
        overlay = self.highlight_window
        overlay.overrideredirect(True)
        for option, value in (("-topmost", True), ("-transparentcolor", "white")):
            overlay.attributes(option, value)
        overlay.withdraw()

        self.highlight_canvas = tk.Canvas(overlay, bg="white", highlightthickness=0)
        self.highlight_canvas.pack(fill=tk.BOTH, expand=True)

        overlay_hwnd = overlay.winfo_id()
        self.highlight_hwnd = overlay_hwnd
        current_style = win32gui.GetWindowLong(overlay_hwnd, win32con.GWL_EXSTYLE)
        win32gui.SetWindowLong(
            overlay_hwnd, win32con.GWL_EXSTYLE, current_style | win32con.WS_EX_TRANSPARENT
        )

    def create_widgets(self):
        """Build the main window layout.

        Left panel: capture/highlight buttons, the name/description/note inputs
        with a save button, and the list of saved controls. Right panel: the
        property table and the live preview label. A status bar is packed at the
        bottom of the root window.
        """
        main_frame = ttk.Frame(self.root, padding=5)
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Left panel
        left_frame = ttk.Frame(main_frame)
        left_frame.pack(side=tk.LEFT, fill=tk.Y, padx=(0, 5), expand=False)

        self.capture_btn = ttk.Button(
            left_frame, text="开始捕捉 (Ctrl)", command=self.toggle_capture
        )
        self.capture_btn.pack(fill=tk.X, pady=5, padx=5)

        self.highlight_selected_btn = ttk.Button(
            left_frame, text="高亮选中控件", command=self.toggle_highlight_selected
        )
        self.highlight_selected_btn.pack(fill=tk.X, pady=5, padx=5)

        # User-supplied metadata stored alongside the captured control
        input_frame = ttk.LabelFrame(left_frame, text="控件信息", padding=5)
        input_frame.pack(fill=tk.X, pady=5, padx=5)

        ttk.Label(input_frame, text="名称:").pack(anchor=tk.W, pady=(2, 1))
        self.control_name_var = tk.StringVar()
        ttk.Entry(input_frame, textvariable=self.control_name_var).pack(fill=tk.X, pady=1)

        ttk.Label(input_frame, text="描述:").pack(anchor=tk.W, pady=(2, 1))
        self.control_desc_var = tk.StringVar()
        ttk.Entry(input_frame, textvariable=self.control_desc_var).pack(fill=tk.X, pady=1)

        ttk.Label(input_frame, text="备注:").pack(anchor=tk.W, pady=(2, 1))
        self.control_note_text = tk.Text(input_frame, height=3, wrap=tk.WORD)
        self.control_note_text.pack(fill=tk.X, pady=1)

        ttk.Button(
            input_frame, text="保存控件", command=self.save_captured_control
        ).pack(fill=tk.X, pady=5)

        # List of controls saved so far; selecting a row loads its JSON file
        list_frame = ttk.LabelFrame(left_frame, text="已保存控件", padding=5)
        list_frame.pack(fill=tk.BOTH, expand=True, pady=5, padx=5)

        self.saved_controls_list = tk.Listbox(list_frame)
        self.saved_controls_list.pack(fill=tk.BOTH, expand=True)
        self.saved_controls_list.bind("<<ListboxSelect>>", self.on_saved_control_select)

        # Right panel
        right_frame = ttk.Frame(main_frame)
        right_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True)

        # Two-column property/value table
        ttk.Label(right_frame, text="控件属性信息:").pack(anchor=tk.W, pady=(5, 2))
        columns = ("property", "value")
        self.info_tree = ttk.Treeview(right_frame, columns=columns, show="headings", height=15)
        self.info_tree.heading("property", text="属性")
        self.info_tree.heading("value", text="值")
        self.info_tree.column("property", width=120, anchor=tk.W)
        self.info_tree.column("value", width=400, anchor=tk.W)
        self.info_tree.pack(fill=tk.BOTH, expand=True, pady=5)

        # Live preview of the control under the cursor
        ttk.Label(right_frame, text="实时预览:").pack(anchor=tk.W, pady=(5, 2))
        preview_frame = ttk.Frame(right_frame, relief=tk.SUNKEN, borderwidth=1)
        preview_frame.pack(fill=tk.BOTH, expand=True, pady=5)

        self.preview_label = ttk.Label(preview_frame, text="请开启捕捉模式选择控件", padding=5)
        self.preview_label.pack(fill=tk.BOTH, expand=True)

        # Status bar at the bottom of the root window
        self.status_var = tk.StringVar()
        self.status_var.set("就绪 - 按Ctrl键开始/停止捕捉")
        status_bar = ttk.Label(self.root, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
        status_bar.pack(side=tk.BOTTOM, fill=tk.X)

    def bind_hotkeys(self):
        """Register the Ctrl hotkey that toggles capture mode."""
        hotkey = 'ctrl'
        handler = self.toggle_capture
        keyboard.add_hotkey(hotkey, handler)

    def toggle_capture(self):
        self.capture_active = not self.capture_active
        if self.capture_active:
            self.capture_btn.config(text="停止捕捉 (Ctrl)")
            self.status_var.set("捕捉中 - 移动鼠标到目标控件上")
        else:
            self.capture_btn.config(text="开始捕捉 (Ctrl)")
            self.status_var.set("就绪 - 按Ctrl键开始捕捉")
            if not self.highlight_selected_active:
                self.clear_highlight()
            if self.target_control:
                self.display_control_info(self.target_control)

    def toggle_highlight_selected(self):
        self.highlight_selected_active = not self.highlight_selected_active
        if self.highlight_selected_active:
            self.highlight_selected_btn.config(text="取消高亮选中控件")
            if self.target_control:
                self.highlight_control(self.target_control)
                self.status_var.set("已高亮选中控件 - 点击按钮取消高亮")
            else:
                self.status_var.set("没有选中的控件可高亮")
        else:
            self.highlight_selected_btn.config(text="高亮选中控件")
            self.clear_highlight()
            self.status_var.set("已取消高亮 - 按Ctrl键开始捕捉")

    def track_mouse(self):
        """Poll the cursor position and update the targeted control; reschedules itself.

        While capture is active, the control under the cursor is resolved, stored
        as ``self.target_control``, outlined (unless the selected-control highlight
        is active) and shown in the preview. While capture is inactive but the
        selected-control highlight is on, the highlight call is repeated for the
        selected control. Any exception is reported in the status bar.
        """
        current_time = time.time()
        if self.capture_active:
            try:
                x, y = win32gui.GetCursorPos()

                # Throttle: only probe again after highlight_update_delay seconds
                update_highlight = current_time - self.last_highlight_time > self.highlight_update_delay

                if update_highlight:
                    # Hide the overlay while probing (presumably so it is not
                    # itself returned by the point lookup — confirm)
                    self.highlight_window.withdraw()
                    # Resolve the control at the cursor position (uiautomation default lookup)
                    raw_control = auto.ControlFromPoint(x, y)
                    # Descend to the deepest visible child under the cursor
                    deep_control = self.get_deepest_child_control(raw_control, x, y,
                                                                  max_depth=20) if raw_control else None

                    # Update the Chrome window flags
                    self.check_window_type(raw_control)
                    # Pick the best visible candidate (by visibility and depth only)
                    control = self.select_best_control(raw_control, deep_control, x, y)

                    self.highlight_window.deiconify()

                    if control:
                        top_level = control.GetTopLevelControl()
                        # Skip the tool's own windows
                        if top_level.NativeWindowHandle != self.root_hwnd and control.NativeWindowHandle != self.highlight_hwnd:
                            control_id = (control.NativeWindowHandle, control.ControlTypeName, control.Name)
                            # Only react when the identity tuple differs from the last one
                            if control_id != self.last_control_info:
                                self.last_control_info = control_id
                                self.target_control = control
                                if not self.highlight_selected_active:
                                    self.highlight_control(control)
                                self.update_preview(control)
                                self.last_highlight_time = current_time
            except Exception as e:
                self.status_var.set(f"错误: {str(e)}")
        elif self.highlight_selected_active and self.target_control:
            self.highlight_control(self.target_control)
        # Poll again in 100 ms
        self.root.after(100, self.track_mouse)

    # 窗口类型识别（基础识别，不影响优先级）
    def check_window_type(self, control):
        self.is_chrome_window = False
        self.is_chrome_render_window = False
        if not control:
            return

        try:
            top_level = control.GetTopLevelControl()
            window_title = top_level.Name
            hwnd = top_level.NativeWindowHandle
            class_name = win32gui.GetClassName(hwnd)

            self.is_chrome_window = "chrome" in class_name.lower() or "chrome" in window_title.lower()
            self.is_chrome_render_window = class_name == "Chrome_RenderWidgetHostHWND"
        except:
            self.is_chrome_window = False
            self.is_chrome_render_window = False

    # 选择最佳控件：优先选择可见的深层控件（不使用优先级）
    def select_best_control(self, control1, control2, x, y):
        candidates = []
        # 收集有效候选控件（必须可见）
        if control1 and self.is_valid_control(control1, x, y) and self.is_visible_control(control1):
            candidates.append(control1)
        if control2 and self.is_valid_control(control2, x, y) and self.is_visible_control(
                control2) and control2 != control1:
            candidates.append(control2)

        # 补充更多子控件（增加捕获范围）
        if control1:
            try:
                deeper_controls = self.get_deeper_controls(control1, x, y, max_depth=15)
                candidates.extend(deeper_controls)
            except:
                pass

        if not candidates:
            return None

        # 去重
        unique_candidates = []
        seen = set()
        for c in candidates:
            try:
                c_id = (c.NativeWindowHandle, c.ControlTypeName, c.Name)
                if c_id not in seen:
                    seen.add(c_id)
                    unique_candidates.append(c)
            except:
                continue

        # 按深度排序（更深层的控件优先）
        unique_candidates.sort(key=lambda c: self.get_control_depth(c), reverse=True)
        return unique_candidates[0] if unique_candidates else None

    # 获取控件深度（用于排序）
    def get_control_depth(self, control, current_depth=0):
        try:
            parent = control.GetParentControl()
            if parent and parent != control:
                return self.get_control_depth(parent, current_depth + 1)
        except:
            pass
        return current_depth

    # 获取更深层控件（增加深度，确保捕获更多层级）
    def get_deeper_controls(self, parent, x, y, max_depth=15):
        controls = []
        if max_depth <= 0 or not parent:
            return controls
        try:
            # 获取所有子控件（包括不可见的，后续会过滤）
            children = parent.GetChildren()
            for child in children:
                if self.is_valid_control(child, x, y):
                    # 先添加当前子控件
                    controls.append(child)
                    # 递归获取更深层
                    deeper = self.get_deeper_controls(child, x, y, max_depth - 1)
                    controls.extend(deeper)
        except:
            pass
        return controls

    # 检查控件是否可见（强化可见性判断）
    def is_visible_control(self, control):
        try:
            # 检查控件自身可见性
            if not getattr(control, 'IsVisible', True):
                return False
            # 检查父控件可见性
            parent = control.GetParentControl()
            if parent and not getattr(parent, 'IsVisible', True):
                return False
            # 检查边界是否在屏幕内
            rect = control.BoundingRectangle
            screen_rect = win32gui.GetWindowRect(win32gui.GetDesktopWindow())
            return (rect.left < screen_rect[2] and rect.right > screen_rect[0] and
                    rect.top < screen_rect[3] and rect.bottom > screen_rect[1])
        except:
            return False

    # 检查控件是否包含坐标
    def is_valid_control(self, control, x, y):
        """Return True when (x, y) lies inside *control*'s bounding rectangle (edges included).

        Returns False when the rectangle cannot be read.
        """
        try:
            bounds = control.BoundingRectangle
            if not (bounds.left <= x <= bounds.right):
                return False
            return bounds.top <= y <= bounds.bottom
        except:
            return False

    # 获取最深层子控件（不限制类型，只看可见性）
    def get_deepest_child_control(self, parent, x, y, max_depth=20):
        try:
            if not parent or max_depth <= 0:
                return parent

            best_child = None
            best_depth = -1

            # 遍历所有子控件
            for child in parent.GetChildren():
                if self.is_valid_control(child, x, y):
                    # 递归获取更深层
                    deeper_child = self.get_deepest_child_control(child, x, y, max_depth - 1)
                    current_child = deeper_child if deeper_child else child

                    # 计算当前深度
                    current_depth = self.get_control_depth(current_child)

                    # 优先选择可见且深度更深的
                    if self.is_visible_control(current_child) and current_depth > best_depth:
                        best_depth = current_depth
                        best_child = current_child

            return best_child if best_child else parent
        except:
            return parent

    def highlight_control(self, control):
        try:
            if self.last_highlighted == control:
                return

            self.last_highlighted = control
            rect = control.BoundingRectangle
            # 扩大高亮范围1px，便于观察
            x, y = rect.left - 1, rect.top - 1
            width = rect.right - rect.left + 2
            height = rect.bottom - rect.top + 2
            self.highlight_window.geometry(f"{width}x{height}+{x}+{y}")
            self.highlight_canvas.delete("all")

            # 统一高亮样式（红色虚线框）
            self.highlight_canvas.create_rectangle(
                0, 0, width, height, outline="#FF0000", width=2, dash=(4, 2), fill="white"
            )
            self.highlight_window.deiconify()
        except:
            pass

    def clear_highlight(self):
        if self.highlight_window.winfo_ismapped():
            self.highlight_window.withdraw()
        self.last_highlighted = None
        self.last_control_info = None

    def update_preview(self, control):
        try:
            rect = control.BoundingRectangle
            visibility = "可见" if self.is_visible_control(control) else "不可见"
            depth = self.get_control_depth(control)
            # 窗口类型标记
            chrome_note = "（Chrome窗口）" if self.is_chrome_window else ""
            self.preview_label.config(text=(
                f"类型: {control.ControlTypeName}\n"
                f"{chrome_note}\n"
                f"位置: ({rect.left}, {rect.top})\n"
                f"大小: {(rect.right - rect.left)}x{(rect.bottom - rect.top)}\n"
                f"名称: {control.Name}\n"
                f"可见性: {visibility}\n"
                f"深度: {depth}"
            ))
        except:
            self.preview_label.config(text="无法获取控件信息")

    def display_control_info(self, control):
        for item in self.info_tree.get_children():
            self.info_tree.delete(item)
        try:
            rect = control.BoundingRectangle
            visibility = "是" if self.is_visible_control(control) else "否"
            depth = self.get_control_depth(control)

            info = [
                ("控件类型", control.ControlTypeName),
                ("是否Chrome窗口", "是" if self.is_chrome_window else "否"),
                ("是否可见", visibility),
                ("深度", depth),
                ("系统名称", control.Name),
                ("自动化ID", control.AutomationId),
                ("句柄", hex(control.NativeWindowHandle)),
                ("位置", f"({rect.left}, {rect.top})"),
                ("大小", f"{rect.right - rect.left}x{rect.bottom - rect.top}"),
                ("父控件", control.GetParentControl().ControlTypeName if control.GetParentControl() else "无"),
                ("启用状态", "是" if getattr(control, 'IsEnabled', True) else "否")
            ]
            for prop, value in info:
                self.info_tree.insert("", tk.END, values=(prop, value))
        except Exception as e:
            self.info_tree.insert("", tk.END, values=("错误", f"{str(e)}"))

    def save_captured_control(self):
        """Write the targeted control to a JSON file and add it to the saved list.

        The file holds three sections: basic info, owning top-level window info,
        and a one-element control tree. It is written to ``self.save_dir``
        (defaulting to the current working directory) under a sanitised form of
        the user-supplied name, with ``_1``, ``_2``, ... appended to avoid
        overwriting an existing file. The path is remembered per list row so that
        selecting the row later reopens exactly this file. Failures are reported
        in a message box.
        """
        if not self.target_control:
            messagebox.showwarning("警告", "请先捕捉控件")
            return
        try:
            control = self.target_control
            top_level = control.GetTopLevelControl()

            try:
                child_count = len(control.GetChildren())
            except Exception:
                child_count = 0

            user_name = self.control_name_var.get().strip()
            if not user_name:
                user_name = f"{control.ControlTypeName}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

            visibility = "是" if self.is_visible_control(control) else "否"
            depth = self.get_control_depth(control)
            chrome_flag = "是" if self.is_chrome_window else "否"

            basic_info = {
                "控件类型": control.ControlTypeName,
                "是否Chrome窗口": chrome_flag,
                "是否可见": visibility,
                "深度": depth,
                "系统名称": control.Name,
                "自动化ID": control.AutomationId,
                "子节点数量": child_count,
                "捕获时间": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "用户名称": user_name,
                "功能描述": self.control_desc_var.get(),
                "备注信息": self.control_note_text.get("1.0", tk.END).strip()
            }

            try:
                top_class = win32gui.GetClassName(top_level.NativeWindowHandle)
            except Exception:
                top_class = ""

            top_rect = top_level.BoundingRectangle
            top_window_info = {
                "窗口标题": top_level.Name,
                "窗口句柄": hex(top_level.NativeWindowHandle),
                "窗口类名": top_class,
                "窗口位置": [top_rect.left, top_rect.top],
                "窗口尺寸": [top_rect.right - top_rect.left, top_rect.bottom - top_rect.top],
                "自动化ID": top_level.AutomationId
            }

            control_rect = control.BoundingRectangle
            control_tree = [
                {
                    # Millisecond timestamp doubles as the node id
                    "id": int(datetime.now().timestamp() * 1000),
                    "parent_id": None,
                    "depth": depth,
                    "type": control.ControlTypeName,
                    "name": control.Name,
                    "handle": "N/A",
                    "automation_id": control.AutomationId,
                    "position": [control_rect.left, control_rect.top],
                    "size": [control_rect.right - control_rect.left,
                             control_rect.bottom - control_rect.top],
                    "visible": visibility,
                    "enabled": "是" if getattr(control, 'IsEnabled', True) else "否",
                    "is_target": True,
                    "is_chrome_window": chrome_flag
                }
            ]

            data = {
                "基本信息": basic_info,
                "所属顶层窗口信息": top_window_info,
                "控件树结构": control_tree
            }

            safe_name = self._safe_file_stem(user_name)
            if not safe_name:
                safe_name = "control_" + datetime.now().strftime("%Y%m%d_%H%M%S")

            if not self.save_dir:
                self.save_dir = os.getcwd()  # save next to the working directory, no sub-folder

            # Pick the first free name: stem.json, stem_1.json, stem_2.json, ...
            base_path = os.path.join(self.save_dir, safe_name)
            file_path = f"{base_path}.json"
            counter = 1
            while os.path.exists(file_path):
                file_path = f"{base_path}_{counter}.json"
                counter += 1

            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)

            self.saved_controls_list.insert(
                tk.END, f"{basic_info['用户名称']} ({basic_info['控件类型']})"
            )
            # Keep row index -> file path in step with the listbox rows
            self._saved_file_paths().append(file_path)

            self.control_name_var.set("")
            self.control_desc_var.set("")
            self.control_note_text.delete("1.0", tk.END)
            self.status_var.set(f"已保存: {file_path}")

            index = self.saved_controls_list.size() - 1
            # Clear earlier selections first; otherwise curselection()[0] would
            # still point at an older row.
            self.saved_controls_list.selection_clear(0, tk.END)
            self.saved_controls_list.selection_set(index)
            self.saved_controls_list.see(index)
            self.on_saved_control_select(None)

        except Exception as e:
            messagebox.showerror("错误", f"保存失败: {str(e)}")

    @staticmethod
    def _safe_file_stem(name):
        """Return *name* reduced to alphanumerics, '_' and '-' for use as a file stem."""
        return "".join(c for c in name if c.isalnum() or c in ('_', '-')).strip()

    def _saved_file_paths(self):
        """Return the list mapping each saved-list row to its JSON path (created on first use)."""
        try:
            return self._saved_paths
        except AttributeError:
            self._saved_paths = []
            return self._saved_paths

    def _guess_saved_path(self, item_text):
        """Derive a JSON path from a list label when no path was recorded for the row.

        Tries ``<stem>.json`` first, then ``<stem>_1.json`` .. ``<stem>_100.json``;
        returns the unnumbered path when none of them exists.
        """
        # The label is "<user name> (<control type>)"; split on the last " ("
        # so user names that themselves contain " (" stay intact.
        user_name = item_text.rsplit(' (', 1)[0]
        if not self.save_dir:
            self.save_dir = os.getcwd()
        base_path = os.path.join(self.save_dir, self._safe_file_stem(user_name))
        file_path = f"{base_path}.json"
        if os.path.exists(file_path):
            return file_path
        for counter in range(1, 101):
            numbered_path = f"{base_path}_{counter}.json"
            if os.path.exists(numbered_path):
                return numbered_path
        return file_path

    def on_saved_control_select(self, event):
        """Load the JSON file behind the selected saved-list row into the property table.

        Args:
            event: The Tk event (unused; ``None`` when called directly).
        """
        selection = self.saved_controls_list.curselection()
        if not selection:
            return
        index = selection[0]

        recorded_paths = self._saved_file_paths()
        if index < len(recorded_paths):
            file_path = recorded_paths[index]
        else:
            file_path = self._guess_saved_path(self.saved_controls_list.get(index))

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            for item in self.info_tree.get_children():
                self.info_tree.delete(item)

            sections = (data["基本信息"], data["所属顶层窗口信息"], data["控件树结构"][0])
            for position, section in enumerate(sections):
                if position:
                    # Blank spacer row between sections
                    self.info_tree.insert("", tk.END, values=(" ", " "))
                for k, v in section.items():
                    self.info_tree.insert("", tk.END, values=(k, str(v)))

        except Exception as e:
            messagebox.showerror("错误", f"读取文件失败: {str(e)}")


if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the capture tool to
    # it, and run the Tk event loop.
    root = tk.Tk()
    app = EnhancedControlCaptureTool(root)
    root.mainloop()

2、运行代码模块：

python 复制代码

import re
import os
import sys
from io import StringIO
from datetime import datetime
import tkinter as tk
from tkinter import ttk, scrolledtext, filedialog, messagebox
from pygments import lex
from pygments.lexers.python import PythonLexer
from RPA import RPA


class CodeEditor(scrolledtext.ScrolledText):
    def __init__(self, master=None, **kw):
        """Create the editor widget with key bindings, completion list and highlight tags.

        Args:
            master: Parent widget.
            **kw: Options forwarded to ``ScrolledText``. ``undo`` defaults to True
                here so the Ctrl+Z binding has an undo stack to work with; pass
                ``undo=False`` to disable it.
        """
        # Without an undo stack, edit_undo() has nothing to undo and Ctrl+Z is a no-op
        kw.setdefault('undo', True)
        super().__init__(master, **kw)
        # Core event bindings
        self.bind('<KeyRelease>', self.on_key_release)
        self.bind('<Tab>', self.on_tab)
        self.bind('<Control-z>', self.undo)
        self.bind('<Control-Z>', self.undo)

        # Short completion list ordered by expected frequency of use.
        # Entries are unique so a word never appears twice in the popup.
        self.completion_list = [
            'def', 'class', 'import', 'from', 'if', 'else', 'for', 'while',
            'return', 'print', 'self', '__init__', 'pass', 'in', 'is',
            'True', 'False', 'None', 'len', 'list', 'dict', 'str', 'int',
            'append', 'range', 'try', 'except', 'break', 'continue',
            'with', 'as', 'or', 'and', 'not', 'lambda', 'yield', 'open',
            'split', 'join', 'strip', 'keys', 'values', 'items'
        ]

        self.completion_window = None
        self.completion_listbox = None
        self.configure(font=('Courier New', 12), wrap='none')
        self.setup_tags()
        self.current_file_path = None

    def setup_tags(self):
        # 基础语法高亮配置
        self.tag_configure('Token.Keyword', foreground='#00008B')
        self.tag_configure('Token.Comment', foreground='#006400')
        self.tag_configure('Token.String', foreground='#8B4513')
        self.tag_configure('Token.Literal.Number', foreground='#4B0082')
        self.tag_configure('Token.Name.Builtin', foreground='#008B8B')

    def highlight_syntax(self):
        """Re-apply syntax highlighting tags to the whole buffer.

        Only the ``Token...`` tags are cleared and re-applied, so other tags such
        as the selection (``sel``) are left untouched. Each lexer token is tagged
        with the nearest configured ancestor of its token type, e.g. a
        ``Token.Comment.Single`` token uses the ``Token.Comment`` tag.
        """
        # The file configures the string colour under 'Token.String', while the
        # lexer reports string tokens under 'Token.Literal.String...'.
        tag_aliases = {'Token.Literal.String': 'Token.String'}

        style_tags = {tag for tag in self.tag_names() if tag.startswith('Token')}
        for tag in style_tags:
            self.tag_remove(tag, '1.0', 'end')

        code = self.get('1.0', 'end-1c')
        start = '1.0'
        # stripnl=False keeps leading blank lines so token offsets match the buffer
        for token, text in lex(code, PythonLexer(stripnl=False)):
            end = self.index(f"{start}+{len(text)}c")
            # Walk up the token type hierarchy until a configured tag is found
            node = token
            while node is not None:
                type_name = str(node)
                tag = tag_aliases.get(type_name, type_name)
                if tag in style_tags:
                    self.tag_add(tag, start, end)
                    break
                node = node.parent
            start = end

    def on_key_release(self, event):
        self.highlight_syntax()
        # 仅在输入有效字符时触发补全
        if event.char.isalnum() or event.char == '_':
            self.show_completion()
        else:
            self.hide_completion()

    def show_completion(self):
        """Show a popup listing completion entries that start with the word before the cursor.

        The match is case-insensitive. The popup is hidden when there is no word
        before the cursor or nothing matches.
        """
        # Word currently being typed (text from line start to the cursor)
        current_line = self.index(tk.INSERT).split('.')[0]
        typed_so_far = self.get(f"{current_line}.0", tk.INSERT)
        tail = re.search(r'(\w+)$', typed_so_far)
        if not tail:
            self.hide_completion()
            return

        needle = tail.group(1).lower()
        matches = [entry for entry in self.completion_list if entry.lower().startswith(needle)]
        if not matches:
            self.hide_completion()
            return

        # Replace any previous popup with a fresh borderless window
        self.hide_completion()
        self.completion_window = tk.Toplevel(self, bd=1, relief=tk.SOLID)
        self.completion_window.overrideredirect(True)

        # Place the popup just below the insertion cursor, or at a fixed offset
        # when the cursor box is unavailable
        try:
            x, y, _, h = self.bbox(tk.INSERT)
            popup_x = self.winfo_rootx() + x
            popup_y = self.winfo_rooty() + y + h
            self.completion_window.geometry(f"+{popup_x}+{popup_y}")
        except:
            self.completion_window.geometry(f"+{self.winfo_rootx() + 50}+{self.winfo_rooty() + 50}")

        # Fill the list
        longest = max(len(m) for m in matches)
        self.completion_listbox = tk.Listbox(
            self.completion_window,
            width=longest + 2,
            height=min(5, len(matches)),
            font=('Courier New', 11)
        )
        self.completion_listbox.insert(tk.END, *matches)
        self.completion_listbox.pack()
        self.completion_listbox.selection_set(0)

        # Accept the highlighted entry on click, Return or Tab
        for sequence in ('<ButtonRelease-1>', '<Return>', '<Tab>'):
            self.completion_listbox.bind(sequence, self.on_completion_select)

    def hide_completion(self):
        if self.completion_window:
            self.completion_window.destroy()
            self.completion_window = None
            self.completion_listbox = None

    def on_completion_select(self, event):
        if self.completion_listbox:
            selected = self.completion_listbox.get(self.completion_listbox.curselection()[0])
            self.hide_completion()
            self.replace_word(selected)
        return 'break'

    def replace_word(self, selected):
        line_num = self.index(tk.INSERT).split('.')[0]
        line_text = self.get(f"{line_num}.0", tk.INSERT)
        word_match = re.search(r'(\w+)$', line_text)
        if word_match:
            start_pos = f"{line_num}.{len(line_text) - len(word_match.group(1))}"
            self.delete(start_pos, tk.INSERT)
            self.insert(tk.INSERT, selected)

    def on_tab(self, event):
        if self.completion_window:
            self.on_completion_select(event)
        else:
            self.insert(tk.INSERT, ' ' * 4)
        return 'break'

    def undo(self, event=None):
        try:
            self.edit_undo()
        except tk.TclError:
            pass
        return 'break'

    def save_file(self, file_path=None):
        if not file_path and self.current_file_path:
            file_path = self.current_file_path
        if not file_path:
            file_path = filedialog.asksaveasfilename(
                defaultextension=".py",
                filetypes=[("Python files", "*.py"), ("All files", "*.*")]
            )
        if file_path:
            try:
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(self.get("1.0", tk.END))
                self.current_file_path = file_path
                return True, f"已保存: {file_path}"
            except Exception as e:
                return False, f"保存失败: {str(e)}"
        return False, "未选择文件"

    def load_file(self, file_path):
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                self.delete("1.0", tk.END)
                self.insert("1.0", f.read())
            self.current_file_path = file_path
            return True, f"已加载: {file_path}"
        except Exception as e:
            return False, f"加载失败: {str(e)}"


class FileBrowser:
    """Directory listing panel with a path entry and a browse button.

    The listbox shows ".." first, then sub-directories (marked with
    ``DIR_PREFIX``), then files. Double-clicking ".." or a directory
    navigates; double-clicking any other row passes the joined path to the
    ``on_file_select`` callback.
    """

    # Marker put in front of directory names in the listbox. The same constant
    # is used to strip it again, so label and slice can never get out of step.
    DIR_PREFIX = "[目录] "

    def __init__(self, master, on_file_select):
        """Build the widgets inside ``master`` and list the current directory.

        Args:
            master: Parent Tk widget.
            on_file_select: Callable invoked with a file path when a file row
                is double-clicked.
        """
        self.master = master
        self.on_file_select = on_file_select
        self.frame = ttk.Frame(master)
        self.frame.pack(fill=tk.BOTH, expand=True)

        # Path bar: editable path plus a "browse" button.
        path_frame = ttk.Frame(self.frame)
        path_frame.pack(fill=tk.X, padx=5, pady=5)
        self.path_var = tk.StringVar(value=os.getcwd())
        ttk.Entry(path_frame, textvariable=self.path_var).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 5))
        ttk.Button(path_frame, text="浏览", command=self.browse_dir).pack(side=tk.LEFT)

        # File list.
        self.listbox = tk.Listbox(self.frame, relief=tk.FLAT)
        self.listbox.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.listbox.bind('<Double-1>', self.on_double_click)

        # Vertical scrollbar, packed inside the listbox on its right edge.
        scroll = ttk.Scrollbar(self.listbox, orient=tk.VERTICAL, command=self.listbox.yview)
        scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.listbox.config(yscrollcommand=scroll.set)

        self.load_dir()

    def browse_dir(self):
        """Let the user pick a directory and show it; a cancelled dialog is ignored."""
        chosen = filedialog.askdirectory()
        if chosen:
            self.path_var.set(chosen)
            self.load_dir()

    def load_dir(self):
        """Refill the listbox from the directory named in the path entry.

        If the directory cannot be listed, a single error row is appended
        after the ".." row instead of raising.
        """
        self.listbox.delete(0, tk.END)
        directory = self.path_var.get()
        try:
            self.listbox.insert(tk.END, "..")  # parent directory
            dirs, files = [], []
            for entry in os.listdir(directory):
                bucket = dirs if os.path.isdir(os.path.join(directory, entry)) else files
                bucket.append(entry)
            for d in sorted(dirs):
                self.listbox.insert(tk.END, f"{self.DIR_PREFIX}{d}")
            for f in sorted(files):
                self.listbox.insert(tk.END, f)
        except Exception as e:
            self.listbox.insert(tk.END, f"错误: {str(e)}")

    def on_double_click(self, event):
        """Navigate into the selected directory or report the selected file."""
        selection = self.listbox.curselection()
        if not selection:
            return
        item = self.listbox.get(selection[0])
        current_dir = self.path_var.get()

        if item == "..":
            parent = os.path.dirname(current_dir)
            # dirname() of a root returns the root itself; nothing to do then.
            if parent != current_dir:
                self.path_var.set(parent)
                self.load_dir()
        elif item.startswith(self.DIR_PREFIX):
            # Strip exactly the marker. A hard-coded slice longer than the
            # marker would cut off the first character of the directory name.
            subdir = os.path.join(current_dir, item[len(self.DIR_PREFIX):])
            self.path_var.set(subdir)
            self.load_dir()
        else:
            file_path = os.path.join(current_dir, item)
            self.on_file_select(file_path)


class CodeApp:
    """Main window of the simple Python editor.

    The window has a file browser on the left. On the right it stacks the
    code editor, a button row (run / save / clear output) and an editable
    output pane that receives timestamped status lines.
    """

    def __init__(self, root):
        """Build the whole UI inside ``root``."""
        self.root = root
        self.root.title("Python简易编辑器")
        self.root.geometry("1000x600")

        # Theme: use the plain "clam" ttk theme.
        self.style = ttk.Style()
        self.style.theme_use('clam')

        # Per-widget-class style tweaks.
        self.style.configure('TButton', padding=5)
        # NOTE(review): ttk documents the label-frame style class as
        # 'TLabelframe'; confirm this spelling has the intended effect.
        self.style.configure('TLabelFrame', padding=10)

        # Main layout.
        main_frame = ttk.Frame(root, padding=5)
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Horizontal split between browser and editor area.
        paned = ttk.PanedWindow(main_frame, orient=tk.HORIZONTAL)
        paned.pack(fill=tk.BOTH, expand=True)

        # Left: file browser.
        left_frame = ttk.LabelFrame(paned, text="文件浏览")
        paned.add(left_frame, weight=1)
        self.file_browser = FileBrowser(left_frame, self.load_file)

        # Right: editor, buttons, output.
        right_frame = ttk.Frame(paned)
        paned.add(right_frame, weight=3)

        # Code editor.
        self.editor = CodeEditor(right_frame, relief=tk.FLAT)
        self.editor.pack(fill=tk.BOTH, expand=True, pady=(0, 5))

        # Button row.
        btn_frame = ttk.Frame(right_frame)
        btn_frame.pack(fill=tk.X, pady=(0, 5))
        ttk.Button(btn_frame, text="运行", command=self.run_code).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="保存", command=self.save_code).pack(side=tk.LEFT, padx=2)
        ttk.Button(btn_frame, text="清空输出", command=self.clear_output).pack(side=tk.LEFT, padx=2)

        # Output pane; deliberately left editable (state is not set to DISABLED).
        output_frame = ttk.LabelFrame(right_frame, text="输出")
        output_frame.pack(fill=tk.BOTH, expand=True)
        self.output = scrolledtext.ScrolledText(
            output_frame,
            height=5,
            font=('Courier New', 10),
            relief=tk.FLAT,
            wrap='word'
        )
        self.output.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

    def get_timestamp(self):
        """Return the current local time formatted as ``HH:MM:SS``."""
        return datetime.now().strftime("%H:%M:%S")

    def clear_output(self):
        """Remove all text from the output pane."""
        self.output.delete('1.0', tk.END)

    def run_code(self):
        """Execute the editor text and show what it printed, or its error.

        The code runs via ``exec`` inside this process with a fresh globals
        dict, so it has the same privileges as the editor itself. That is
        intended for a local editor; do not feed it untrusted code.
        ``sys.stdout`` is redirected for the duration of the run and is
        always restored afterwards, including when the code raises.
        """
        code = self.editor.get('1.0', tk.END)
        self.output.insert(tk.END, f"[{self.get_timestamp()}] 运行中...\n")

        captured = StringIO()
        old_stdout = sys.stdout
        sys.stdout = captured
        error = None
        try:
            exec(code, {'__builtins__': __builtins__})
        except Exception as e:
            error = e
        finally:
            # Without this, one failing script would leave every later
            # print() in the process writing into the discarded buffer.
            sys.stdout = old_stdout

        if error is not None:
            self.output.insert(tk.END, f"[{self.get_timestamp()}] 错误: {str(error)}\n")
        else:
            self.output.insert(tk.END, f"[{self.get_timestamp()}] 运行结果:\n{captured.getvalue()}")

    def save_code(self):
        """Save the editor contents and log the outcome.

        Returns:
            True when the editor reported a successful save, else False.
        """
        success, msg = self.editor.save_file()
        self.output.insert(tk.END, f"[{self.get_timestamp()}] {msg}\n")
        return success

    def load_file(self, file_path):
        """Open ``file_path`` in the editor, offering to save pending edits.

        If the buffer is modified the user is asked whether to save. Cancel
        aborts the load. Yes saves first and aborts the load if that save does
        not succeed, so unsaved text is never silently replaced. No discards
        the edits.
        """
        if self.editor.edit_modified():
            res = messagebox.askyesnocancel("保存", "保存当前文件?")
            if res is None:
                return
            if res and not self.save_code():
                return
        success, msg = self.editor.load_file(file_path)
        self.output.insert(tk.END, f"[{self.get_timestamp()}] {msg}\n")
        self.editor.edit_modified(False)


if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the editor UI on
    # it, then hand control to the Tk event loop.
    root = tk.Tk()
    app = CodeApp(root)
    root.mainloop()

3、核心解析模块：

python 复制代码

import os
import json
import time
import subprocess
import re
from PIL import ImageGrab
import uiautomation as auto
from win32api import GetSystemMetrics


class RPA:
    """RPA自动化类，用于从JSON文件读取UI控件信息并执行自动化操作"""

    class AppOperations:
        """Launch helper applications (Chrome and Edge) on behalf of an RPA instance."""

        def __init__(self, rpa_instance):
            """Remember the owning RPA instance."""
            self.rpa = rpa_instance

        @staticmethod
        def _build_command(executable, url=None):
            """Return the argv list that starts ``executable``, opening ``url`` if given."""
            return [executable, url] if url else [executable]

        def _launch_browser(self, label, candidates, url=None):
            """Start the first launchable executable from ``candidates``.

            The URL is handed to the browser as a command-line argument. That
            avoids looking up a browser window by caption and typing the URL
            as keystrokes, which depends on the window title, the focused
            field, the active input method and on characters that the
            key-sending syntax treats specially.

            Args:
                label: Browser name used in the printed messages.
                candidates: Executable names/paths to try, in order.
                url: Optional URL to open.

            Returns:
                True if a browser process was started, False otherwise.
            """
            try:
                started = False
                for executable in candidates:
                    try:
                        subprocess.Popen(self._build_command(executable, url))
                    except OSError:
                        # Not installed at this location; try the next one.
                        continue
                    started = True
                    break

                if not started:
                    print(f"未找到{label}浏览器，请检查是否已安装")
                    return False

                print(f"{label}浏览器已启动")

                if url:
                    # Keep the short pause callers have always had before they
                    # start locating controls in the freshly opened page.
                    time.sleep(2)
                    print(f"已在{label}中打开URL: {url}")

                return True
            except Exception as e:
                print(f"启动{label}失败: {e}")
                return False

        def chrome(self, url=None):
            """Start Google Chrome, optionally opening ``url``.

            Returns:
                True if Chrome was started, False if it was not found or the
                launch failed.
            """
            return self._launch_browser(
                "Chrome",
                [
                    "chrome.exe",
                    "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
                    "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
                ],
                url,
            )

        def edge(self, url=None):
            """Start Microsoft Edge, optionally opening ``url``.

            Returns:
                True if Edge was started, False if it was not found or the
                launch failed.
            """
            return self._launch_browser(
                "Edge",
                [
                    "msedge.exe",
                    "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
                    "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
                ],
                url,
            )

    # Class-level mapping from json_name to RPA instance; get() returns the
    # stored instance when json_name is already a key.
    _instances = {}

    def __init__(self, json_name=None):
        self.control_map = None
        self.main_window = None
        self.json_name = json_name
        self.json_path = None
        self._load_controls(json_name)
        self.app = self.AppOperations(self)

    @classmethod
    def get(cls, json_name):
        if json_name in cls._instances:
            return cls._instances[json_name]
        return cls(json_name)

    def _load_controls(self, json_name=None):
        try:
            json_files = [f for f in os.listdir(os.getcwd()) if f.endswith('.json')]
            if not json_files:
                raise FileNotFoundError("当前目录下未找到任何JSON文件")

            if json_name:
                exact_match = [f for f in json_files if f == json_name]
                if exact_match:
                    json_path = exact_match[0]
                else:
                    fuzzy_match = [f for f in json_files if json_name in f]
                    if not fuzzy_match:
                        raise FileNotFoundError(f"未找到名称包含'{json_name}'的JSON文件")
                    json_path = fuzzy_match[0]
            else:
                json_path = json_files[0]

            self.json_path = json_path
            print(f"正在加载JSON文件: {json_path}")

            with open(json_path, "r", encoding="utf-8-sig") as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError as e:
                    raise ValueError(f"JSON文件格式错误（{json_path}）：{str(e)}")

            required_keys = ["基本信息", "控件树结构", "所属顶层窗口信息"]
            for key in required_keys:
                if key not in data:
                    raise KeyError(f"JSON文件缺少必要字段：'{key}'（文件：{json_path}）")

            base_info = data["基本信息"]
            control_tree = data["控件树结构"]
            top_window_info = data["所属顶层窗口信息"]
            top_window_title = top_window_info["窗口标题"]

            main_window = auto.WindowControl(
                searchDepth=1,
                Title=top_window_title,
                ClassName=top_window_info["窗口类名"]
            )
            if main_window.Exists(2):
                print(f"已定位主窗口：{top_window_title}")
            else:
                print(f"主窗口未找到（标题：{top_window_title}），将使用桌面作为根控件")
                main_window = auto.GetRootControl()

            control_map = {}
            sorted_controls = sorted(control_tree, key=lambda x: x["depth"])

            for ctrl in sorted_controls:
                ctrl_required = ["id", "parent_id", "depth", "type", "position", "size"]
                for key in ctrl_required:
                    if key not in ctrl:
                        raise KeyError(f"控件信息缺少字段'{key}'（文件：{json_path}）")

                ctrl_id = ctrl["id"]
                parent_id = ctrl["parent_id"]
                ctrl_type = ctrl["type"]
                automation_id = ctrl.get("automation_id", "")
                ctrl_name = ctrl.get("name", "")
                is_chrome = ctrl.get("is_chrome_window", "否") == "是"

                parent_ctrl = main_window if parent_id is None else control_map.get(parent_id)
                if not parent_ctrl:
                    raise Exception(f"父控件（id：{parent_id}）未找到，无法解析子控件（id：{ctrl_id}）")

                ctrl_type_obj = getattr(auto, ctrl_type, None)
                child_ctrl = None

                if ctrl_type_obj:
                    if automation_id:
                        child_ctrl = ctrl_type_obj(searchFromControl=parent_ctrl, AutomationId=automation_id, searchDepth=5)
                    elif ctrl_name:
                        child_ctrl = ctrl_type_obj(searchFromControl=parent_ctrl, Name=ctrl_name, searchDepth=5)

                if not child_ctrl or not child_ctrl.Exists(1):
                    x, y = ctrl["position"]
                    w, h = ctrl["size"]
                    parent_rect = parent_ctrl.BoundingRectangle
                    # 针对Chrome窗口，修正坐标（排除标题栏等非内容区域）
                    if is_chrome and parent_ctrl == main_window:
                        title_bar_height = 80  # Chrome标题栏+地址栏高度补偿
                        abs_x = parent_rect.left + x
                        abs_y = parent_rect.top + y + title_bar_height
                    else:
                        abs_x = parent_rect.left + x
                        abs_y = parent_rect.top + y
                    center_x = abs_x + w // 2
                    center_y = abs_y + h // 2
                    child_ctrl = auto.ControlFromPoint(center_x, center_y)
                    if not child_ctrl.Exists(1):
                        raise Exception(f"控件定位失败（id：{ctrl_id}，类型：{ctrl_type}，坐标：({abs_x},{abs_y})）")

                control_map[ctrl_id] = child_ctrl
                print(f"已解析控件：id={ctrl_id}，类型={ctrl_type}，名称={ctrl_name}")

            self.control_map = control_map
            self.main_window = main_window
            print(f"控件加载完成（共{len(control_map)}个控件，文件：{json_path}）")
            return True

        except Exception as e:
            print(f"加载控件失败：{str(e)}")
            self.control_map = None
            self.main_window = None
            return False

    def count(self, json_name=None, control_type=None):
        if not self.control_map or (json_name and json_name != self.json_name):
            if not self._load_controls(json_name):
                return 0

        if control_type:
            return len([ctrl for ctrl in self.control_map.values()
                        if ctrl.ControlTypeName == control_type])
        else:
            return len(self.control_map)

    def text(self, json_name=None, index=0):
        if not self.control_map or (json_name and json_name != self.json_name):
            if not self._load_controls(json_name):
                return None

        target_ctrls = list(self.control_map.values())
        if 0 <= index < len(target_ctrls):
            target_ctrl = target_ctrls[index]
            try:
                current_text = target_ctrl.Name if hasattr(target_ctrl, 'Name') else None
                if not current_text:
                    current_text = target_ctrl.Text if hasattr(target_ctrl, 'Text') else None
                print(f"获取到文本（索引{index}）：{current_text or '空'}")
                return current_text
            except Exception as e:
                print(f"获取文本失败：{e}")
                return None
        else:
            print(f"未找到索引为{index}的控件（共{len(target_ctrls)}个）")
            return None

    def input(self, json_name=None, input_text="", clear_first=True, index=0):
        if not self.control_map or (json_name and json_name != self.json_name):
            if not self._load_controls(json_name):
                return False

        target_ctrls = list(self.control_map.values())
        if 0 <= index < len(target_ctrls):
            target_ctrl = target_ctrls[index]
            try:
                target_ctrl.SetActive()
                if clear_first:
                    target_ctrl.SendKeys('{Ctrl}a{Delete}')
                target_ctrl.SendKeys(input_text)
                print(f"已输入文本（索引{index}）：{input_text}")
                return True
            except Exception as e:
                print(f"输入失败：{e}")
                return False
        else:
            print(f"未找到索引为{index}的控件（共{len(target_ctrls)}个）")
            return False

    def click(self, json_name=None, control_type=None, control_name=None, index=0):
        if not self.control_map or (json_name and json_name != self.json_name):
            if not self._load_controls(json_name):
                return False

        target_ctrls = []
        if control_type:
            target_ctrls = [
                ctrl for ctrl in self.control_map.values()
                if ctrl.ControlTypeName == control_type and
                   (not control_name or control_name in ctrl.Name)
            ]
        else:
            target_ctrls = list(self.control_map.values())

        if 0 <= index < len(target_ctrls):
            target_ctrl = target_ctrls[index]
            try:
                target_ctrl.SetActive()
                target_ctrl.Click()
                print(f"已点击控件（类型：{target_ctrl.ControlTypeName}，索引{index}）")
                return True
            except Exception as e:
                print(f"点击失败：{e}")
                return False
        else:
            print(f"未找到符合条件的控件（索引{index}，类型：{control_type}）")
            return False

    def photo(self, index=0, save_path=None):
        if not self.control_map:
            print("未加载控件信息，无法截图")
            return False

        controls = list(self.control_map.values())
        if index < 0 or index >= len(controls):
            print(f"无效索引：{index}（总控件数：{len(controls)}）")
            return False

        target_ctrl = controls[index]
        try:
            target_ctrl.SetActive()
            time.sleep(0.5)

            # 获取控件绝对坐标（修正Chrome浏览器偏移）
            rect = target_ctrl.BoundingRectangle
            # 验证控件尺寸是否与JSON一致
            ctrl_size = (rect.right - rect.left, rect.bottom - rect.top)
            print(f"目标控件实际尺寸: {ctrl_size}")

            # 截取控件区域
            screenshot = ImageGrab.grab(bbox=(rect.left, rect.top, rect.right, rect.bottom))

            if not save_path:
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                save_path = f"screenshot_{timestamp}.png"

            screenshot.save(save_path)
            print(f"控件截图已保存：{os.path.abspath(save_path)}（坐标：{rect.left},{rect.top},{rect.right},{rect.bottom}）")
            print(f"截图尺寸：{rect.right - rect.left}x{rect.bottom - rect.top}")
            return True
        except Exception as e:
            print(f"截图失败：{e}")
            return False