相机实时进行透视变换矫正 并用streamlit做个界面

python 复制代码
import cv2
import numpy as np

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    def process_image(image, height):
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 25, 125)
        return edges, ratio

    def find_rectangle(edges):
        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)

            if len(approx) == 4:
                return approx

    edges, ratio = process_image(image, resize_height)
    rectangle = find_rectangle(edges)

    if rectangle is not None:
        rectangle = rectangle.reshape(4, 2) * ratio
        rectangle = [[int(point[0]), int(point[1])] for point in rectangle]

        # if prev rectangle exists and the change is smaller than the stability factor, use the previous rectangle
        if prev_rectangle is not None:
            diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
            if np.all(diff < stability_factor):
                return prev_rectangle

        return rectangle
    else:
        return prev_rectangle  # if no rectangle detected, use the previous one
prev_rectangle = None
dst = np.float32([[0, 0], [0, 488],[337, 488],[337, 0]])
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if ret:
        rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
        prev_rectangle = rectangle  # save the rectangle for the next frame
        print(rectangle)
        if rectangle is not None:
            src = np.float32(rectangle)
            m = cv2.getPerspectiveTransform(src, dst)
            result = cv2.warpPerspective(frame, m, (337, 488))

            cv2.imshow("src", frame)
            cv2.imshow("result", result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

streamlit界面

python 复制代码
import streamlit as st
from cnstd import CnStd
from cnocr import CnOcr
import cv2
import numpy as np
import pandas as pd
from PIL import Image

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    def process_image(image, height):
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 50, 100)
        return edges, ratio

    def find_rectangle(edges):
        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:1]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)

            if len(approx) == 4:
                return approx

    edges, ratio = process_image(image, resize_height)
    rectangle = find_rectangle(edges)

    if rectangle is not None:
        rectangle = rectangle.reshape(4, 2) * ratio
        rectangle = [[int(point[0]), int(point[1])] for point in rectangle]


        if prev_rectangle is not None:
            diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
            if np.all(diff < stability_factor):
                return prev_rectangle

        return rectangle
    else:
        return prev_rectangle  # if no rectangle detected, use the previous one

# Code where the two scripts are fused together
dst = np.float32([[0, 0], [0, 588],[640, 588],[640, 0]])
prev_rectangle = None
cap = cv2.VideoCapture(0)
std = CnStd()
cn_ocr = CnOcr()

st.sidebar.title("易读写")
option = st.sidebar.selectbox('请选择要运行的功能', ('显示矫正后画面', '图片转文字'))
# 定义两个图像显示窗口
FRAME_WINDOW_RAW = st.image([])
FRAME_WINDOW_TRANSFORMED = st.image([])
# 需要实时显示矫正后画面
if option == '显示矫正后画面':
    run = st.checkbox('开始运行')
    FRAME_WINDOW = st.image([])
    alpha = st.sidebar.slider('亮度调节 Brightness', min_value=0.0, max_value=3.0,
                              value=1.0)  # Add a slider for brightness
    angle = st.sidebar.slider('视角调节 Rotation', 0, 360, 0)  # Add a slider for rotation
    if run:
        while True:
            ret, frame = cap.read()
            if not ret:
                st.write("Can't receive frame (Stream end?). Exiting..")
                break
            rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
            prev_rectangle = rectangle  # save the rectangle for the next frame
            if rectangle is not None:
                src = np.float32(rectangle)
                m = cv2.getPerspectiveTransform(src, dst)
                result = cv2.warpPerspective(frame, m, (640, 588))

                raw_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for raw frame
                img_raw = Image.fromarray(raw_frame)
                FRAME_WINDOW_RAW.image(img_raw)  # Show raw frame

                result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for transformed
                result = cv2.convertScaleAbs(result, alpha=alpha, beta=0)
                img_transformed = Image.fromarray(result)
                FRAME_WINDOW_TRANSFORMED.image(img_transformed)  # Show transformed result
    else:
        st.write('Stopped')
    cap.release()

elif option == '图片转文字':
    uploaded_file = st.sidebar.file_uploader("选择一个图片文件")

    if uploaded_file is not None:
        img = Image.open(uploaded_file)
        st.image(img, caption='Uploaded Image.', use_column_width=True)

        if st.sidebar.button('开始运行'):
            # Copy and adapt your code here
            with st.spinner('OCR is in progress...'):
                np_img = np.array(img)  # Convert PIL Image to numpy array
                box_infos = std.detect(np_img)

                text = []
                for box_info in box_infos['detected_texts']:
                    cropped_img = box_info['cropped_img']
                    ocr_res = cn_ocr.ocr_for_single_line(cropped_img)
                    text.append(ocr_res['text'])

                st.subheader("Extracted text:")
                for line in text:
                    st.write(line)

            st.success('Text extraction complete')
相关推荐
火山引擎开发者社区15 分钟前
火山AgentPlan/CodingPlan同步上线GLM-5.2
人工智能
冬奇Lab1 小时前
Skill 系列(05):Skill 工作流串联——4 种模式实测,并发加速 1.5x
人工智能·开源
冬奇Lab1 小时前
每日一个开源项目(第141篇):hiring-agent - HackerRank 开源了他们的简历评分系统,你的简历能得几分?
人工智能·面试·开源
甲维斯2 小时前
又升级咯!坦克大战2026,科技与复古并存!
前端·人工智能·游戏开发
姗姗来迟了4 小时前
用React Hook封装AI对话状态
人工智能
Goodbye4 小时前
从 Token 到 Embedding:LLM 核心基础深度解析
javascript·人工智能
阿瑞IT4 小时前
AI Agent 在甘特计划变更场景中的动态响应工程实践
人工智能
用户938515635074 小时前
工具调用背后:LLM 如何突破“缸中大脑”,操控真实世界?
javascript·人工智能