使用相机实时进行透视变换矫正，并用 Streamlit 做一个界面

Python 代码：
import cv2
import numpy as np

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    """Locate the largest four-sided contour in ``image`` and return its corners.

    The frame is downscaled to ``resize_height`` rows, blurred and run through
    Canny. The five largest contours are polygon-approximated and the first one
    that reduces to exactly four vertices is taken as the rectangle.

    Args:
        image: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``).
        resize_height: Height in pixels the image is resized to before edge
            detection; the width is scaled by the same ratio.
        prev_rectangle: Corners returned for the previous frame, or ``None``.
        stability_factor: Per-coordinate pixel tolerance. When every coordinate
            of the new rectangle differs from ``prev_rectangle`` by less than
            this value, ``prev_rectangle`` is returned to suppress jitter.

    Returns:
        A list of four ``[x, y]`` integer points in original-image coordinates,
        ordered top-left, bottom-left, bottom-right, top-right. When no
        quadrilateral is found, or the detection is within the stability
        tolerance, ``prev_rectangle`` is returned as given (possibly ``None``).
    """

    def process_image(image, height):
        # ratio maps coordinates in the resized image back onto the original.
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 25, 125)
        return edges, ratio

    def find_rectangle(edges):
        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        # Only the five largest contours are considered as candidates.
        candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        for contour in candidates:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
            if len(approx) == 4:
                return approx
        return None

    def order_corners(points):
        # The polygon approximation yields vertices in contour order with an
        # arbitrary starting corner. A perspective transform pairs source and
        # destination points by index, so the corners are put into one fixed
        # order: top-left, bottom-left, bottom-right, top-right.
        pts = np.asarray(points, dtype=np.float64)
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        # With the image y axis pointing down, descending angle around the
        # centroid walks the corners as ... top-left, bottom-left,
        # bottom-right, top-right ...
        cyclic = pts[np.argsort(-angles)]
        # Start the cycle at the corner closest to the image origin.
        start = int(np.argmin(cyclic.sum(axis=1)))
        return np.roll(cyclic, -start, axis=0)

    edges, ratio = process_image(image, resize_height)
    approx = find_rectangle(edges)
    if approx is None:
        # Nothing detected in this frame: keep using the previous rectangle.
        return prev_rectangle

    corners = order_corners(approx.reshape(4, 2) * ratio)
    rectangle = [[int(x), int(y)] for x, y in corners]

    # If a previous rectangle exists and no coordinate moved by at least
    # stability_factor pixels, keep the previous rectangle.
    if prev_rectangle is not None:
        diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
        if np.all(diff < stability_factor):
            return prev_rectangle

    return rectangle
# Live preview: read frames from the default camera, detect the rectangle in
# each frame and show the perspective-corrected result until 'q' is pressed.
prev_rectangle = None
# Destination corners of the 337x488 output, listed as top-left, bottom-left,
# bottom-right, top-right. cv2.getPerspectiveTransform pairs src[i] with dst[i].
dst = np.float32([[0, 0], [0, 488], [337, 488], [337, 0]])
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Without a frame no window is shown, so waitKey could never see
            # 'q'; stop instead of spinning forever.
            print("Can't receive frame (stream end?). Exiting..")
            break

        rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
        prev_rectangle = rectangle  # save the rectangle for the next frame
        print(rectangle)

        # Always show the camera image, even before a rectangle has been found.
        cv2.imshow("src", frame)
        if rectangle is not None:
            src = np.float32(rectangle)
            m = cv2.getPerspectiveTransform(src, dst)
            result = cv2.warpPerspective(frame, m, (337, 488))
            cv2.imshow("result", result)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the windows however the loop ends.
    cap.release()
    cv2.destroyAllWindows()

Streamlit 界面

Python 代码：
import streamlit as st
from cnstd import CnStd
from cnocr import CnOcr
import cv2
import numpy as np
import pandas as pd
from PIL import Image

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    """Return the corners of the largest contour if it is a quadrilateral.

    The frame is downscaled to ``resize_height`` rows, blurred and run through
    Canny. Only the single largest contour is polygon-approximated; it is
    accepted when the approximation has exactly four vertices.

    Args:
        image: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``).
        resize_height: Height in pixels the image is resized to before edge
            detection; the width is scaled by the same ratio.
        prev_rectangle: Corners returned for the previous frame, or ``None``.
        stability_factor: Per-coordinate pixel tolerance. When every coordinate
            of the new rectangle differs from ``prev_rectangle`` by less than
            this value, ``prev_rectangle`` is returned to suppress jitter.

    Returns:
        A list of four ``[x, y]`` integer points in original-image coordinates,
        ordered top-left, bottom-left, bottom-right, top-right. When no
        quadrilateral is found, or the detection is within the stability
        tolerance, ``prev_rectangle`` is returned as given (possibly ``None``).
    """

    def process_image(image, height):
        # ratio maps coordinates in the resized image back onto the original.
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 50, 100)
        return edges, ratio

    def find_rectangle(edges):
        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        # Only the largest contour is considered as a candidate.
        candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:1]

        for contour in candidates:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
            if len(approx) == 4:
                return approx
        return None

    def order_corners(points):
        # The polygon approximation yields vertices in contour order with an
        # arbitrary starting corner. A perspective transform pairs source and
        # destination points by index, so the corners are put into one fixed
        # order: top-left, bottom-left, bottom-right, top-right.
        pts = np.asarray(points, dtype=np.float64)
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        # With the image y axis pointing down, descending angle around the
        # centroid walks the corners as ... top-left, bottom-left,
        # bottom-right, top-right ...
        cyclic = pts[np.argsort(-angles)]
        # Start the cycle at the corner closest to the image origin.
        start = int(np.argmin(cyclic.sum(axis=1)))
        return np.roll(cyclic, -start, axis=0)

    edges, ratio = process_image(image, resize_height)
    approx = find_rectangle(edges)
    if approx is None:
        # Nothing detected in this frame: keep using the previous rectangle.
        return prev_rectangle

    corners = order_corners(approx.reshape(4, 2) * ratio)
    rectangle = [[int(x), int(y)] for x, y in corners]

    # If a previous rectangle exists and no coordinate moved by at least
    # stability_factor pixels, keep the previous rectangle.
    if prev_rectangle is not None:
        diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
        if np.all(diff < stability_factor):
            return prev_rectangle

    return rectangle

# Streamlit app combining the two scripts: a live perspective-corrected camera
# view and an image-to-text (OCR) page, selected from the sidebar.

# Destination corners of the 640x588 output, listed as top-left, bottom-left,
# bottom-right, top-right. cv2.getPerspectiveTransform pairs src[i] with dst[i].
dst = np.float32([[0, 0], [0, 588], [640, 588], [640, 0]])
prev_rectangle = None
std = CnStd()
cn_ocr = CnOcr()

st.sidebar.title("易读写")
option = st.sidebar.selectbox('请选择要运行的功能', ('显示矫正后画面', '图片转文字'))
# Placeholders for the two image panes (raw camera frame and corrected frame).
FRAME_WINDOW_RAW = st.image([])
FRAME_WINDOW_TRANSFORMED = st.image([])

# Mode 1: show the perspective-corrected picture in real time.
if option == '显示矫正后画面':
    run = st.checkbox('开始运行')
    alpha = st.sidebar.slider('亮度调节 Brightness', min_value=0.0, max_value=3.0,
                              value=1.0)  # brightness gain applied to the corrected frame
    angle = st.sidebar.slider('视角调节 Rotation', 0, 360, 0)  # rotation of the corrected frame, degrees
    if run:
        # Open the camera only while the live view is running, so the OCR page
        # and the idle state never hold the device.
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    st.write("Can't receive frame (Stream end?). Exiting..")
                    break

                # Always show the raw frame, even before a rectangle is found.
                raw_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # BGR -> RGB for display
                FRAME_WINDOW_RAW.image(Image.fromarray(raw_frame))

                rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
                prev_rectangle = rectangle  # save the rectangle for the next frame
                if rectangle is None:
                    continue

                src = np.float32(rectangle)
                m = cv2.getPerspectiveTransform(src, dst)
                result = cv2.warpPerspective(frame, m, (640, 588))

                result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)  # BGR -> RGB for display
                result = cv2.convertScaleAbs(result, alpha=alpha, beta=0)
                if angle:
                    # Apply the rotation slider: rotate about the image centre,
                    # keeping the output size (corners may be cropped).
                    height, width = result.shape[:2]
                    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
                    result = cv2.warpAffine(result, rotation, (width, height))
                FRAME_WINDOW_TRANSFORMED.image(Image.fromarray(result))
        finally:
            # Release the camera however the loop ends.
            cap.release()
    else:
        st.write('Stopped')

# Mode 2: run text detection + recognition on an uploaded picture.
elif option == '图片转文字':
    uploaded_file = st.sidebar.file_uploader("选择一个图片文件")

    if uploaded_file is not None:
        img = Image.open(uploaded_file)
        st.image(img, caption='Uploaded Image.', use_column_width=True)

        if st.sidebar.button('开始运行'):
            with st.spinner('OCR is in progress...'):
                # Uploads may be RGBA, palette or greyscale; normalise to a
                # three-channel array before detection.
                # NOTE(review): assumes CnStd.detect expects an RGB (H, W, 3)
                # array -- confirm against the cnstd documentation.
                np_img = np.array(img.convert('RGB'))
                box_infos = std.detect(np_img)

                # Recognise each detected text box as a single line.
                text = [
                    cn_ocr.ocr_for_single_line(box_info['cropped_img'])['text']
                    for box_info in box_infos['detected_texts']
                ]

                st.subheader("Extracted text:")
                for line in text:
                    st.write(line)

            st.success('Text extraction complete')
相关推荐
搞笑的秀儿1 分钟前
信息新技术
大数据·人工智能·物联网·云计算·区块链
阿里云大数据AI技术20 分钟前
OpenSearch 视频 RAG 实践
数据库·人工智能·llm
XMAIPC_Robot32 分钟前
基于ARM+FPGA的光栅尺精密位移加速度测试解决方案
arm开发·人工智能·fpga开发·自动化·边缘计算
加油吧zkf42 分钟前
YOLO目标检测数据集类别:分类与应用
人工智能·计算机视觉·目标跟踪
Blossom.1181 小时前
机器学习在智能制造业中的应用:质量检测与设备故障预测
人工智能·深度学习·神经网络·机器学习·机器人·tensorflow·sklearn
天天扭码1 小时前
AI时代,前端如何处理大模型返回的多模态数据?
前端·人工智能·面试
难受啊马飞2.01 小时前
如何判断 AI 将优先自动化哪些任务?
运维·人工智能·ai·语言模型·程序员·大模型·大模型学习
顺丰同城前端技术团队1 小时前
掌握未来:构建专属领域的大模型与私有知识库——从部署到微调的全面指南
人工智能·deepseek
许泽宇的技术分享1 小时前
用.NET9+Blazor+Semantic Kernel,打造企业级AI知识库和智能体平台——AntSK深度解读
人工智能
烟锁池塘柳02 小时前
【深度学习】强化学习(Reinforcement Learning, RL)主流架构解析
人工智能·深度学习·机器学习