相机实时进行透视变换矫正 并用streamlit做个界面

python 复制代码
import cv2
import numpy as np

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    def process_image(image, height):
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 25, 125)
        return edges, ratio

    def find_rectangle(edges):
        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)

            if len(approx) == 4:
                return approx

    edges, ratio = process_image(image, resize_height)
    rectangle = find_rectangle(edges)

    if rectangle is not None:
        rectangle = rectangle.reshape(4, 2) * ratio
        rectangle = [[int(point[0]), int(point[1])] for point in rectangle]

        # if prev rectangle exists and the change is smaller than the stability factor, use the previous rectangle
        if prev_rectangle is not None:
            diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
            if np.all(diff < stability_factor):
                return prev_rectangle

        return rectangle
    else:
        return prev_rectangle  # if no rectangle detected, use the previous one
prev_rectangle = None
dst = np.float32([[0, 0], [0, 488],[337, 488],[337, 0]])
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if ret:
        rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
        prev_rectangle = rectangle  # save the rectangle for the next frame
        print(rectangle)
        if rectangle is not None:
            src = np.float32(rectangle)
            m = cv2.getPerspectiveTransform(src, dst)
            result = cv2.warpPerspective(frame, m, (337, 488))

            cv2.imshow("src", frame)
            cv2.imshow("result", result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

streamlit界面

python 复制代码
import streamlit as st
from cnstd import CnStd
from cnocr import CnOcr
import cv2
import numpy as np
import pandas as pd
from PIL import Image

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    def process_image(image, height):
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 50, 100)
        return edges, ratio

    def find_rectangle(edges):
        contours, _ = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:1]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)

            if len(approx) == 4:
                return approx

    edges, ratio = process_image(image, resize_height)
    rectangle = find_rectangle(edges)

    if rectangle is not None:
        rectangle = rectangle.reshape(4, 2) * ratio
        rectangle = [[int(point[0]), int(point[1])] for point in rectangle]


        if prev_rectangle is not None:
            diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
            if np.all(diff < stability_factor):
                return prev_rectangle

        return rectangle
    else:
        return prev_rectangle  # if no rectangle detected, use the previous one

# Code where the two scripts are fused together
dst = np.float32([[0, 0], [0, 588],[640, 588],[640, 0]])
prev_rectangle = None
cap = cv2.VideoCapture(0)
std = CnStd()
cn_ocr = CnOcr()

st.sidebar.title("易读写")
option = st.sidebar.selectbox('请选择要运行的功能', ('显示矫正后画面', '图片转文字'))
# 定义两个图像显示窗口
FRAME_WINDOW_RAW = st.image([])
FRAME_WINDOW_TRANSFORMED = st.image([])
# 需要实时显示矫正后画面
if option == '显示矫正后画面':
    run = st.checkbox('开始运行')
    FRAME_WINDOW = st.image([])
    alpha = st.sidebar.slider('亮度调节 Brightness', min_value=0.0, max_value=3.0,
                              value=1.0)  # Add a slider for brightness
    angle = st.sidebar.slider('视角调节 Rotation', 0, 360, 0)  # Add a slider for rotation
    if run:
        while True:
            ret, frame = cap.read()
            if not ret:
                st.write("Can't receive frame (Stream end?). Exiting..")
                break
            rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
            prev_rectangle = rectangle  # save the rectangle for the next frame
            if rectangle is not None:
                src = np.float32(rectangle)
                m = cv2.getPerspectiveTransform(src, dst)
                result = cv2.warpPerspective(frame, m, (640, 588))

                raw_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for raw frame
                img_raw = Image.fromarray(raw_frame)
                FRAME_WINDOW_RAW.image(img_raw)  # Show raw frame

                result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for transformed
                result = cv2.convertScaleAbs(result, alpha=alpha, beta=0)
                img_transformed = Image.fromarray(result)
                FRAME_WINDOW_TRANSFORMED.image(img_transformed)  # Show transformed result
    else:
        st.write('Stopped')
    cap.release()

elif option == '图片转文字':
    uploaded_file = st.sidebar.file_uploader("选择一个图片文件")

    if uploaded_file is not None:
        img = Image.open(uploaded_file)
        st.image(img, caption='Uploaded Image.', use_column_width=True)

        if st.sidebar.button('开始运行'):
            # Copy and adapt your code here
            with st.spinner('OCR is in progress...'):
                np_img = np.array(img)  # Convert PIL Image to numpy array
                box_infos = std.detect(np_img)

                text = []
                for box_info in box_infos['detected_texts']:
                    cropped_img = box_info['cropped_img']
                    ocr_res = cn_ocr.ocr_for_single_line(cropped_img)
                    text.append(ocr_res['text'])

                st.subheader("Extracted text:")
                for line in text:
                    st.write(line)

            st.success('Text extraction complete')
相关推荐
johnny2332 小时前
AI工作流编排平台
人工智能
百***35483 小时前
DeepSeek在情感分析中的细粒度识别
人工智能
合方圆~小文3 小时前
AI摄像头精准识别技术依赖于深度算法
数据结构·数据库·数码相机·模块测试
Qzkj6663 小时前
从规则到智能:企业数据分类分级的先进实践与自动化转型
大数据·人工智能·自动化
weixin79893765432...3 小时前
React + Fastify + DeepSeek 实现一个简单的对话式 AI 应用
人工智能·react.js·fastify
大千AI助手3 小时前
概率单位回归(Probit Regression)详解
人工智能·机器学习·数据挖掘·回归·大千ai助手·概率单位回归·probit回归
狂炫冰美式4 小时前
3天,1人,从0到付费产品:AI时代个人开发者的生存指南
前端·人工智能·后端
LCG元4 小时前
垂直Agent才是未来:详解让大模型"专业对口"的三大核心技术
人工智能
我不是QI5 小时前
周志华《机器学习—西瓜书》二
人工智能·安全·机器学习
操练起来5 小时前
【昇腾CANN训练营·第八期】Ascend C生态兼容:基于PyTorch Adapter的自定义算子注册与自动微分实现
人工智能·pytorch·acl·昇腾·cann