使用相机实时进行透视变换矫正,并用 Streamlit 做一个界面

python 复制代码
import cv2
import numpy as np

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    """Find the largest four-sided contour in ``image`` and return its corners.

    The image is downscaled to ``resize_height`` rows, edge-detected, and the
    five largest contours are approximated by polygons; the first one with
    exactly four vertices wins.

    Args:
        image: BGR image (as passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).
        resize_height: Height in pixels of the working copy used for detection.
        prev_rectangle: Corners returned for the previous frame, or ``None``.
        stability_factor: If every coordinate of the new detection differs from
            ``prev_rectangle`` by less than this many pixels, the previous
            corners are returned unchanged to suppress jitter.

    Returns:
        A list of four ``[x, y]`` integer pairs in the coordinates of the
        original image, ordered top-left, bottom-left, bottom-right, top-right
        so they pair positionally with a destination quad in the same order.
        Returns ``prev_rectangle`` (possibly ``None``) when nothing is detected.
    """
    def process_image(image, height):
        # Work on a fixed-height copy; ``ratio`` maps results back to full size.
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 25, 125)
        return edges, ratio

    def find_rectangle(edges):
        # [-2] selects the contour list for both the 2-value and the 3-value
        # return forms of cv2.findContours (the latter is used by OpenCV 3.x).
        contours = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)

            if len(approx) == 4:
                return approx
        return None

    def order_corners(points):
        # The polygon approximation gives no guarantee about which vertex comes
        # first or which way the vertices wind, so normalise both. The cyclic
        # order is kept (only reversed/rotated), so the quad never self-crosses.
        pts = np.asarray(points, dtype=np.float64).reshape(4, 2)
        xs, ys = pts[:, 0], pts[:, 1]
        # Shoelace sum: negative for TL -> BL -> BR -> TR with the y axis pointing down.
        twice_area = np.sum(xs * np.roll(ys, -1) - np.roll(xs, -1) * ys)
        if twice_area > 0:
            pts = pts[::-1]
        # The vertex with the smallest x + y is taken as the top-left corner.
        start = int(np.argmin(pts.sum(axis=1)))
        return np.roll(pts, -start, axis=0)

    edges, ratio = process_image(image, resize_height)
    rectangle = find_rectangle(edges)

    if rectangle is None:
        return prev_rectangle  # if no rectangle detected, use the previous one

    # Scale back to the original resolution, then fix the corner order.
    ordered = order_corners(rectangle.reshape(4, 2) * ratio)
    rectangle = [[int(point[0]), int(point[1])] for point in ordered]

    # if prev rectangle exists and the change is smaller than the stability factor, use the previous rectangle
    if prev_rectangle is not None:
        diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
        if np.all(diff < stability_factor):
            return prev_rectangle

    return rectangle
# Live preview: read frames from the default camera, detect the rectangle and
# show both the raw frame and the perspective-corrected view until 'q' is pressed.
prev_rectangle = None
# Corners of the 337x488 output image: top-left, bottom-left, bottom-right, top-right.
dst = np.float32([[0, 0], [0, 488], [337, 488], [337, 0]])
cap = cv2.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or stream ended): stop instead
            # of spinning forever with no window to receive the 'q' key.
            break

        rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
        prev_rectangle = rectangle  # save the rectangle for the next frame
        print(rectangle)

        # Always show the source frame so a window exists for cv2.waitKey even
        # before the first rectangle has been detected.
        cv2.imshow("src", frame)
        if rectangle is not None:
            src = np.float32(rectangle)
            m = cv2.getPerspectiveTransform(src, dst)
            result = cv2.warpPerspective(frame, m, (337, 488))
            cv2.imshow("result", result)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the windows even if processing raised.
    cap.release()
    cv2.destroyAllWindows()

Streamlit 界面

python 复制代码
import streamlit as st
from cnstd import CnStd
from cnocr import CnOcr
import cv2
import numpy as np
import pandas as pd
from PIL import Image

def get_rectangle_corners(image, resize_height, prev_rectangle=None, stability_factor=12):
    """Find the largest contour in ``image`` and return its corners if it is a quad.

    The image is downscaled to ``resize_height`` rows and edge-detected; only
    the single largest contour is considered, and it is accepted when its
    polygon approximation has exactly four vertices.

    Args:
        image: BGR image (as passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).
        resize_height: Height in pixels of the working copy used for detection.
        prev_rectangle: Corners returned for the previous frame, or ``None``.
        stability_factor: If every coordinate of the new detection differs from
            ``prev_rectangle`` by less than this many pixels, the previous
            corners are returned unchanged to suppress jitter.

    Returns:
        A list of four ``[x, y]`` integer pairs in the coordinates of the
        original image, ordered top-left, bottom-left, bottom-right, top-right
        so they pair positionally with a destination quad in the same order.
        Returns ``prev_rectangle`` (possibly ``None``) when nothing is detected.
    """
    def process_image(image, height):
        # Work on a fixed-height copy; ``ratio`` maps results back to full size.
        ratio = image.shape[0] / height
        image = cv2.resize(image, (int(image.shape[1] / ratio), height))
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edges = cv2.Canny(gray, 50, 100)
        return edges, ratio

    def find_rectangle(edges):
        # [-2] selects the contour list for both the 2-value and the 3-value
        # return forms of cv2.findContours (the latter is used by OpenCV 3.x).
        contours = cv2.findContours(edges.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:1]

        for contour in contours:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)

            if len(approx) == 4:
                return approx
        return None

    def order_corners(points):
        # The polygon approximation gives no guarantee about which vertex comes
        # first or which way the vertices wind, so normalise both. The cyclic
        # order is kept (only reversed/rotated), so the quad never self-crosses.
        pts = np.asarray(points, dtype=np.float64).reshape(4, 2)
        xs, ys = pts[:, 0], pts[:, 1]
        # Shoelace sum: negative for TL -> BL -> BR -> TR with the y axis pointing down.
        twice_area = np.sum(xs * np.roll(ys, -1) - np.roll(xs, -1) * ys)
        if twice_area > 0:
            pts = pts[::-1]
        # The vertex with the smallest x + y is taken as the top-left corner.
        start = int(np.argmin(pts.sum(axis=1)))
        return np.roll(pts, -start, axis=0)

    edges, ratio = process_image(image, resize_height)
    rectangle = find_rectangle(edges)

    if rectangle is None:
        return prev_rectangle  # if no rectangle detected, use the previous one

    # Scale back to the original resolution, then fix the corner order.
    ordered = order_corners(rectangle.reshape(4, 2) * ratio)
    rectangle = [[int(point[0]), int(point[1])] for point in ordered]

    # Keep the previous corners when the new detection moved by less than the
    # stability factor on every coordinate.
    if prev_rectangle is not None:
        diff = np.abs(np.array(prev_rectangle) - np.array(rectangle))
        if np.all(diff < stability_factor):
            return prev_rectangle

    return rectangle

# Streamlit front end combining the live perspective correction with an OCR page.
# Corners of the 640x588 corrected view: top-left, bottom-left, bottom-right, top-right.
dst = np.float32([[0, 0], [0, 588], [640, 588], [640, 0]])
prev_rectangle = None

st.sidebar.title("易读写")
option = st.sidebar.selectbox('请选择要运行的功能', ('显示矫正后画面', '图片转文字'))
# Placeholders for the two image views (raw frame and corrected frame)
FRAME_WINDOW_RAW = st.image([])
FRAME_WINDOW_TRANSFORMED = st.image([])
# Live view of the corrected camera image
if option == '显示矫正后画面':
    run = st.checkbox('开始运行')
    alpha = st.sidebar.slider('亮度调节 Brightness', min_value=0.0, max_value=3.0,
                              value=1.0)  # brightness gain applied to the corrected view
    angle = st.sidebar.slider('视角调节 Rotation', 0, 360, 0)  # rotation of the corrected view, in degrees
    if run:
        # Open the camera only when it is actually needed, and always release
        # it, even if the loop is interrupted by an exception.
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    st.write("Can't receive frame (Stream end?). Exiting..")
                    break

                # Show the raw frame on every iteration, not only after a
                # rectangle has been found.
                raw_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for raw frame
                FRAME_WINDOW_RAW.image(Image.fromarray(raw_frame))  # Show raw frame

                rectangle = get_rectangle_corners(frame, 500, prev_rectangle)
                prev_rectangle = rectangle  # save the rectangle for the next frame
                if rectangle is None:
                    continue

                src = np.float32(rectangle)
                m = cv2.getPerspectiveTransform(src, dst)
                result = cv2.warpPerspective(frame, m, (640, 588))

                if angle:
                    # Apply the rotation chosen in the sidebar around the image centre.
                    height, width = result.shape[:2]
                    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
                    result = cv2.warpAffine(result, rotation, (width, height))

                result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for transformed
                result = cv2.convertScaleAbs(result, alpha=alpha, beta=0)
                FRAME_WINDOW_TRANSFORMED.image(Image.fromarray(result))  # Show transformed result
        finally:
            cap.release()
    else:
        st.write('Stopped')

# OCR on an uploaded picture
elif option == '图片转文字':
    uploaded_file = st.sidebar.file_uploader("选择一个图片文件")

    if uploaded_file is not None:
        img = Image.open(uploaded_file)
        st.image(img, caption='Uploaded Image.', use_column_width=True)

        if st.sidebar.button('开始运行'):
            with st.spinner('OCR is in progress...'):
                # Create the detector/recogniser only when OCR is requested so
                # the camera page does not pay for loading them.
                std = CnStd()
                cn_ocr = CnOcr()

                # Force 3-channel RGB so RGBA / palette / grayscale uploads
                # reach the detector in a single consistent layout.
                np_img = np.array(img.convert('RGB'))
                box_infos = std.detect(np_img)

                text = []
                for box_info in box_infos['detected_texts']:
                    cropped_img = box_info['cropped_img']
                    ocr_res = cn_ocr.ocr_for_single_line(cropped_img)
                    text.append(ocr_res['text'])

                st.subheader("Extracted text:")
                for line in text:
                    st.write(line)

            st.success('Text extraction complete')
相关推荐
肥仔哥哥193014 分钟前
基于OpenCv做照片分析(Java)
java·人工智能·opencv·图像原理
2501_924879261 小时前
客流特征识别误报率↓76%!陌讯多模态时序融合算法在智慧零售的实战解析
大数据·人工智能·算法·目标检测·计算机视觉·视觉检测·零售
说私域1 小时前
消费、渠道与技术变革下新零售的崛起与开源AI大模型AI智能名片S2B2C商城小程序的融合发展
人工智能·开源·零售
北京地铁1号线1 小时前
广告推荐模型2:因子分解机(Factorization Machines, FM)
人工智能·算法·推荐算法
做一个快乐的小傻瓜1 小时前
机器学习笔记
人工智能·决策树·机器学习
居然JuRan1 小时前
MCP:基础概念、快速应用和背后原理
人工智能
1ucency1 小时前
Dify插件“Database”安装及配置
人工智能
eqwaak02 小时前
科技信息差(8.26)
大数据·开发语言·人工智能·编辑器
念夏沫2 小时前
“华生科技杯”2025年全国青少年龙舟锦标赛在海宁举行
大数据·人工智能·科技
2202_756749692 小时前
自然处理语言NLP: 基于双分支 LSTM 的酒店评论情感分析模型构建与实现
人工智能·自然语言处理·lstm