使用CNN模型训练图片识别(键盘,椅子,眼镜,水杯,鼠标)

首先是环境:

我是在Anaconda3中的Jupyter Notebook (tensorflow)中进行训练,环境各位自行安装

数据集:

本次数据集共五个类别(键盘,椅子,眼镜,水杯,鼠标),每个类别约两千张图片,共11091张。所有图片放在同一个文件夹中,文件名以类别前缀开头(jp=键盘,yz=椅子,yj=眼镜,bz=水杯,sb=鼠标),代码根据文件名的前两个字符确定标签。

其实不需要这么多图片,CNN模型训练用不上这么多,各位可以自行减少;这个数量是我这边的要求,所以我索性就直接全部用来训练了。

预测结果如下:

代码如下:

相关库:

python 复制代码
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models, layers

图片数据处理:

python 复制代码
# Import the required libraries and lift PIL's image-size limit.
import os
import numpy as np
from PIL import Image
Image.MAX_IMAGE_PIXELS = None  # Remove PIL's pixel-count limit. NOTE(review): this also disables Pillow's decompression-bomb check — only use with trusted images.
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models, layers
import warnings
warnings.filterwarnings('ignore')  # Suppress all Python warnings for the rest of the session

def load_and_preprocess_data(base_path, img_size=(300, 300), batch_size=32):
    """Scan ``base_path`` for labelled images and build train/test batch loaders.

    Only files directly inside ``base_path`` whose name ends in ``.jpg`` or
    ``.png`` (any letter case) are considered. The label is taken from the
    first two characters of the file name: ``jp`` -> 0, ``yz`` -> 1,
    ``yj`` -> 2, ``bz`` -> 3, ``sb`` -> 4. Files with any other prefix are
    ignored. Pixel data is decoded lazily, one batch at a time, so the whole
    dataset never has to fit in memory.

    Args:
        base_path: Directory containing the image files.
        img_size: ``(width, height)`` every image is resized to.
        batch_size: Maximum number of images per batch; must be positive.

    Returns:
        ``(train_generator, test_generator)``. Each object supports ``len()``
        (number of batches) and can be iterated repeatedly; every iteration
        step yields ``(images, labels)`` where ``images`` is a float32 array
        scaled to [0, 1] and ``labels`` holds the integer class ids. The first
        ``int(0.8 * n)`` entries of a random permutation form the training
        set, the remainder the test set.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if no usable image
            is found in ``base_path``.
    """
    if batch_size <= 0:
        # A non-positive batch size would never advance through the dataset.
        raise ValueError(f"batch_size 必须为正整数: {batch_size}")

    image_paths = []
    labels = []

    # File-name prefix -> integer class id
    class_mapping = {
        'jp': 0,  # keyboard
        'yz': 1,  # chair
        'yj': 2,  # glasses
        'bz': 3,  # cup
        'sb': 4   # mouse
    }

    # First pass: collect the paths and labels of every usable image.
    print("正在检查图片文件...")
    for filename in os.listdir(base_path):
        # Compare case-insensitively so "jp_001.JPG" is not silently dropped.
        if not filename.lower().endswith(('.jpg', '.png')):
            continue
        prefix = filename[:2]
        if prefix not in class_mapping:
            continue

        img_path = os.path.join(base_path, filename)
        try:
            with Image.open(img_path) as img:
                # Image.open() is lazy and only identifies the file;
                # verify() asks Pillow to check the file for damage.
                img.verify()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"跳过无效图片 {filename}: {str(e)}")
            continue

        image_paths.append(img_path)
        labels.append(class_mapping[prefix])
        print(f"成功验证图片: {filename}")

    if not image_paths:
        raise ValueError("没有找到有效的图片文件!")

    labels = np.array(labels)

    class ImageDataGenerator:
        """Re-iterable batch loader that decodes images one batch at a time.

        The object is its own iterator: when a pass is exhausted it rewinds
        and reshuffles itself before raising ``StopIteration``, so the same
        instance can be used in a ``for`` loop once per epoch.
        """

        def __init__(self, image_paths, labels, img_size, batch_size):
            self.image_paths = image_paths
            self.labels = labels
            self.img_size = img_size
            self.batch_size = batch_size
            self.n = len(image_paths)
            self.indexes = np.arange(self.n)
            np.random.shuffle(self.indexes)
            self.i = 0  # position of the next batch inside self.indexes

        def __len__(self):
            # Ceiling division: a trailing partial batch counts as a batch.
            return (self.n + self.batch_size - 1) // self.batch_size

        def __iter__(self):
            return self

        def _load_image(self, path):
            """Return one image as a float32 RGB array in [0, 1], or None on failure."""
            try:
                with Image.open(path) as source:
                    img = source if source.mode == 'RGB' else source.convert('RGB')

                    # Shrink very large images first, then resize to the target size.
                    if img.size[0] > 1000 or img.size[1] > 1000:
                        img.thumbnail((1000, 1000), Image.Resampling.LANCZOS)
                    img = img.resize(self.img_size, Image.Resampling.LANCZOS)

                    return np.array(img, dtype=np.float32) / 255.0
            except Exception as e:
                print(f"处理图片 {path} 时出错: {str(e)}")
                return None

        def __next__(self):
            # Loop (rather than recurse) past batches in which every image
            # failed to load, so a long run of bad files cannot exhaust the
            # interpreter's recursion limit.
            while self.i < self.n:
                batch_indexes = self.indexes[self.i:self.i + self.batch_size]
                self.i += self.batch_size

                batch_images = []
                valid_labels = []
                for idx in batch_indexes:
                    img_array = self._load_image(self.image_paths[idx])
                    if img_array is None:
                        continue
                    batch_images.append(img_array)
                    valid_labels.append(self.labels[idx])

                if batch_images:
                    return np.array(batch_images), np.array(valid_labels)

            # Pass finished: rewind and reshuffle for the next epoch.
            self.i = 0
            np.random.shuffle(self.indexes)
            raise StopIteration

    # Dataset summary
    print(f"\n总共找到 {len(image_paths)} 张有效图片")
    for label, count in zip(*np.unique(labels, return_counts=True)):
        print(f"类别 {label}: {count} 张图片")

    # Random 80/20 split over sample indices
    n_samples = len(image_paths)
    n_train = int(0.8 * n_samples)
    indices = np.random.permutation(n_samples)
    train_idx, test_idx = indices[:n_train], indices[n_train:]

    train_generator = ImageDataGenerator(
        [image_paths[i] for i in train_idx],
        labels[train_idx],
        img_size,
        batch_size
    )

    test_generator = ImageDataGenerator(
        [image_paths[i] for i in test_idx],
        labels[test_idx],
        img_size,
        batch_size
    )

    return train_generator, test_generator

# Training routine driven by a hand-written per-batch loop
def train_and_save_model(model, train_generator, test_generator, model_name, epochs=10):
    """Compile ``model``, train it batch by batch, plot the curves and save it.

    Args:
        model: Object providing ``compile``, ``train_on_batch``,
            ``test_on_batch`` and ``save``. The two ``*_on_batch`` calls are
            expected to return a sequence with the loss at index 0 and the
            accuracy at index 1.
        train_generator: Iterable of ``(images, labels)`` training batches;
            iterated once per epoch.
        test_generator: Iterable of ``(images, labels)`` validation batches;
            iterated once per epoch.
        model_name: Prefix for the two output files,
            ``<model_name>_training_history.png`` and ``<model_name>.h5``.
        epochs: Number of passes over ``train_generator``.
    """
    def mean_or_zero(values):
        # An epoch without any usable batch is reported as 0 instead of NaN.
        return np.mean(values) if values else 0

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    # Per-epoch averages, later used for plotting
    history = {'accuracy': [], 'val_accuracy': [], 'loss': [], 'val_loss': []}

    for epoch_no in range(1, epochs + 1):
        print(f'\nEpoch {epoch_no}/{epochs}')

        # Training pass
        train_loss, train_acc = [], []
        for batch_no, (images, targets) in enumerate(train_generator, start=1):
            if len(images) == 0:
                continue
            result = model.train_on_batch(images, targets)
            step_loss, step_acc = result[0], result[1]
            train_loss.append(step_loss)
            train_acc.append(step_acc)
            # "\r" keeps the progress on a single console line.
            print(f'\rBatch {batch_no} - loss: {step_loss:.4f} - accuracy: {step_acc:.4f}', end='')

        # Validation pass
        val_loss, val_acc = [], []
        for images, targets in test_generator:
            if len(images) == 0:
                continue
            result = model.test_on_batch(images, targets)
            val_loss.append(result[0])
            val_acc.append(result[1])

        epoch_train_loss = mean_or_zero(train_loss)
        epoch_train_acc = mean_or_zero(train_acc)
        epoch_val_loss = mean_or_zero(val_loss)
        epoch_val_acc = mean_or_zero(val_acc)

        history['accuracy'].append(epoch_train_acc)
        history['val_accuracy'].append(epoch_val_acc)
        history['loss'].append(epoch_train_loss)
        history['val_loss'].append(epoch_val_loss)

        print(f'\nEpoch {epoch_no} - loss: {epoch_train_loss:.4f} - accuracy: {epoch_train_acc:.4f} - '
              f'val_loss: {epoch_val_loss:.4f} - val_accuracy: {epoch_val_acc:.4f}')

    # Two side-by-side panels: accuracy on the left, loss on the right
    plt.figure(figsize=(12, 4))
    panels = (
        ('accuracy', 'val_accuracy', '训练准确率', '验证准确率', '模型准确率', '准确率'),
        ('loss', 'val_loss', '训练损失', '验证损失', '模型损失', '损失'),
    )
    for position, (train_key, val_key, train_label, val_label, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history[train_key], label=train_label)
        plt.plot(history[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.savefig(f'{model_name}_training_history.png')

    # Persist the trained model
    model.save(f'{model_name}.h5')

模型训练:

python 复制代码
# epochs defaults to 2, i.e. two full passes over the training data; change it as needed
def train_and_save_model(model, train_generator, test_generator, model_name, epochs=2):
    """Compile ``model``, train it batch by batch, plot the curves and save it.

    NOTE: this definition has the same name as the earlier
    ``train_and_save_model`` in this file and replaces it when both are
    executed in the same session.

    Args:
        model: Object providing ``compile``, ``train_on_batch``,
            ``test_on_batch`` and ``save``. The two ``*_on_batch`` calls are
            expected to return a sequence with the loss at index 0 and the
            accuracy at index 1.
        train_generator: Iterable of ``(images, labels)`` training batches;
            iterated once per epoch.
        test_generator: Iterable of ``(images, labels)`` validation batches;
            iterated once per epoch.
        model_name: Prefix for the two output files,
            ``<model_name>_training_history.png`` and ``<model_name>.h5``.
        epochs: Number of passes over ``train_generator``.
    """
    def _mean_or_zero(values):
        # np.mean([]) returns NaN and emits a RuntimeWarning; report 0 for an
        # epoch that produced no batches, as the earlier definition does.
        return np.mean(values) if values else 0

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Per-epoch averages, later used for plotting
    history = {'accuracy': [], 'val_accuracy': [], 'loss': [], 'val_loss': []}

    for epoch in range(epochs):
        print(f'\nEpoch {epoch+1}/{epochs}')

        # Training pass
        train_loss = []
        train_acc = []
        for batch_images, batch_labels in train_generator:
            if len(batch_images) == 0:
                continue
            metrics = model.train_on_batch(batch_images, batch_labels)
            train_loss.append(metrics[0])
            train_acc.append(metrics[1])

        # Validation pass
        val_loss = []
        val_acc = []
        for batch_images, batch_labels in test_generator:
            if len(batch_images) == 0:
                continue
            metrics = model.test_on_batch(batch_images, batch_labels)
            val_loss.append(metrics[0])
            val_acc.append(metrics[1])

        # Compute each epoch average once and reuse it for history and output.
        epoch_train_loss = _mean_or_zero(train_loss)
        epoch_train_acc = _mean_or_zero(train_acc)
        epoch_val_loss = _mean_or_zero(val_loss)
        epoch_val_acc = _mean_or_zero(val_acc)

        history['accuracy'].append(epoch_train_acc)
        history['val_accuracy'].append(epoch_val_acc)
        history['loss'].append(epoch_train_loss)
        history['val_loss'].append(epoch_val_loss)

        print(f'loss: {epoch_train_loss:.4f} - accuracy: {epoch_train_acc:.4f} - '
              f'val_loss: {epoch_val_loss:.4f} - val_accuracy: {epoch_val_acc:.4f}')

    # Plot the training history: accuracy on the left, loss on the right
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(history['accuracy'], label='训练准确率')
    plt.plot(history['val_accuracy'], label='验证准确率')
    plt.title('模型准确率')
    plt.xlabel('Epoch')
    plt.ylabel('准确率')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history['loss'], label='训练损失')
    plt.plot(history['val_loss'], label='验证损失')
    plt.title('模型损失')
    plt.xlabel('Epoch')
    plt.ylabel('损失')
    plt.legend()

    plt.savefig(f'{model_name}_training_history.png')

    # Persist the trained model
    model.save(f'{model_name}.h5')

主程序(注意:下面调用的 create_cnn_model() 的定义未包含在本文代码中,运行前需要先自行定义该函数):

python 复制代码
# Dataset directory (hard-coded; change it to the local image folder)
base_path = 'E:/modol'

try:
    # Fail early when the dataset directory does not exist
    if not os.path.exists(base_path):
        raise FileNotFoundError(f"找不到指定路径:{base_path}")

    # Build the train/test batch generators
    print("正在加载和预处理数据...")
    train_generator, test_generator = load_and_preprocess_data(base_path, batch_size=32)

    # Build and train the CNN model; the result is saved under the 'cnn_model' prefix
    # NOTE(review): create_cnn_model is not defined in the visible part of this file —
    # confirm it is defined before this cell/script runs.
    print("\n正在训练CNN模型...")
    cnn_model = create_cnn_model()
    train_and_save_model(cnn_model, train_generator, test_generator, 'cnn_model')


    print("\n模型训练完成!可以运行GUI程序进行测试。")

except Exception as e:
    # Any failure, including the FileNotFoundError raised above, is reported
    # as a single message line; no traceback is printed.
    print(f"\n程序出错:{str(e)}")

然后是GUI界面:

python 复制代码
#单cnn模型gui界面
import tkinter as tk
from tkinter import filedialog, ttk
from PIL import Image, ImageTk
import numpy as np
import tensorflow as tf

class ObjectClassifierGUI:
    """Tkinter window that classifies a user-selected image with a saved Keras model.

    The window shows the chosen picture on the left and, on the right, the
    predicted class, its confidence and one progress bar per class with the
    predicted probability.
    """

    # (width, height) of the array fed to the model. 300x300 is the default
    # img_size of the training loader in this file.
    # NOTE(review): keep in sync with the size the saved model was trained with.
    INPUT_SIZE = (300, 300)

    # Bounding box of the on-screen preview (aspect ratio is preserved).
    DISPLAY_SIZE = (300, 300)

    def __init__(self, root, model_path='cnn_model.h5'):
        """Load the model and build the widgets.

        Args:
            root: The Tk root window the GUI is attached to.
            model_path: Path of the saved Keras model to load.
        """
        self.root = root
        self.root.title("物体分类器")
        self.root.geometry("800x600")  # window size

        self.model = tf.keras.models.load_model(model_path)

        # Class names, indexed by the model's output position
        self.classes = ['键盘', '椅子', '眼镜', '水杯', '鼠标']

        self.create_widgets()

    def create_widgets(self):
        """Create the buttons, the image area and the result widgets."""
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Button row
        button_frame = ttk.Frame(main_frame)
        button_frame.grid(row=0, column=0, columnspan=2, pady=10)

        self.select_btn = ttk.Button(
            button_frame,
            text="选择图片",
            command=self.select_image
        )
        self.select_btn.pack(side=tk.LEFT, padx=5)

        self.clear_btn = ttk.Button(
            button_frame,
            text="清除图片",
            command=self.clear_image,
            state='disabled'  # enabled once an image has been classified
        )
        self.clear_btn.pack(side=tk.LEFT, padx=5)

        # Left column: image, right column: prediction
        left_frame = ttk.Frame(main_frame)
        left_frame.grid(row=1, column=0, padx=10)

        right_frame = ttk.Frame(main_frame)
        right_frame.grid(row=1, column=1, padx=10)

        ttk.Label(left_frame, text="选择的图片:").pack(pady=5)
        self.image_label = ttk.Label(left_frame)
        self.image_label.pack(pady=5)

        ttk.Label(right_frame, text="预测结果:").pack(pady=5)
        self.result_frame = ttk.Frame(right_frame)
        self.result_frame.pack(pady=5)

        self.pred_class_label = ttk.Label(
            self.result_frame,
            text="预测类别:-",
            font=('Arial', 12)
        )
        self.pred_class_label.pack(pady=5)

        self.confidence_label = ttk.Label(
            self.result_frame,
            text="置信度:-",
            font=('Arial', 12)
        )
        self.confidence_label.pack(pady=5)

        # One (progress bar, percentage label) pair per class
        self.prob_frame = ttk.Frame(self.result_frame)
        self.prob_frame.pack(pady=10)
        self.prob_bars = []
        for row, class_name in enumerate(self.classes):
            ttk.Label(self.prob_frame, text=f"{class_name}:").grid(row=row, column=0, padx=5)
            prob_bar = ttk.Progressbar(self.prob_frame, length=200, mode='determinate')
            prob_bar.grid(row=row, column=1, padx=5)
            prob_value = ttk.Label(self.prob_frame, text="0%")
            prob_value.grid(row=row, column=2, padx=5)
            self.prob_bars.append((prob_bar, prob_value))

    def select_image(self):
        """Ask the user for an image file, display it and classify it.

        Nothing happens when the dialog is cancelled. A file that cannot be
        read as an image is reported in an error dialog and leaves the current
        display untouched.
        """
        file_path = filedialog.askopenfilename()
        if not file_path:
            return

        try:
            with Image.open(file_path) as opened:
                # Always hand the model 3 channels: RGBA, grayscale or palette
                # files would otherwise produce an array of the wrong shape.
                # convert() returns a fully loaded copy, so the file can be
                # closed right away.
                image = opened.convert('RGB')
        except (OSError, ValueError, Image.DecompressionBombError) as exc:
            from tkinter import messagebox
            messagebox.showerror("错误", f"无法打开图片:{exc}")
            return

        # Model input is built from the full image (not from the preview),
        # mirroring the RGB conversion, LANCZOS resize and 1/255 scaling used
        # when the training batches are built in this file.
        model_input = image.resize(self.INPUT_SIZE, Image.Resampling.LANCZOS)
        img_array = np.asarray(model_input, dtype=np.float32) / 255.0
        self.current_image_array = np.expand_dims(img_array, axis=0)

        # Preview: thumbnail() shrinks in place and keeps the aspect ratio.
        preview = image.copy()
        preview.thumbnail(self.DISPLAY_SIZE, Image.Resampling.LANCZOS)
        photo = ImageTk.PhotoImage(preview)
        self.image_label.configure(image=photo)
        # Keep a reference on the widget; otherwise the PhotoImage is garbage
        # collected and the label goes blank.
        self.image_label.image = photo

        self.predict_image(self.current_image_array)

        self.clear_btn['state'] = 'normal'

    def predict_image(self, img_array):
        """Run the model on ``img_array`` and show the result.

        Args:
            img_array: Batch containing a single preprocessed image; only the
                first row of the model output is displayed.
        """
        predictions = self.model.predict(img_array)
        probabilities = predictions[0]

        pred_class_idx = np.argmax(probabilities)
        pred_class = self.classes[pred_class_idx]
        confidence = probabilities[pred_class_idx] * 100

        self.pred_class_label.config(text=f"预测类别:{pred_class}")
        self.confidence_label.config(text=f"置信度:{confidence:.2f}%")

        # Per-class probability bars
        for (bar, value_label), prob in zip(self.prob_bars, probabilities):
            percentage = float(prob) * 100
            bar['value'] = percentage
            value_label.config(text=f"{percentage:.1f}%")

    def clear_image(self):
        """Remove the displayed image and reset every result widget."""
        self.image_label.configure(image='')
        self.image_label.image = None

        self.pred_class_label.config(text="预测类别:-")
        self.confidence_label.config(text="置信度:-")

        for bar, value_label in self.prob_bars:
            bar['value'] = 0
            value_label.config(text="0%")

        self.clear_btn['state'] = 'disabled'

        # Drop the cached model input, if any
        if hasattr(self, 'current_image_array'):
            del self.current_image_array

# Script entry point: create the Tk root window, attach the classifier GUI
# and hand control to the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = ObjectClassifierGUI(root)
    root.mainloop()

另外还有一个调用摄像头进行实时识别的页面,但是精度不是很高(可能是摄像头的画面太杂了),这里就不分享了。

相关推荐
神一样的老师1 小时前
TinyML在OBD-II边缘设备上燃油类型分类的实现与优化
人工智能
hunteritself1 小时前
OpenAI直播发布第11天:ChatGPT桌面客户端升级,就这?
人工智能·gpt·chatgpt·语音识别·claude
MarkHD2 小时前
第二十四天 循环神经网络(RNN)基本原理与实现
人工智能·rnn·深度学习
爱研究的小牛2 小时前
DeepFaceLab技术浅析(六):后处理过程
人工智能·深度学习·机器学习·aigc
AI_NEW_COME2 小时前
高新技术企业知识管理新篇章:高效内部知识库建设
人工智能
orion-orion2 小时前
概率论沉思录:初等假设检验
人工智能·概率论·科学哲学
人工智能研究所3 小时前
MaskGCT——开源文本转语音模型,可模仿任何人说话声音
人工智能·文本转语音·文本转音频
宸码3 小时前
【项目实战】ISIC 数据集上的实验揭秘:UNet + SENet、Spatial Attention 和 CBAM 的最终表现
人工智能·python·深度学习·神经网络·机器学习·计算机视觉
老板多放点香菜4 小时前
AI、大数据、机器学习、深度学习、神经网络之间的关系
大数据·人工智能·深度学习·神经网络·机器学习
volcanical4 小时前
MoCo 对比自监督学习
人工智能·学习·机器学习