基于Python实现水果新鲜度分类

python 复制代码
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import random

# Apply seaborn's "whitegrid" style to the plots produced below.
sns.set(style="whitegrid")
python 复制代码
# DATASET PATH
# Root directory of the dataset. The class-detection step walks it as
# <fruit>/<status>/ sub-directories.

DATASET_PATH = "/Fruit Freshness Dataset"
python 复制代码
# DETECT CLASSES
# Every <fruit>/<status> sub-directory of DATASET_PATH is treated as one class
# labelled "<fruit>_<status>". class_paths[i] and class_labels[i] always
# describe the same class.

class_paths = []
class_labels = []

# os.listdir() returns entries in arbitrary order, so both levels are sorted to
# keep the class order (and therefore the order used by every plot) stable
# across runs and machines.
for fruit in sorted(os.listdir(DATASET_PATH)):
    fruit_path = os.path.join(DATASET_PATH, fruit)
    if not os.path.isdir(fruit_path):
        continue  # stray file sitting next to the fruit folders
    for status in sorted(os.listdir(fruit_path)):
        status_path = os.path.join(fruit_path, status)
        if os.path.isdir(status_path):
            class_paths.append(status_path)
            class_labels.append(f"{fruit}_{status}")

print("Detected Classes:")
for label in class_labels:
    print(label)
复制代码
Detected Classes:
Apple_Fresh
Apple_Rotten
Strawberry_Fresh
Strawberry_Rotten
Banana_Fresh
Banana_Rotten
python 复制代码
# CLASS DISTRIBUTION
# Count the directory entries of each class folder and draw them as a bar chart.

class_counts = {}
for label, path in zip(class_labels, class_paths):
    class_counts[label] = len(os.listdir(path))

plt.figure(figsize=(10, 5))
sns.barplot(
    x=list(class_counts),
    y=list(class_counts.values()),
    palette="viridis",
)
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1765: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
  order = pd.unique(vector)
python 复制代码
# SHOW RANDOM SAMPLE IMAGES
# One randomly chosen image per class, laid out on a 3-column grid.

n_cols = 3
# Ceiling division: the grid always has a cell for every class (2 x 3 for six
# classes) instead of raising once a seventh class is detected.
n_rows = (len(class_paths) + n_cols - 1) // n_cols

plt.figure(figsize=(12, 8))
for i, (path, label) in enumerate(zip(class_paths, class_labels), start=1):
    img_names = os.listdir(path)
    if not img_names:
        continue  # empty class folder: leave its grid cell blank
    img_name = random.choice(img_names)
    plt.subplot(n_rows, n_cols, i)
    # The context manager closes the file handle once the image has been drawn.
    with Image.open(os.path.join(path, img_name)) as img:
        plt.imshow(img)
    plt.title(label)
    plt.axis("off")
plt.tight_layout()
plt.show()
python 复制代码
# IMAGE SIZE ANALYSIS
# Collect width/height of up to 50 images per class and plot both histograms.

widths, heights = [], []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:  # sample to speed up
        # Close each file right after reading its size so the loop does not
        # accumulate open file handles.
        with Image.open(os.path.join(path, img_name)) as img:
            w, h = img.size
        widths.append(w)
        heights.append(h)

for values, title in (
    (widths, "Image Width Distribution"),
    (heights, "Image Height Distribution"),
):
    plt.figure(figsize=(10, 4))
    sns.histplot(values, bins=20, kde=True)
    plt.title(title)
    plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
# BRIGHTNESS DISTRIBUTION
# Mean grayscale intensity of up to 50 images per class.

brightness_values = []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:
        with Image.open(os.path.join(path, img_name)) as img:
            # Force 3-channel RGB first: grayscale or palette images decode to
            # 2-D arrays, which cv2.COLOR_RGB2GRAY rejects.
            rgb = np.array(img.convert("RGB"))
        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        brightness_values.append(np.mean(gray))

plt.figure(figsize=(10, 4))
sns.histplot(brightness_values, bins=30, kde=True, color="orange")
plt.title("Brightness Distribution")
plt.xlabel("Brightness")
plt.ylabel("Frequency")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
# COLOR HISTOGRAM (RGB) OF SAMPLE IMAGE
# Pick one random image from one random class and overlay the histograms of
# its red, green and blue channels.

sample_path = random.choice(class_paths)
sample_img = random.choice(os.listdir(sample_path))
sample_file = os.path.join(sample_path, sample_img)

img = cv2.imread(sample_file)
if img is None:
    # cv2.imread signals an unreadable / non-image file by returning None
    # rather than raising; fail here with a message that names the file.
    raise ValueError(f"Could not read image: {sample_file}")
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV decodes as BGR

# Derive "<fruit>_<status>" from the last two path components using os.path so
# the title is also correct when the path separator is not "/".
fruit_name = os.path.basename(os.path.dirname(sample_path))
status_name = os.path.basename(sample_path)

plt.figure(figsize=(10, 5))
colors = ("r", "g", "b")
for channel, color in enumerate(colors):
    channel_values = img_rgb[:, :, channel].flatten()
    sns.histplot(channel_values, bins=50, color=color, label=color, alpha=0.5)
plt.title(f"RGB Histogram ({fruit_name}_{status_name})")
plt.legend()
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
# PCA 2D VISUALIZATION
# Flatten up to 40 images per class (resized to 64x64 RGB) into feature
# vectors, project them onto the first two principal components and plot the
# result coloured by class.

X = []
y = []

for path, label in zip(class_paths, class_labels):
    for img_name in os.listdir(path)[:40]:  # limit to speed up
        img_path = os.path.join(path, img_name)
        # Close the file as soon as the pixels have been copied out.
        with Image.open(img_path) as img:
            # Force RGB and a uniform size so every vector has equal length.
            pixels = np.array(img.convert("RGB").resize((64, 64)))
        X.append(pixels.flatten())
        y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

print("Shape of X:", X.shape)
print("Number of labels:", len(y))

# Apply PCA (PCA is already imported at the top of the file)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Plot PCA result
plt.figure(figsize=(10, 7))
# np.unique() yields the labels in sorted order, so legend order and colour
# assignment are the same on every run (iterating a set of strings is not).
for label in np.unique(y):
    mask = y == label  # boolean mask keeps the selected coordinates 1-D
    plt.scatter(X_pca[mask, 0], X_pca[mask, 1], label=label, alpha=0.7)
plt.title("PCA 2D Visualization of Fruit Freshness Dataset")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend()
plt.show()
复制代码
Shape of X: (233, 12288)
Number of labels: 233
python 复制代码
# AVERAGE IMAGE PER CLASS
# Pixel-wise mean of up to 30 randomly sampled images per class (each resized
# to 128x128 RGB), shown on a 3-column grid.

n_cols = 3
# Ceiling division so the grid has a cell for every class (2 x 3 for six
# classes) instead of raising when more than six classes exist.
n_rows = (len(class_paths) + n_cols - 1) // n_cols

plt.figure(figsize=(12, 8))

for i, (path, label) in enumerate(zip(class_paths, class_labels)):
    img_names = os.listdir(path)  # list the folder once and reuse the result
    if not img_names:
        continue  # np.stack() cannot average an empty class folder

    # Limit number of images to reduce blur
    sample_imgs = random.sample(img_names, min(len(img_names), 30))

    imgs = []
    for img_name in sample_imgs:
        img_path = os.path.join(path, img_name)
        with Image.open(img_path) as img:
            # Force RGB and a uniform size so the arrays can be stacked.
            imgs.append(np.array(img.convert("RGB").resize((128, 128))))

    # Stack and compute average
    avg_img = np.mean(np.stack(imgs, axis=0), axis=0).astype("uint8")

    # Plot
    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(avg_img)
    plt.title(label)
    plt.axis("off")

plt.tight_layout()
plt.show()
相关推荐
小白|1 分钟前
tensorflow:昇腾CANN的TensorFlow适配层
人工智能·python·tensorflow
Matlab程序猿小助手10 分钟前
【MATLAB源码-第319期】基于matlab的帝王蝶优化算法(MBO)无人机三维路径规划,输出最短路径图和适应度曲线.
开发语言·算法·matlab
码点滴13 分钟前
CRI-O选型与容器运行时标准
开发语言·人工智能·架构·kubernetes·cri-o
回眸&啤酒鸭14 分钟前
【回眸】嵌入式软件单元测试工具链实战指南
开发语言·单元测试·白盒测试
彦为君16 分钟前
JavaSE-10-并发编程(11个案例)
java·开发语言·python·ai·nio
石山代码17 分钟前
java前景
java·开发语言
10岁的博客19 分钟前
C++ 进制转换:通用 a 进制转 b 进制(2-36进制)题解
开发语言·c++
图码27 分钟前
二分查找进阶:如何在有序数组中快速找到Upper Bound?
数据结构·算法·面试·分类·柔性数组
Cthy_hy28 分钟前
树状数组(BIT)进阶:差分优化实现区间修改、区间查询
数据结构·python·算法
码界筑梦坊32 分钟前
133-基于Python的全球城市生活成本数据可视化分析系统
开发语言·python·信息可视化·django·毕业设计·生活