基于Python实现水果新鲜度分类

python 复制代码
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import random

sns.set(style="whitegrid")
python 复制代码
#DATASET PATH

DATASET_PATH = "/Fruit Freshness Dataset"
python 复制代码
#DETECT CLASSES

class_paths = []
class_labels = []

for fruit in os.listdir(DATASET_PATH):
    fruit_path = os.path.join(DATASET_PATH, fruit)
    if os.path.isdir(fruit_path):
        for status in os.listdir(fruit_path):
            status_path = os.path.join(fruit_path, status)
            if os.path.isdir(status_path):
                class_paths.append(status_path)
                class_labels.append(f"{fruit}_{status}")

print("Detected Classes:")
for label in class_labels:
    print(label)
复制代码
Detected Classes:
Apple_Fresh
Apple_Rotten
Strawberry_Fresh
Strawberry_Rotten
Banana_Fresh
Banana_Rotten
python 复制代码
#CLASS DISTRIBUTION

class_counts = {label: len(os.listdir(path)) for label, path in zip(class_labels, class_paths)}

plt.figure(figsize=(10,5))
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()), palette="viridis")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1765: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
  order = pd.unique(vector)
python 复制代码
#SHOW RANDOM SAMPLE IMAGES

plt.figure(figsize=(12,8))
i = 1
for path, label in zip(class_paths, class_labels):
    img_name = random.choice(os.listdir(path))
    img = Image.open(os.path.join(path, img_name))
    plt.subplot(2, 3, i)
    plt.imshow(img)
    plt.title(label)
    plt.axis("off")
    i += 1
plt.tight_layout()
plt.show()
python 复制代码
#IMAGE SIZE ANALYSIS

widths, heights = [], []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:  # sample to speed up
        img = Image.open(os.path.join(path, img_name))
        w, h = img.size
        widths.append(w)
        heights.append(h)

plt.figure(figsize=(10,4))
sns.histplot(widths, bins=20, kde=True)
plt.title("Image Width Distribution")
plt.show()

plt.figure(figsize=(10,4))
sns.histplot(heights, bins=20, kde=True)
plt.title("Image Height Distribution")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#BRIGHTNESS DISTRIBUTION

brightness_values = []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:
        img = Image.open(os.path.join(path, img_name))
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
        brightness_values.append(np.mean(gray))

plt.figure(figsize=(10,4))
sns.histplot(brightness_values, bins=30, kde=True, color="orange")
plt.title("Brightness Distribution")
plt.xlabel("Brightness")
plt.ylabel("Frequency")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#COLOR HISTOGRAM (RGB) OF SAMPLE IMAGE

sample_path = random.choice(class_paths)
sample_img = random.choice(os.listdir(sample_path))
img = cv2.imread(os.path.join(sample_path, sample_img))
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(10,5))
colors = ("r", "g", "b")
for i, color in enumerate(colors):
    hist = img_rgb[:,:,i].flatten()
    sns.histplot(hist, bins=50, color=color, label=color, alpha=0.5)
plt.title(f"RGB Histogram ({sample_path.split('/')[-2]}_{sample_path.split('/')[-1]})")
plt.legend()
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#PCA 2D VISUALIZATION

X = []
y = []

for path, label in zip(class_paths, class_labels):
    for img_name in os.listdir(path)[:40]:  # limit to speed up
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')  # force RGB
        img = img.resize((64,64))
        X.append(np.array(img).flatten())
        y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

print("Shape of X:", X.shape)
print("Number of labels:", len(y))

# Apply PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Plot PCA result
plt.figure(figsize=(10,7))
for label in set(y):
    idx = np.where(y == label)
    plt.scatter(X_pca[idx,0], X_pca[idx,1], label=label, alpha=0.7)
plt.title("PCA 2D Visualization of Fruit Freshness Dataset")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend()
plt.show()
复制代码
Shape of X: (233, 12288)
Number of labels: 233
python 复制代码
plt.figure(figsize=(12,8))

for i, (path, label) in enumerate(zip(class_paths, class_labels)):
    imgs = []

    # Limit number of images to reduce blur
    sample_imgs = random.sample(os.listdir(path), min(len(os.listdir(path)), 30))

    for img_name in sample_imgs:
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')  # Force RGB
        img = img.resize((128,128))                # Uniform size
        imgs.append(np.array(img))

    # Stack and compute average
    avg_img = np.mean(np.stack(imgs, axis=0), axis=0).astype("uint8")

    # Plot
    plt.subplot(2,3,i+1)
    plt.imshow(avg_img)
    plt.title(label)
    plt.axis("off")

plt.tight_layout()
plt.show()
相关推荐
Volunteer Technology2 小时前
文本数据分析(二)
python·数据挖掘·数据分析
组合缺一2 小时前
Claude Code Agent Skills vs. Solon AI Skills:从工具增强到框架规范的深度对齐
java·人工智能·python·开源·solon·skills
学海无涯书山有路2 小时前
Android ViewBinding 新手详解(Java 版)—— 结合 ViewModel+LiveData 实战
android·java·开发语言
辰阳星宇2 小时前
python代码修复字符串json数据格式问题,并将其按照字典形式读取
windows·python·json
jaysee-sjc2 小时前
【练习十】Java 面向对象实战:智能家居控制系统
java·开发语言·算法·智能家居
零基础的修炼2 小时前
算法---常见位运算总结
java·开发语言·前端
wgslucky2 小时前
sm2 js加密,java服务器端解密
java·开发语言·javascript
我是大咖2 小时前
C 语言笔记: const 指针 + 堆内存申请
c语言·开发语言
海天一色y2 小时前
基于Inception-V3实现CIFAR-100数据集的分类任务
人工智能·分类·数据挖掘
dyyx1112 小时前
C++编译期数据结构
开发语言·c++·算法