基于Python实现水果新鲜度分类

python 复制代码
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import random

sns.set(style="whitegrid")
python 复制代码
#DATASET PATH

DATASET_PATH = "/Fruit Freshness Dataset"
python 复制代码
#DETECT CLASSES

class_paths = []
class_labels = []

for fruit in os.listdir(DATASET_PATH):
    fruit_path = os.path.join(DATASET_PATH, fruit)
    if os.path.isdir(fruit_path):
        for status in os.listdir(fruit_path):
            status_path = os.path.join(fruit_path, status)
            if os.path.isdir(status_path):
                class_paths.append(status_path)
                class_labels.append(f"{fruit}_{status}")

print("Detected Classes:")
for label in class_labels:
    print(label)
复制代码
Detected Classes:
Apple_Fresh
Apple_Rotten
Strawberry_Fresh
Strawberry_Rotten
Banana_Fresh
Banana_Rotten
python 复制代码
#CLASS DISTRIBUTION

class_counts = {label: len(os.listdir(path)) for label, path in zip(class_labels, class_paths)}

plt.figure(figsize=(10,5))
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()), palette="viridis")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1765: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
  order = pd.unique(vector)
python 复制代码
#SHOW RANDOM SAMPLE IMAGES

plt.figure(figsize=(12,8))
i = 1
for path, label in zip(class_paths, class_labels):
    img_name = random.choice(os.listdir(path))
    img = Image.open(os.path.join(path, img_name))
    plt.subplot(2, 3, i)
    plt.imshow(img)
    plt.title(label)
    plt.axis("off")
    i += 1
plt.tight_layout()
plt.show()
python 复制代码
#IMAGE SIZE ANALYSIS

widths, heights = [], []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:  # sample to speed up
        img = Image.open(os.path.join(path, img_name))
        w, h = img.size
        widths.append(w)
        heights.append(h)

plt.figure(figsize=(10,4))
sns.histplot(widths, bins=20, kde=True)
plt.title("Image Width Distribution")
plt.show()

plt.figure(figsize=(10,4))
sns.histplot(heights, bins=20, kde=True)
plt.title("Image Height Distribution")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#BRIGHTNESS DISTRIBUTION

brightness_values = []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:
        img = Image.open(os.path.join(path, img_name))
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
        brightness_values.append(np.mean(gray))

plt.figure(figsize=(10,4))
sns.histplot(brightness_values, bins=30, kde=True, color="orange")
plt.title("Brightness Distribution")
plt.xlabel("Brightness")
plt.ylabel("Frequency")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#COLOR HISTOGRAM (RGB) OF SAMPLE IMAGE

sample_path = random.choice(class_paths)
sample_img = random.choice(os.listdir(sample_path))
img = cv2.imread(os.path.join(sample_path, sample_img))
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(10,5))
colors = ("r", "g", "b")
for i, color in enumerate(colors):
    hist = img_rgb[:,:,i].flatten()
    sns.histplot(hist, bins=50, color=color, label=color, alpha=0.5)
plt.title(f"RGB Histogram ({sample_path.split('/')[-2]}_{sample_path.split('/')[-1]})")
plt.legend()
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#PCA 2D VISUALIZATION

X = []
y = []

for path, label in zip(class_paths, class_labels):
    for img_name in os.listdir(path)[:40]:  # limit to speed up
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')  # force RGB
        img = img.resize((64,64))
        X.append(np.array(img).flatten())
        y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

print("Shape of X:", X.shape)
print("Number of labels:", len(y))

# Apply PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Plot PCA result
plt.figure(figsize=(10,7))
for label in set(y):
    idx = np.where(y == label)
    plt.scatter(X_pca[idx,0], X_pca[idx,1], label=label, alpha=0.7)
plt.title("PCA 2D Visualization of Fruit Freshness Dataset")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend()
plt.show()
复制代码
Shape of X: (233, 12288)
Number of labels: 233
python 复制代码
plt.figure(figsize=(12,8))

for i, (path, label) in enumerate(zip(class_paths, class_labels)):
    imgs = []

    # Limit number of images to reduce blur
    sample_imgs = random.sample(os.listdir(path), min(len(os.listdir(path)), 30))

    for img_name in sample_imgs:
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')  # Force RGB
        img = img.resize((128,128))                # Uniform size
        imgs.append(np.array(img))

    # Stack and compute average
    avg_img = np.mean(np.stack(imgs, axis=0), axis=0).astype("uint8")

    # Plot
    plt.subplot(2,3,i+1)
    plt.imshow(avg_img)
    plt.title(label)
    plt.axis("off")

plt.tight_layout()
plt.show()
相关推荐
devilnumber26 分钟前
Java 递归算法 详解 + 核心要点 + 实战运用 + 避坑指南
java·开发语言·算法
大貔貅喝啤酒1 小时前
Python Requests库教程
自动化测试·python·requests库
copyer_xyf2 小时前
LangChain 调用 LLM
后端·python·agent
copyer_xyf2 小时前
Prompt 组织管理
后端·python·agent
asdfg12589632 小时前
JavaBean是什么?怎么理解?有什么用途?
java·开发语言
dsyyyyy11012 小时前
JavaScript变量
开发语言·javascript·ecmascript
shimly1234562 小时前
python3 uvicorn 是啥?
python
z落落3 小时前
C#WinForm 窗体切换与窗体传值(登录跳转案例)+WinForm 窗体传值(从上往下传、从下往上传)
开发语言·windows·c#
CTA量化套保3 小时前
期货量化程序 time.sleep 卡死:天勤单线程与 deadline 替代
python·区块链
allway23 小时前
How to Echo Multiline to a File in Bash [3 Methods]
开发语言·chrome·bash