基于Python实现水果新鲜度分类

python 复制代码
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import random

sns.set(style="whitegrid")
python 复制代码
#DATASET PATH

DATASET_PATH = "/Fruit Freshness Dataset"
python 复制代码
#DETECT CLASSES

class_paths = []
class_labels = []

for fruit in os.listdir(DATASET_PATH):
    fruit_path = os.path.join(DATASET_PATH, fruit)
    if os.path.isdir(fruit_path):
        for status in os.listdir(fruit_path):
            status_path = os.path.join(fruit_path, status)
            if os.path.isdir(status_path):
                class_paths.append(status_path)
                class_labels.append(f"{fruit}_{status}")

print("Detected Classes:")
for label in class_labels:
    print(label)
复制代码
Detected Classes:
Apple_Fresh
Apple_Rotten
Strawberry_Fresh
Strawberry_Rotten
Banana_Fresh
Banana_Rotten
python 复制代码
#CLASS DISTRIBUTION

class_counts = {label: len(os.listdir(path)) for label, path in zip(class_labels, class_paths)}

plt.figure(figsize=(10,5))
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()), palette="viridis")
plt.title("Number of Images per Class")
plt.xticks(rotation=45)
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1765: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
  order = pd.unique(vector)
python 复制代码
#SHOW RANDOM SAMPLE IMAGES

plt.figure(figsize=(12,8))
i = 1
for path, label in zip(class_paths, class_labels):
    img_name = random.choice(os.listdir(path))
    img = Image.open(os.path.join(path, img_name))
    plt.subplot(2, 3, i)
    plt.imshow(img)
    plt.title(label)
    plt.axis("off")
    i += 1
plt.tight_layout()
plt.show()
python 复制代码
#IMAGE SIZE ANALYSIS

widths, heights = [], []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:  # sample to speed up
        img = Image.open(os.path.join(path, img_name))
        w, h = img.size
        widths.append(w)
        heights.append(h)

plt.figure(figsize=(10,4))
sns.histplot(widths, bins=20, kde=True)
plt.title("Image Width Distribution")
plt.show()

plt.figure(figsize=(10,4))
sns.histplot(heights, bins=20, kde=True)
plt.title("Image Height Distribution")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#BRIGHTNESS DISTRIBUTION

brightness_values = []

for path in class_paths:
    for img_name in os.listdir(path)[:50]:
        img = Image.open(os.path.join(path, img_name))
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
        brightness_values.append(np.mean(gray))

plt.figure(figsize=(10,4))
sns.histplot(brightness_values, bins=30, kde=True, color="orange")
plt.title("Brightness Distribution")
plt.xlabel("Brightness")
plt.ylabel("Frequency")
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#COLOR HISTOGRAM (RGB) OF SAMPLE IMAGE

sample_path = random.choice(class_paths)
sample_img = random.choice(os.listdir(sample_path))
img = cv2.imread(os.path.join(sample_path, sample_img))
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(10,5))
colors = ("r", "g", "b")
for i, color in enumerate(colors):
    hist = img_rgb[:,:,i].flatten()
    sns.histplot(hist, bins=50, color=color, label=color, alpha=0.5)
plt.title(f"RGB Histogram ({sample_path.split('/')[-2]}_{sample_path.split('/')[-1]})")
plt.legend()
plt.show()
复制代码
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
/usr/local/lib/python3.11/dist-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
  with pd.option_context('mode.use_inf_as_na', True):
python 复制代码
#PCA 2D VISUALIZATION

X = []
y = []

for path, label in zip(class_paths, class_labels):
    for img_name in os.listdir(path)[:40]:  # limit to speed up
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')  # force RGB
        img = img.resize((64,64))
        X.append(np.array(img).flatten())
        y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

print("Shape of X:", X.shape)
print("Number of labels:", len(y))

# Apply PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Plot PCA result
plt.figure(figsize=(10,7))
for label in set(y):
    idx = np.where(y == label)
    plt.scatter(X_pca[idx,0], X_pca[idx,1], label=label, alpha=0.7)
plt.title("PCA 2D Visualization of Fruit Freshness Dataset")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend()
plt.show()
复制代码
Shape of X: (233, 12288)
Number of labels: 233
python 复制代码
plt.figure(figsize=(12,8))

for i, (path, label) in enumerate(zip(class_paths, class_labels)):
    imgs = []

    # Limit number of images to reduce blur
    sample_imgs = random.sample(os.listdir(path), min(len(os.listdir(path)), 30))

    for img_name in sample_imgs:
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')  # Force RGB
        img = img.resize((128,128))                # Uniform size
        imgs.append(np.array(img))

    # Stack and compute average
    avg_img = np.mean(np.stack(imgs, axis=0), axis=0).astype("uint8")

    # Plot
    plt.subplot(2,3,i+1)
    plt.imshow(avg_img)
    plt.title(label)
    plt.axis("off")

plt.tight_layout()
plt.show()
相关推荐
咸鱼2.01 小时前
【java入门到放弃】Dubbo
java·开发语言·dubbo
做怪小疯子7 小时前
华为笔试0429
python·numpy
Warson_L8 小时前
Dictionary
python
JAVA面经实录9178 小时前
Java企业级工程化·终极完整版背诵手册(无遗漏、全覆盖、面试+落地通用)
java·开发语言·面试
周杰伦fans9 小时前
AutoCAD .NET 二次开发:深入理解 EntityJig 的工作原理与正确实现
开发语言·.net
寒山李白10 小时前
解决 python-docx 生成的 Word 文档打开时弹出“无法读取内容“警告
python·word·wps·文档·docx·qoder
2401_8323655210 小时前
JavaScript中rest参数(...args)取代arguments的优势
jvm·数据库·python
Sirius.z10 小时前
第J3周:DenseNet121算法详解
python
Bat U10 小时前
JavaEE|多线程初阶(七)
java·开发语言
谭欣辰11 小时前
C++ 排列组合完整指南
开发语言·c++·算法