SVM支持向量机分类——基于Python实现

SVM支持向量机分类

1.描述统计

python 复制代码

from numpy import *
from scipy import *
from pandas import *
import matplotlib.pyplot as plt

import seaborn as sns
# Load the glass data set from a comma-separated file into a DataFrame.
glass=read_csv('../data/第5章数据/Glass.csv',sep=',')
# Preview the first rows of the table.
glass.head()
# Number of samples in each `Type` class.
glass['Type'].value_counts()

复制代码

Type
2    76
1    70
7    29
3    17
5    13
6     9
Name: count, dtype: int64

划分训练与测试集

python 复制代码

# Explicit import so that `random` is the standard-library module here,
# whatever the earlier star imports may have bound to that name.
import random

# Fixed seed so the split is reproducible from run to run.
random.seed(1234)

# Draw 70% of the index labels (without replacement) for training; every
# remaining label goes to the test set.
train_index = random.sample(list(glass.index), int(0.7 * len(glass.index)))
# Sorted so the test rows come out in a well-defined order instead of relying
# on set iteration order.
test_index = sorted(set(glass.index) - set(train_index))

# The sampled values are index *labels*, so select with .loc. Positional
# .iloc only gives the same rows while the index is the default 0..n-1 range.
train_data = glass.loc[train_index, :]
test_data = glass.loc[test_index, :]

# Check that the training set (and, in the next cell, the test set) contains
# every class.
train_data['Type'].value_counts()

复制代码

Type
1    55
2    50
7    21
3    10
5     7
6     6
Name: count, dtype: int64

python 复制代码

# Number of test-set samples in each `Type` class.
test_data['Type'].value_counts()

复制代码

Type
2    26
1    15
7     8
3     7
5     6
6     3
Name: count, dtype: int64

2.建立SVM模型

python 复制代码

from sklearn import svm

# Linear-kernel SVC trained on the first nine columns, with `Type` as label.
clf = svm.SVC(
    C=4,
    tol=1e-6,
    kernel='linear',
    gamma=0.1,
    decision_function_shape='ovr',
)
clf.fit(train_data.iloc[:, 0:9], train_data['Type'])

# Predict the test rows using the feature columns recorded at fit time.
value = clf.predict(test_data[clf.feature_names_in_])

# Store predictions on a copy so `test_data` itself stays unchanged.
test_datac = test_data.copy()
test_datac.loc[:, 'SVM_pred'] = value
test_datac.head()

# Confusion table: rows are predicted classes, columns are true classes.
# Each cell counts the test rows in that pair; absent pairs become 0.
pair_counts = test_datac.iloc[:, 0].groupby(
    [test_datac['SVM_pred'], test_datac['Type']]
).count()
result = pair_counts.unstack().fillna(0)
result

| Type | 1 | 2 | 3 | 5 | 6 | 7 |
| SVM_pred | | | | | | |
| 1 | 9.0 | 7.0 | 5.0 | 0.0 | 0.0 | 1.0 |
| 2 | 6.0 | 18.0 | 2.0 | 0.0 | 2.0 | 3.0 |
| 5 | 0.0 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 7 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 4.0 |

SVM with rbf kernel

python 复制代码

# RBF-kernel SVC trained on the first nine columns, with `Type` as label.
clf = svm.SVC(
    C=4,
    tol=1e-6,
    kernel='rbf',
    gamma=0.1,
    decision_function_shape='ovr',
)
clf.fit(train_data.iloc[:, 0:9], train_data['Type'])

# Predict the test rows using the feature columns recorded at fit time.
value = clf.predict(test_data[clf.feature_names_in_])

# `assign` returns a new frame, so `test_data` itself stays unchanged.
test_datac = test_data.assign(SVM_pred=value)
test_datac.head()

# Confusion table: rows are predicted classes, columns are true classes.
# Each cell counts the test rows in that pair; absent pairs become 0.
pair_counts = test_datac.iloc[:, 0].groupby(
    [test_datac['SVM_pred'], test_datac['Type']]
).count()
result = pair_counts.unstack().fillna(0)
result

| Type | 1 | 2 | 3 | 5 | 6 | 7 |
| SVM_pred | | | | | | |
| 1 | 12.0 | 6.0 | 6.0 | 0.0 | 0.0 | 1.0 |
| 2 | 3.0 | 19.0 | 1.0 | 1.0 | 2.0 | 2.0 |
| 5 | 0.0 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 |
| 7 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 |

SVM with polynomial kernel

python 复制代码

# Degree-4 polynomial-kernel SVC trained on the first nine columns.
svc_settings = dict(
    C=4,
    tol=1e-6,
    kernel='poly',
    degree=4,
    gamma=0.1,
    decision_function_shape='ovr',
)
clf = svm.SVC(**svc_settings)
clf.fit(train_data.iloc[:, 0:9], train_data['Type'])

# Predict the test rows from the same nine leading columns.
value = clf.predict(test_data.iloc[:, 0:9])

# `assign` returns a new frame, so `test_data` itself stays unchanged.
test_datac = test_data.assign(SVM_pred=value)
test_datac.head()

# Confusion table: rows are predicted classes, columns are true classes.
# Each cell counts the test rows in that pair; absent pairs become 0.
grouping_keys = [test_datac['SVM_pred'], test_datac['Type']]
pair_counts = test_datac.iloc[:, 0].groupby(grouping_keys).count()
result = pair_counts.unstack().fillna(0)
result

| Type | 1 | 2 | 3 | 5 | 6 | 7 |
| SVM_pred | | | | | | |
| 1 | 14.0 | 7.0 | 6.0 | 0.0 | 0.0 | 1.0 |
| 2 | 1.0 | 18.0 | 0.0 | 0.0 | 1.0 | 2.0 |
| 3 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 5 | 0.0 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1.0 |
| 7 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 4.0 |

svm---libsvm3.21用法示例

python 复制代码

#例5.2
import re
import numpy as np
from sklearn.datasets import dump_svmlight_file
#利用dump_svmlight_file可以生成svmlight文件
# UCI HAR Dataset: convert the whitespace-delimited feature/label text files
# into svmlight-format files.
def _load_numeric_table(path, dtype=float):
    """Read a whitespace-delimited numeric text file into a 2-D array.

    Each non-blank line becomes one row. Blank lines are skipped instead of
    raising a conversion error.

    Args:
        path: Location of the text file.
        dtype: Element type of the returned array.

    Returns:
        A 2-D numpy array with one row per data line.
    """
    return np.loadtxt(path, dtype=dtype, ndmin=2)


def _export_svmlight(features_path, labels_path, out_path):
    """Write one feature/label file pair to `out_path` in svmlight format.

    Args:
        features_path: Text file with one sample per line.
        labels_path: Text file with one integer label per line.
        out_path: Destination of the svmlight file.

    Returns:
        The `(X, y)` arrays that were written: float features and a flat
        integer label vector.
    """
    X = _load_numeric_table(features_path)
    # Labels are stored one per line; flatten the column into a 1-D vector.
    y = _load_numeric_table(labels_path, dtype=int).reshape(-1)
    dump_svmlight_file(X, y, out_path)
    return X, y


# NOTE(review): X_train.txt is read from the chapter data folder while the
# other three files come from the "UCI HAR Dataset" tree — confirm that the
# two locations hold matching files.
X, y = _export_svmlight(
    '../data/第5章数据/X_train.txt',
    '../UCI HAR Dataset/UCI HAR Dataset/train/y_train.txt',
    '../data/第5章数据/train.txt',
)

# As before, `X` and `y` end up holding the test split.
X, y = _export_svmlight(
    '../UCI HAR Dataset/UCI HAR Dataset/test/X_test.txt',
    '../UCI HAR Dataset/UCI HAR Dataset/test/y_test.txt',
    '../data/第5章数据/test.txt',
)

from libsvm.svmutil import *

# Read the svmlight-format files back as LIBSVM problems (labels, samples).
y, x = svm_read_problem('../data/第5章数据/train.txt')
y1, x1 = svm_read_problem('../data/第5章数据/test.txt')

# LIBSVM kernel option: '-t 0' is the linear kernel, '-t 1' the polynomial.
m1 = svm_train(y, x, '-t 0')
m2 = svm_train(y, x, '-t 1')

# Score each model on the test problem; svm_predict reports the accuracy.
# The result names are rebound on each pass, so they end up holding the
# output for m2.
for fitted_model in (m1, m2):
    p_labs, p_acc, p_vals = svm_predict(y1, x1, fitted_model)

复制代码

Accuracy = 96.4031% (2841/2947) (classification)
Accuracy = 90.7703% (2675/2947) (classification)

svm决策边界

python 复制代码

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# Function to plot decision boundary
def plot_decision_boundary(clf, X, y, title, step=0.02, margin=1):
    """Draw a fitted classifier's decision regions over two features.

    The classifier is evaluated on a regular grid covering the data range
    (padded by `margin` on every side). The predicted classes are drawn as
    filled contours on the current matplotlib axes, with the samples
    scattered on top.

    Args:
        clf: Fitted estimator whose `predict` accepts an (n, 2) array.
        X: 2-D array of samples; columns 0 and 1 are the plotted features.
        y: Class label per sample, used to colour the scatter points.
        title: Title for the current axes.
        step: Grid spacing in feature units. Smaller values give smoother
            boundaries but a larger grid; raise it for wide-ranged features.
        margin: Padding added around the data range on both axes.

    Raises:
        ValueError: If `step` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Grid bounds: data range of each feature, padded by `margin`.
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Classify every grid point, then fold the flat predictions back into
    # the grid shape so they can be drawn as filled regions.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)

    # Overlay the samples, coloured by their labels.
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, edgecolors='k')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)

# Two-feature view of the training data: only the first two columns are kept
# so the decision regions can be drawn in the plane.
X = train_data.iloc[:, 0:2].values
y = train_data['Type'].values

# One classifier per kernel, all sharing C, gamma and the 'ovr' decision shape.
clf_linear = svm.SVC(C=4, kernel='linear', gamma=0.1, decision_function_shape='ovr')
clf_rbf = svm.SVC(C=4, kernel='rbf', gamma=0.1, decision_function_shape='ovr')
clf_poly = svm.SVC(C=4, kernel='poly', degree=4, gamma=0.1, decision_function_shape='ovr')

kernel_panels = [
    (clf_linear, 'SVM with Linear Kernel'),
    (clf_rbf, 'SVM with RBF Kernel'),
    (clf_poly, 'SVM with Polynomial Kernel'),
]

# A single row of three panels, one per kernel.
plt.figure(figsize=(15, 5))
for panel_no, (kernel_clf, panel_title) in enumerate(kernel_panels, start=1):
    # Make this panel the current axes; the plotting helper draws there.
    plt.subplot(1, 3, panel_no)
    kernel_clf.fit(X, y)
    plot_decision_boundary(kernel_clf, X, y, panel_title)

# Tidy the spacing and render the figure.
plt.tight_layout()
plt.show()