SVM支持向量机分类
1.描述统计
python
from numpy import *
from scipy import *
from pandas import *
import matplotlib.pyplot as plt
import seaborn as sns
# Load the glass data from a comma-separated file.
glass=read_csv('../data/第5章数据/Glass.csv',sep=',')
# Preview the first rows of the table.
glass.head()
# Count how many samples fall into each value of the 'Type' column.
glass['Type'].value_counts()
Type
2 76
1 70
7 29
3 17
5 13
6 9
Name: count, dtype: int64
划分训练与测试集
python
# Standard-library random; fixing the seed makes the split below reproducible.
import random

random.seed(1234)

# Draw 70% of the row labels for training; every remaining label goes to the
# test set. The test labels are sorted so the test rows come in a
# deterministic order instead of relying on set iteration order.
all_labels = list(glass.index)
train_index = random.sample(all_labels, int(0.7 * len(all_labels)))
test_index = sorted(set(all_labels) - set(train_index))

# The sampled values are index *labels*, so select rows with .loc. Positional
# .iloc only gives the same rows while the index is the default 0..n-1 range.
train_data = glass.loc[train_index, :]
test_data = glass.loc[test_index, :]

# Check that the training set (and, below, the test set) contains every class
train_data['Type'].value_counts()
Type
1 55
2 50
7 21
3 10
5 7
6 6
Name: count, dtype: int64
python
# Count the samples of each 'Type' value among the test rows.
test_data['Type'].value_counts()
Type
2 26
1 15
7 8
3 7
5 6
6 3
Name: count, dtype: int64
2.建立SVM模型
python
from sklearn import svm

# Linear-kernel SVC fitted on the first nine columns, with 'Type' as the label.
clf = svm.SVC(
    kernel='linear',
    C=4,
    gamma=0.1,
    tol=1e-6,
    decision_function_shape='ovr',
)
clf.fit(train_data.iloc[:, :9], train_data['Type'])

# Predict the test rows from the feature columns recorded during fit and
# store the predictions in a copy, leaving test_data itself untouched.
value = clf.predict(test_data[clf.feature_names_in_])
test_datac = test_data.copy()
test_datac['SVM_pred'] = value
test_datac.head()

# Cross-tabulate predictions against true labels: one row per predicted
# class, one column per actual class, 0 where a combination never occurs.
result = (
    test_datac.iloc[:, 0]
    .groupby([test_datac['SVM_pred'], test_datac['Type']])
    .count()
    .unstack()
    .fillna(0)
)
result
| Type | 1 | 2 | 3 | 5 | 6 | 7 |
|---|---|---|---|---|---|---|
| SVM_pred | | | | | | |
| 1 | 9.0 | 7.0 | 5.0 | 0.0 | 0.0 | 1.0 |
| 2 | 6.0 | 18.0 | 2.0 | 0.0 | 2.0 | 3.0 |
| 5 | 0.0 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 7 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 4.0 |
SVM with rbf kernel
python
# RBF-kernel SVC (C=4, gamma=0.1, one-vs-rest decision function) fitted on
# the first nine columns, with 'Type' as the label.
clf = svm.SVC(
    kernel='rbf',
    C=4,
    gamma=0.1,
    tol=1e-6,
    decision_function_shape='ovr',
)
clf.fit(train_data.iloc[:, :9], train_data['Type'])

# Work on a copy of the test rows and attach the predictions as a new column.
test_datac = test_data.copy()
value = clf.predict(test_data[clf.feature_names_in_])
test_datac.loc[:, 'SVM_pred'] = value
test_datac.head()

# Predicted class (rows) versus actual class (columns); missing pairs become 0.
result = (
    test_datac.iloc[:, 0]
    .groupby([test_datac['SVM_pred'], test_datac['Type']])
    .count()
    .unstack()
    .fillna(0)
)
result
| Type | 1 | 2 | 3 | 5 | 6 | 7 |
|---|---|---|---|---|---|---|
| SVM_pred | | | | | | |
| 1 | 12.0 | 6.0 | 6.0 | 0.0 | 0.0 | 1.0 |
| 2 | 3.0 | 19.0 | 1.0 | 1.0 | 2.0 | 2.0 |
| 5 | 0.0 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 |
| 7 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 |
SVM with polynomial kernel
python
# Degree-4 polynomial-kernel SVC (C=4, gamma=0.1, one-vs-rest decision
# function) fitted on the first nine columns, with 'Type' as the label.
clf = svm.SVC(
    kernel='poly',
    degree=4,
    C=4,
    gamma=0.1,
    tol=1e-6,
    decision_function_shape='ovr',
)
clf.fit(train_data.iloc[:, :9], train_data['Type'])

# Predict from the same nine leading columns of the test rows and keep the
# predictions alongside the true labels in a copy of the test set.
test_datac = test_data.copy()
value = clf.predict(test_data.iloc[:, :9])
test_datac.loc[:, 'SVM_pred'] = value
test_datac.head()

# Predicted class (rows) versus actual class (columns); missing pairs become 0.
result = (
    test_datac.iloc[:, 0]
    .groupby([test_datac['SVM_pred'], test_datac['Type']])
    .count()
    .unstack()
    .fillna(0)
)
result
| Type | 1 | 2 | 3 | 5 | 6 | 7 |
|---|---|---|---|---|---|---|
| SVM_pred | | | | | | |
| 1 | 14.0 | 7.0 | 6.0 | 0.0 | 0.0 | 1.0 |
| 2 | 1.0 | 18.0 | 0.0 | 0.0 | 1.0 | 2.0 |
| 3 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 5 | 0.0 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 |
| 6 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1.0 |
| 7 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 4.0 |
svm---libsvm3.21用法示例
python
# Example 5.2
import re

import numpy as np
from sklearn.datasets import dump_svmlight_file

# dump_svmlight_file writes a feature matrix and its labels as an svmlight file.

# Compiled once because it is applied to every line of every input file.
_WHITESPACE = re.compile(r'\s+')


def _read_rows(path):
    """Return the whitespace-separated fields of each non-blank line of *path*."""
    rows = []
    with open(path, 'r') as file:
        for line in file:
            stripped = line.strip()
            # A blank line would add a one-field row and make the table
            # ragged, so it is skipped instead of parsed.
            if stripped:
                rows.append(_WHITESPACE.split(stripped))
    return rows


def _export_svmlight(x_path, y_path, out_path):
    """Convert a features file and a labels file into one svmlight file.

    The features are parsed as floats and the labels are flattened and parsed
    as ints before both are handed to dump_svmlight_file. Returns the (X, y)
    arrays that were written.
    """
    X = np.array(_read_rows(x_path)).astype(float)
    y = np.array(_read_rows(y_path)).reshape(-1).astype(int)
    dump_svmlight_file(X, y, out_path)
    return X, y


# UCI HAR Dataset
# NOTE(review): X_train is read from the chapter data folder while the other
# three inputs come from the 'UCI HAR Dataset' folder -- confirm this is
# intended.
X, y = _export_svmlight(
    '../data/第5章数据/X_train.txt',
    '../UCI HAR Dataset/UCI HAR Dataset/train/y_train.txt',
    '../data/第5章数据/train.txt',
)
# As before, X and y end up holding the test arrays after this second call.
X, y = _export_svmlight(
    '../UCI HAR Dataset/UCI HAR Dataset/test/X_test.txt',
    '../UCI HAR Dataset/UCI HAR Dataset/test/y_test.txt',
    '../data/第5章数据/test.txt',
)
from libsvm.svmutil import *
# Read the svmlight-format training and test files; each call yields the
# labels first and the feature rows second.
y,x =svm_read_problem('../data/第5章数据/train.txt')
y1,x1=svm_read_problem('../data/第5章数据/test.txt')
# Train two models on the same data that differ only in the `-t` (kernel
# type) option; in LIBSVM's option list 0 is the linear kernel and 1 the
# polynomial kernel.
m1=svm_train(y,x,'-t 0')
m2=svm_train(y,x,'-t 1')
# Evaluate each model on the test set. The second call reuses the same
# variable names, so only the results for m2 remain afterwards.
p_labs, p_acc, p_vals=svm_predict(y1, x1, m1)
p_labs, p_acc, p_vals=svm_predict(y1, x1, m2)
Accuracy = 96.4031% (2841/2947) (classification)
Accuracy = 90.7703% (2675/2947) (classification)
svm决策边界
python
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
# Function to plot decision boundary
def plot_decision_boundary(clf, X, y, title, h=0.02, ax=None):
    """Draw a fitted classifier's decision regions over two features.

    The classifier is evaluated on a regular grid that extends one unit
    beyond the observed range of each feature; the predicted classes are
    drawn as filled contours and the samples are scattered on top.

    Args:
        clf: Fitted estimator whose ``predict`` accepts an (n, 2) array.
        X: Two-column array-like of samples; column 0 is the horizontal
            axis and column 1 the vertical axis.
        y: Label per sample, used to colour the scatter points.
        title: Title of the plot.
        h: Step size of the evaluation grid. Must be positive.
        ax: Object providing ``contourf``, ``scatter``, ``set_xlabel``,
            ``set_ylabel`` and ``set_title`` (e.g. a matplotlib Axes).
            When omitted, the module-level ``plt`` interface is used, so
            the plot goes to whichever axes pyplot currently targets.

    Raises:
        ValueError: If ``h`` is not positive.
    """
    if h <= 0:
        raise ValueError("h must be positive")

    if ax is None:
        # Original behaviour: draw through pyplot on the current axes.
        contourf, scatter = plt.contourf, plt.scatter
        set_xlabel, set_ylabel, set_title = plt.xlabel, plt.ylabel, plt.title
    else:
        contourf, scatter = ax.contourf, ax.scatter
        set_xlabel, set_ylabel, set_title = (
            ax.set_xlabel, ax.set_ylabel, ax.set_title
        )

    # Allow any two-column array-like, not only ndarrays.
    X = np.asarray(X)

    # Create a mesh covering the data with a margin of 1 on every side
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict a class for every mesh point and restore the grid shape
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    contourf(xx, yy, Z, cmap='coolwarm', alpha=0.8)

    # Plot the training points on top of the regions
    scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolors='k')
    set_xlabel('Feature 1')
    set_ylabel('Feature 2')
    set_title(title)
# Example data (replace with your actual data)
# Only the first two features are kept so the boundary can be drawn in 2-D.
X = train_data.iloc[:, :2].values
y = train_data['Type'].values

# One classifier per kernel; all use C=4, gamma=0.1 and the 'ovr' decision
# function shape, and the polynomial one has degree 4.
clf_linear = svm.SVC(C=4, kernel='linear', gamma=0.1, decision_function_shape='ovr')
clf_rbf = svm.SVC(C=4, kernel='rbf', gamma=0.1, decision_function_shape='ovr')
clf_poly = svm.SVC(C=4, kernel='poly', degree=4, gamma=0.1, decision_function_shape='ovr')

# A single figure with one row of three panels, in the order listed here.
plt.figure(figsize=(15, 5))
panels = (
    (clf_linear, 'SVM with Linear Kernel'),
    (clf_rbf, 'SVM with RBF Kernel'),
    (clf_poly, 'SVM with Polynomial Kernel'),
)
for position, (model, caption) in enumerate(panels, start=1):
    # Select the panel first so the plotting helper draws into it.
    plt.subplot(1, 3, position)
    model.fit(X, y)
    plot_decision_boundary(model, X, y, caption)

# Show the plots
plt.tight_layout()
plt.show()