Implementing the SVM Algorithm in R: Classification and Regression

### 11.6 Class Prediction with Support Vector Machines ###
# Build a data subset
X <- iris[iris$Species != 'virginica', 2:3]                   # predictors: Sepal.Width, Petal.Length
y <- droplevels(iris[iris$Species != 'virginica', 'Species']) # response; drop the empty 'virginica' level
plot(X, col = y, pch = as.numeric(y) + 15, cex = 1.5)         # scatter plot
# Fit a linear SVM classifier
library(e1071)
svm.model <- svm(x = X, y = y, kernel = 'linear', scale = FALSE)
summary(svm.model)
svm.model$index # indices of the support vectors
svm.model$nSV   # number of support vectors per class
svm.model$SV    # predictor values of the support vectors
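A quick sanity check (not in the original listing): setosa and versicolor are linearly separable in these two features, so training accuracy should be 1.

# Training accuracy of the linear classifier
mean(predict(svm.model, X) == y)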

# Plot the SVM decision boundary (solid line), support vectors, and margin lines
plot_svc_decision_boundary <- function(svm.model, X) {
  w <- t(svm.model$coefs) %*% svm.model$SV # weight vector (valid for a linear kernel)
  b <- -svm.model$rho                      # intercept: the decision function is w.x + b
  margin <- 1 / w[1, 2]                    # vertical offset of the margin lines
  abline(a = -b / w[1, 2], b = -w[1, 1] / w[1, 2], col = "red", lwd = 2) # decision boundary
  points(X[svm.model$index, ], col = "blue", cex = 2.5, lwd = 2)         # circle the support vectors
  abline(a = -b / w[1, 2] + margin, b = -w[1, 1] / w[1, 2], col = "grey", lwd = 2, lty = 2)
  abline(a = -b / w[1, 2] - margin, b = -w[1, 1] / w[1, 2], col = "grey", lwd = 2, lty = 2)
}
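For a linear kernel, the w and b recovered this way should reproduce svm()'s own decision values. A quick check (illustrative, not from the original text):

# Compare manual decision values w.x - rho against those returned by predict()
w <- t(svm.model$coefs) %*% svm.model$SV
dv <- attr(predict(svm.model, X, decision.values = TRUE), 'decision.values')
head(cbind(manual = as.matrix(X) %*% t(w) - svm.model$rho, svm = dv))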
# Scatter plot with the separating line added
plot(X, col = y, pch = as.numeric(y) + 15, cex = 1.5) # scatter plot
plot_svc_decision_boundary(svm.model, X) # add the decision boundary and mark support vectors

# SVMs are sensitive to feature scaling
Xs <- data.frame(x1 = c(1, 5, 3, 5),
                 x2 = c(50, 20, 80, 60))
ys <- factor(c(0, 0, 1, 1))

svm_clf <- svm(x = Xs, y = ys, cost = 100,
               kernel = "linear", scale = FALSE)
Xs_scale <- apply(Xs, 2, scale) # standardize each column
svm_clf1 <- svm(x = Xs_scale, y = ys, cost = 100,
                kernel = "linear", scale = FALSE)
par(mfrow = c(1, 2))
plot(Xs, col = ys, pch = as.numeric(ys) + 15, cex = 1.5, main = 'Unscaled')
plot_svc_decision_boundary(svm_clf, Xs)
plot(Xs_scale, col = ys, pch = as.numeric(ys) + 15, cex = 1.5, main = 'Scaled')
plot_svc_decision_boundary(svm_clf1, Xs_scale)
par(mfrow = c(1, 1))

# Set scale = TRUE so svm() standardizes the data internally
svm_clf2 <- svm(x = Xs, y = ys, cost = 100,
                kernel = "linear", scale = TRUE)
# Inspect the centers and standard deviations used for the internal scaling
svm_clf2$x.scale
# Compare with manually computed means and standard deviations
apply(Xs, 2, function(x) c('center' = mean(x, na.rm = TRUE), 'scale' = sd(x, na.rm = TRUE)))
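Since the internal scaling uses the same mean/standard-deviation standardization, the two routes should agree; a quick check (not in the original):

# The internally scaled model and the manually scaled one should predict identically here
all(predict(svm_clf2, Xs) == predict(svm_clf1, Xs_scale))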

# Soft-margin classification
X <- iris[iris$Species != 'virginica', 1:2] # predictors: Sepal.Length, Sepal.Width
y <- droplevels(iris[iris$Species != 'virginica', 'Species'])
svm_smallC <- svm(x = X, y = y, cost = 1,
                  kernel = "linear", scale = FALSE)
svm_largeC <- svm(x = X, y = y, cost = 100,
                  kernel = "linear", scale = FALSE)
par(mfrow = c(1, 2))
plot(X, col = y, pch = as.numeric(y) + 15, main = 'small cost')
plot_svc_decision_boundary(svm_smallC, X)
plot(X, col = y, pch = as.numeric(y) + 15, main = 'large cost')
plot_svc_decision_boundary(svm_largeC, X)
par(mfrow = c(1, 1))
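The effect of cost also shows in the number of support vectors: a smaller cost widens the margin and lets more points fall inside it.

# A smaller cost typically yields more support vectors per class
svm_smallC$nSV
svm_largeC$nSV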

# Nonlinear SVM classification
# Load the dataset
moons <- read.csv('moons.csv')
# Inspect the data structure
str(moons)
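moons.csv is not bundled here; if it is unavailable, a similar two-moons dataset can be simulated as below (the column names x1, x2, y are assumptions chosen to match the code that follows):

# Hypothetical stand-in for moons.csv: two interleaving half-moons with noise
set.seed(42)
n <- 100
t1 <- runif(n, 0, pi); t2 <- runif(n, 0, pi)
moons <- data.frame(x1 = c(cos(t1), 1 - cos(t2)),
                    x2 = c(sin(t1), 0.5 - sin(t2)) + rnorm(2 * n, sd = 0.1),
                    y  = rep(c(0, 1), each = n))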

# Helper function to plot a classifier's decision regions
visualize_classifier <- function(model, X, y, xlim, ylim, title = NA) {
  x1s <- seq(xlim[1], xlim[2], length.out = 200)
  x2s <- seq(ylim[1], ylim[2], length.out = 200)
  Z <- expand.grid(x1s, x2s) # prediction grid over the plotting window
  colnames(Z) <- colnames(X)
  y_pred <- predict(model, Z, type = 'class')
  y_pred <- matrix(as.numeric(y_pred), length(x1s)) # filled.contour() needs a numeric matrix

  filled.contour(x1s, x2s, y_pred,
                 nlevels = 2,
                 col = RColorBrewer::brewer.pal(length(unique(y)), 'Pastel1'),
                 key.axes = FALSE,
                 plot.axes = {axis(1); axis(2)
                   points(X[, 1], X[, 2], pch = as.numeric(y) + 16, col = as.numeric(y) + 2, cex = 1.5)
                 },
                 xlab = colnames(X)[1], ylab = colnames(X)[2]
  )
  title(main = title)
}

xlim <- c(-1.5,2.5)
ylim <- c(-1,1.5)

# Fit a linear SVM classifier
svm_linear <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                  kernel = 'linear', cost = 10)
# Plot the decision boundary: a straight line cannot separate the two moons
visualize_classifier(svm_linear, moons[, 1:2], moons[, 3],
                     xlim, ylim, title = 'Linear SVM')

# Fit a nonlinear SVM classifier with a polynomial kernel
svm_poly <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                kernel = 'polynomial', degree = 3, cost = 5)
# Plot the decision boundary
visualize_classifier(svm_poly, moons[, 1:2], moons[, 3],
                     xlim, ylim, title = 'Nonlinear SVM (polynomial kernel)')

# Polynomial kernel with coef0 = 1 (coef0 balances high- vs low-degree terms)
svm_poly1 <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                 kernel = 'polynomial', degree = 3, cost = 5, coef0 = 1)
visualize_classifier(svm_poly1, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'Polynomial kernel, coef0 = 1')

# Gaussian RBF kernel (adds similarity features)
svm_rbf <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
               kernel = 'radial', gamma = 0.1, cost = 0.01)
svm_rbf1 <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                kernel = 'radial', gamma = 0.1, cost = 1000)
svm_rbf2 <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                kernel = 'radial', gamma = 5, cost = 1000)
visualize_classifier(svm_rbf, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'gamma = 0.1, cost = 0.01')
visualize_classifier(svm_rbf1, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'gamma = 0.1, cost = 1000')
visualize_classifier(svm_rbf2, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'gamma = 5, cost = 1000')
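Comparing training accuracy across the three RBF fits makes the trade-off concrete (a rough check, not in the original; near-perfect training accuracy with gamma = 5 hints at overfitting):

# Training accuracy of the three RBF models
sapply(list(svm_rbf, svm_rbf1, svm_rbf2),
       function(m) mean(predict(m, moons[, 1:2]) == factor(moons[, 3])))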

# Tuning the SVM
# Use tune.svm() to search over gamma and cost via cross-validation
moons$y <- as.factor(moons$y)
tuned <- tune.svm(y ~ ., data = moons,
                  gamma = 10^(-5:-1), cost = 10^(1:3))
summary(tuned) # performance of each parameter combination

# Refit the SVM with the best parameters
model.tuned <- svm(y ~ ., data = moons,
                   gamma = tuned$best.parameters$gamma,
                   cost = tuned$best.parameters$cost)
# Predict the classes of the training set
pred <- predict(model.tuned, newdata = moons[, 1:2])
# Confusion matrix to check predictive accuracy
table('actual' = moons$y,
      'prediction' = pred)
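The title also mentions regression: svm() supports it through type = 'eps-regression'. A minimal sketch on simulated data (the variable names and parameter values below are illustrative, not from the original text):

# SVM regression: fit a noisy sine curve with an RBF kernel
set.seed(1)
x_reg <- seq(-3, 3, length.out = 100)
y_reg <- sin(x_reg) + rnorm(100, sd = 0.2)
svm_reg <- svm(x = matrix(x_reg), y = y_reg, type = 'eps-regression',
               kernel = 'radial', cost = 10, gamma = 0.5)
plot(x_reg, y_reg, pch = 16, col = 'grey', xlab = 'x', ylab = 'y')
lines(x_reg, predict(svm_reg, matrix(x_reg)), col = 'red', lwd = 2)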
