1、Kmeans++的原理如下:
(1)首先选取样本中任一数据点作为第一个聚类中心;
(2)计算样本每一个数据点至现所有聚类中心的最近距离,并记录下来;
(3)逐一挑选所有数据点最近距离之中的最大值,即最远距离,最大值对应的数据点为待求聚类中心;
(4)剔除已选为聚类中心的样本点,重新计算(2)、(3)步骤,得到指定的最终的聚类中心点数。
2、实现结果如下:
注:当噪点太多时,初始K个聚类中心的计算会出现偏差,从而导致整个聚类结果出现偏差。
3、原始数据读入格式如下:
点号-X坐标-Y坐标(非此格式的数据无法正常读入,程序会报错)
4、根据Kmeans++选择k个初始聚类中心的Kmeans聚类算法整体代码如下:
cpp
//Kmeans.cpp文件
#include "Kmeans.h"
Kmeans::Kmeans(QWidget *parent)
: QWidget(parent)
{
start = false;
//dd = blank;
dd = to2K;
ui.setupUi(this);
connect(ui.pushButton, SIGNAL(clicked()), this, SLOT(onBtReadData()));
connect(ui.pushButton_2, SIGNAL(clicked()), this, SLOT(onBtCalKmeans()));
connect(ui.pushButton_3, SIGNAL(clicked()), this, SLOT(onBtReadK()));
}
void Kmeans::onBtReadData()
{
K = ui.lineEdit->text().toInt();
p.clear();
//打开文件对话框
QString fileName = QFileDialog::getOpenFileName(this, tr("打开"));
QFile file(fileName);
bool isOpen = 1;
if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
{
isOpen = 0;
QMessageBox::StandardButton btnValue = QMessageBox::information(this, tr("提示"), tr("打开失败!"));
}
//逐行读取文本文件
QTextStream stream(&file);
while (!stream.atEnd())
{
Pointp pt;
QString str = stream.readLine();
QStringList list = str.split(",");
pt.no = list.at(0);
pt.x = list.at(1).toDouble();
pt.y = list.at(2).toDouble();
p.push_back(pt);
}
file.close();
//判断是否读取完毕
if (stream.atEnd()&&isOpen)
{
QMessageBox box;
box.setText("数据读取完毕");
box.exec();
}
}
void Kmeans::onBtReadK()
{
QString fileName = QFileDialog::getOpenFileName(this, tr("打开"));
QFile file(fileName);
bool isOpen = 1;
if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
{
isOpen = 0;
QMessageBox::StandardButton btnValue = QMessageBox::information(this, tr("提示"), tr("打开失败!"));
}
QTextStream stream(&file);
while (!stream.atEnd())
{
QString str = stream.readLine();
QStringList list = str.split(",");
Pointp k1;
k1.no = list.at(0);
k1.x = list.at(1).toDouble();
k1.y = list.at(2).toDouble();
k.push_back(k1);
}
//判断是否读取完毕
if (stream.atEnd() && isOpen)
{
QMessageBox box;
box.setText("数据读取完毕");
box.exec();
}
dd = readK;
}
void Kmeans::toK()
{
//随机选取k个初始聚类中心
for (int i = 0; i < K; i++)
{
Pointp k1;
k1.no = i + 1;
k1.x = p.at(i).x;
k1.y = p.at(i).y;
k.push_back(k1);
}
}
int Kmeans::onBtCalKmeans()
{
K = ui.lineEdit->text().toInt();
if (S.size()&&p.size()==S.size())
{
QMessageBox box;
box.setText("已经计算完成");
box.exec();
return 0;
}
//if (dd == to2K)
//{
// toK();
//}
CalK();
CalDis();//S
Calcentroid();//用到S,得dis
//CKmeans();//用到dis,得new k.
int iCount = 0;
while (iCount < K)
{
if (dis.size())
{
for (int i = 0; i < k.size(); i++)
{
for (int j = 0; j < dis.size(); j++)
{
if (k.at(i).no == dis.at(j).noK)
{
//qDebug() <<"k:" <<k.at(i).no<< k.at(i).x << k.at(i).y;
//qDebug() <<"dis:" <<dis.at(i).noK.toInt()<< dis.at(j).sx << dis.at(j).sy<<endl;
double detaX = k.at(i).x - dis.at(j).sx;
double detaY = k.at(i).y - dis.at(j).sy;
double sk = sqrt(detaX * detaX + detaY * detaY);
//qDebug() << sk;
if (sk == 0)
{
iCount++;
}
else
{
CKmeans();
}
}
}
}
}
dis.clear();
S.clear();
CalDis();
Calcentroid();
}
start = true;
qDebug() << "S" << S.size();
drawPoint();
//drawK();
QMessageBox box;
box.setText("计算完成");
box.exec();
qDebug() << "k" << k.size();
//for (int i = 0; i < S.size(); i++)
//{
// qDebug() << "S:" <<S.at(i).no<< S.at(i).noK;
//}
return 1;
}
Kmeans::~Kmeans()
{}
//计算质心
void Kmeans::Calcentroid()
{
centroid s;
for (int i = 0; i < k.size(); i++)
{
s.sx = 0; s.sy = 0; int iCt = 0;
for (int j = 0; j < S.size(); j++)
{
if (k.at(i).no == S.at(j).noK)
{
s.sx = s.sx + S.at(j).x;
s.sy = s.sy + S.at(j).y;
iCt++;
}
}
s.noK = k.at(i).no;
s.sx = s.sx / iCt;
s.sy = s.sy / iCt;
dis.push_back(s);
}
}
//计算每个对象至聚类中心的距离
void Kmeans::CalDis()
{
for (int i = 0; i < p.size(); i++)
{
double s0 = 0; QString no; Dis ss; int t = 0;
double x1 = p.at(i).x;
double y1 = p.at(i).y;
//double x = k.at(0).x;
//double y = k.at(0).y;
//s0 = sqrt((x1 - x) * (x1 - x) + (y1 - y) * (y1 - y));
for (int j = 0; j < k.size(); j++)
{
double x2 = k.at(j).x;
double y2 = k.at(j).y;
double s1 = sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2));
t++;
if (t == 1)
{
s0 = s1;
no = k.at(j).no;
}
if (s1 < s0)
{
s0 = s1;
no = k.at(j).no;
}
}
ss.s = s0;
ss.no = p.at(i).no;
ss.x = p.at(i).x;
ss.y = p.at(i).y;
ss.noK = no;
S.push_back(ss);
}
}
//将新的质心坐标赋值给k
void Kmeans::CKmeans()
{
for (int i = 0; i < k.size(); i++)
{
for (int j = 0; j < dis.size(); j++)
{
if (k.at(i).no == dis.at(j).noK)
{
k.at(i).x = dis.at(j).sx;
k.at(i).y = dis.at(j).sy;
}
}
}
}
//绘图函数
void Kmeans::drawPoint()
{
QPicture pp;
pp.setBoundingRect(ui.label_2->rect());
QPainter painterP(&pp);
QPen pen;
painterP.setRenderHint(QPainter::Antialiasing, true);
Pointp p1;
p1.no = p.at(0).no;
p1.x = p.at(0).x;
p1.y = p.at(0).y;
for (int i = 1; i < p.size(); i++)
{
if (p1.x > p.at(i).x)
{
p1.x = p.at(i).x;
}
if (p1.y > p.at(i).y)
{
p1.y = p.at(i).y;
}
}
double xmin = p1.x;
double ymin = p1.y;
for (int i = 1; i < p.size(); i++)
{
if (p1.x < p.at(i).x)
{
p1.x = p.at(i).x;
}
if (p1.y < p.at(i).y)
{
p1.y = p.at(i).y;
}
}
double xmax = p1.x;
double ymax = p1.y;
int w=ui.label_2->width();
int h=ui.label_2->height();
double a = w/(xmax -xmin);
double b1 = h/(ymax -ymin);
for (int i = 0; i < k.size(); i++)
{
int r = qrand() % 256;
int g = qrand() % 256;
int b = qrand() % 256;
QColor color = QColor(r, g, b);
for (int j = 0; j < S.size(); j++)
{
if (k.at(i).no == S.at(j).noK)
{
pen.setColor(color);
painterP.setPen(pen);
int radius = 5;
double x = S.at(j).x;
double y = S.at(j).y;
x = (x - xmin)*a;
y = (y - ymin)*b1;
painterP.drawEllipse(x - radius, y - radius, radius * 2, radius * 2);
}
}
}
ui.label_2->setPicture(pp);
}
void Kmeans::CalK()
{
k.push_back(p.at(0));
CalDistance();
while (k.size() != K)
{
qDebug() << k.size() << k.at(k.size() - 1).no;
S.clear();
CalDistance();
for (auto& val : k)
{
qDebug() << "CalK.k1" << val.no << val.x << val.y;
}
std::vector<Pointp> vk; int t3 = k.size();
while (vk.size() != t3)
{
Pointp p9 = k.at(0); int t2 = 0;
for (int i = 1; i < k.size(); i++)
{
Pointp p2 = k.at(i);
if (p9.no.toInt() < k.at(i).no.toInt())
{
p9 = k.at(i);
t2 = i;
}
}
k.erase(k.begin() + t2);//删除下标为t2的元素;
vk.push_back(p9);
}
for (int i = vk.size() - 1; i >= 0; i--)
{
k.push_back(vk.at(i));
}
for (auto& val : k)
{
qDebug() <<"CalK.k" << val.no << val.x << val.y;
}
int cv = 1;
for (auto& val : k)
{
S.erase(S.begin() + (val.no.toInt() - cv));//删除下标为val.number的元素;
cv++;
}
double s0 = 0;
Pointp kk;
kk = { 0,0,0 };
for (auto& valS : S)
{
if (s0 <= valS.s)
{
s0 = valS.s;
kk.no = valS.no;
kk.x = valS.x;
kk.y = valS.y;
}
}
k.push_back(kk);
}
int count = 1;
for (auto& val : k)
{
val.no = count;
count++;
//qDebug() << val.no << val.x << val.y;
}
}
void Kmeans::drawK()
{
QPicture pp;
pp.setBoundingRect(ui.label_2->rect());
QPainter painterP(&pp);
QPen pen;
painterP.setRenderHint(QPainter::Antialiasing, true);
Pointp p1;
p1.no = p.at(0).no;
p1.x = p.at(0).x;
p1.y = p.at(0).y;
for (int i = 1; i < p.size(); i++)
{
if (p1.x > p.at(i).x)
{
p1.x = p.at(i).x;
}
if (p1.y > p.at(i).y)
{
p1.y = p.at(i).y;
}
}
double xmin = p1.x;
double ymin = p1.y;
for (int i = 1; i < p.size(); i++)
{
if (p1.x < p.at(i).x)
{
p1.x = p.at(i).x;
}
if (p1.y < p.at(i).y)
{
p1.y = p.at(i).y;
}
}
double xmax = p1.x;
double ymax = p1.y;
int w = ui.label_2->width();
int h = ui.label_2->height();
double a = w / (xmax - xmin);
double b1 = h / (ymax - ymin);
QColor color = QColor(123,223,46);
for (int i = 0; i < k.size(); i++)
{
pen.setColor(color);
pen.setWidth(3);
painterP.setPen(pen);
int radius = 10;
double x = k.at(i).x;
double y = k.at(i).y;
x = (x - xmin) * a;
y = (y - ymin) * b1;
painterP.drawEllipse(x - radius, y - radius, radius * 2, radius * 2);
}
ui.label_2->setPicture(pp);
}
void Kmeans::CalDistance()
{
Dis ss;
for (auto& valP : p)
{
double s0 = 0; int c = 1;
double x1 = valP.x;
double y1 = valP.y;
for (auto& valK : k)
{
double x2 = valK.x;
double y2 = valK.y;
x2 = x2 - x1;
y2 = y2 - y1;
double s = sqrt(x2 * x2 + y2 * y2);
if (c == 1)
{
s0 = s;
ss.no = valP.no;
ss.noK = valK.no;
ss.x = valP.x;
ss.y = valP.y;
ss.s = s;
c++;
}
if (s0 == 0)
{
ss.no = valP.no;
ss.noK = valK.no;
ss.x = valP.x;
ss.y = valP.y;
ss.s = s;
break;
}
if (s < s0)
{
s0 = s;
ss.no = valP.no;
ss.noK = valK.no;
ss.x = valP.x;
ss.y = valP.y;
ss.s = s;
}
}
S.push_back(ss);
}
}
cpp
#pragma once
#include <QtWidgets/QWidget>
#include "ui_Kmeans.h"
#include<QFileDialog>
#include<QFile>
#include<QMessageBox>
#include<QTextStream>
#include<vector>
#pragma execution_character_set("UTF-8")
#include<qDebug>
#include<QPainter>
#include<QColor>
#include<QColorDialog>
#include<QPicture>
#include <algorithm>
struct Pointp
{
double x;
double y;
QString no;
};
struct Dis
{
double x;
double y;
QString no;
QString noK;
double s;
};
struct centroid
{
QString noK;
double sx;
double sy;
};
enum Pd
{
readK,
to2K,
blank
};
class Kmeans : public QWidget
{
Q_OBJECT
public:
Kmeans(QWidget *parent = nullptr);
~Kmeans();
public slots:
void onBtReadData();
int onBtCalKmeans();
void onBtReadK();
public:
std::vector<Pointp> p;//原始数据点
std::vector<Pointp> k;//各簇质心坐标
int K;
std::vector<Dis> S;
std::vector<centroid> dis;
bool start;
Pd dd;
public:
void Calcentroid();
void CKmeans();
void CalDis();
void drawPoint();
void CalK();
void drawK();
void toK();
void CalDistance();
private:
Ui::KmeansClass ui;
};