程序流程圖:

DBSCAN核心功能函數,計算每個point的eps范圍內的point數量pts;
對于所有pts >Minpts的point,記為Core point;
對于所有的corepoint,將其eps范圍內的core point下標添加到vector<int>::corepts中;
對于所有的corepoint,采用深度優先的方式遍歷core point的所有cluster,使得相互連接的core point具有相同的cluster編號;
計算所有pts < Minpts且在Core point范圍內的,記為Borderpoint;
將所有Borderpoint加入到任意一個關聯的core point;
剩余的point的為Noise point,文件寫寫入時忽略;
將point信息寫入clustering文件,程序結束。
~~~
/*
DBSCAN Algorithm
15S103182
Ethan
*/
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <ctime>
#include <cstdlib>
#include <limits>
#include <cmath>
#include <stack>
using namespace std;
class point{
public:
float x;
float y;
int cluster=0;
int pointType=1;//1 noise 2 border 3 core
int pts=0;//points in MinPts
vector<int> corepts;
int visited = 0;
point (){}
point (float a,float b,int c){
x = a;
y = b;
cluster = c;
}
};
float stringToFloat(string i){
stringstream sf;
float score=0;
sf<<i;
sf>>score;
return score;
}
vector<point> openFile(const char* dataset){
fstream file;
file.open(dataset,ios::in);
if(!file)
{
cout <<"Open File Failed!" <<endl;
vector<point> a;
return a;
}
vector<point> data;
int i=1;
while(!file.eof()){
string temp;
file>>temp;
int split = temp.find(',',0);
point p(stringToFloat(temp.substr(0,split)),stringToFloat(temp.substr(split+1,temp.length()-1)),i++);
data.push_back(p);
}
file.close();
cout<<"successful!"<<endl;
return data;
}
float squareDistance(point a,point b){
return sqrt((a.x-b.x)*(a.x-b.x)+(a.y-b.y)*(a.y-b.y));
}
void DBSCAN(vector<point> dataset,float Eps,int MinPts){
int len = dataset.size();
//calculate pts
cout<<"calculate pts"<<endl;
for(int i=0;i<len;i++){
for(int j=i+1;j<len;j++){
if(squareDistance(dataset[i],dataset[j])<Eps)
dataset[i].pts++;
dataset[j].pts++;
}
}
//core point
cout<<"core point "<<endl;
vector<point> corePoint;
for(int i=0;i<len;i++){
if(dataset[i].pts>=MinPts) {
dataset[i].pointType = 3;
corePoint.push_back(dataset[i]);
}
}
cout<<"joint core point"<<endl;
//joint core point
for(int i=0;i<corePoint.size();i++){
for(int j=i+1;j<corePoint.size();j++){
if(squareDistance(corePoint[i],corePoint[j])<Eps){
corePoint[i].corepts.push_back(j);
corePoint[j].corepts.push_back(i);
}
}
}
for(int i=0;i<corePoint.size();i++){
stack<point*> ps;
if(corePoint[i].visited == 1) continue;
ps.push(&corePoint[i]);
point *v;
while(!ps.empty()){
v = ps.top();
v->visited = 1;
ps.pop();
for(int j=0;j<v->corepts.size();j++){
if(corePoint[v->corepts[j]].visited==1) continue;
corePoint[v->corepts[j]].cluster = corePoint[i].cluster;
corePoint[v->corepts[j]].visited = 1;
ps.push(&corePoint[v->corepts[j]]);
}
}
}
cout<<"border point,joint border point to core point"<<endl;
//border point,joint border point to core point
for(int i=0;i<len;i++){
if(dataset[i].pointType==3) continue;
for(int j=0;j<corePoint.size();j++){
if(squareDistance(dataset[i],corePoint[j])<Eps) {
dataset[i].pointType = 2;
dataset[i].cluster = corePoint[j].cluster;
break;
}
}
}
cout<<"output"<<endl;
//output
fstream clustering;
clustering.open("clustering.txt",ios::out);
for(int i=0;i<len;i++){
if(dataset[i].pointType == 2)
clustering<<dataset[i].x<<","<<dataset[i].y<<","<<dataset[i].cluster<<"\n";
}
for(int i=0;i<corePoint.size();i++){
clustering<<corePoint[i].x<<","<<corePoint[i].y<<","<<corePoint[i].cluster<<"\n";
}
clustering.close();
}
int main(int argc, char** argv) {
vector<point> dataset = openFile("dataset3.txt");
DBSCAN(dataset,1.5,2);
return 0;
}
~~~
數據文件格式:(x,y)

運行結果格式:(x,y,cluster)

圖形化展現:
特殊形狀:

橋梁:

變化密度:

總結:
DBSCAN算法能夠很好處理不同形狀與大小的數據,并且抗噪音數據。但對于變化的密度,DBSCAN算法具有局限性。在實現Core point連接時,遇到了一點小小的麻煩,很難將相互連接的core point的cluster編號統一,后來通過給每個core point增加一個數組用于記錄相連core point的下標信息,并采用深度優先進行遍歷的方式,不僅提高了計算速度,同時也保證了準確性。對于實驗數據結果,如果不對其進行圖形化展現,很難看出聚類的效果,采用WPF(C#)技術對數據點進行處理,對不同cluster編號的點,賦予不同的顏色,進行圖形展現。
- 前言
- 插入排序
- 歸并排序
- 快速排序
- 最長公共子序列
- 斐波那契數列-臺階問題
- 求n*n階矩陣最大子矩陣階數
- 01背包
- 整數序列合并問題
- 動態規劃算法的一般解題思路
- 01背包-近似算法
- 樹搜索策略
- 求數組中的逆序對
- 并行機器最短調度問題
- 隨機算法
- 判斷兩多項式之積是否等于另一多項式
- 頂點覆蓋問題
- Apriori算法 (Introduction to data mining)
- 聚類算法-DBSCAN-C++實現
- 聚類算法-K-means-C++實現
- 聚類算法-Hierarchical(MIN)-C++
- 爬山法、分支限界法求解哈密頓環問題
- Best-First求解八數碼問題
- Naive Bayesian文本分類器