实验报告聚类分析.docx
- 文档编号:930924
- 上传时间:2022-10-14
- 格式:DOCX
- 页数:20
- 大小:313.68KB
实验报告聚类分析.docx
《实验报告聚类分析.docx》由会员分享,可在线阅读,更多相关《实验报告聚类分析.docx(20页珍藏版)》请在冰豆网上搜索。
实验报告聚类分析
实验报告聚类分析
实验原理:
K均值聚类、中心点聚类、系统聚类和EM算法聚类分析技术。
实验题目:
用鸢尾花的数据集,进行聚类挖掘分析。
实验要求:
探索鸢尾花数据的基本特征,利用不同的聚类挖掘方法,获得基本结论并简明解释。
实验题目--分析报告:
data(iris)
>rm(list=ls())
>gc()
used(Mb)gctrigger(Mb)maxused(Mb)
Ncells431730929718607591
Vcells78760583886081592403
>data(iris)
>data<-iris
>head(data)
1
Species
setosa
2
setosa
3
setosa
4
setosa
5
setosa
6
setosa
#Kmeans聚类分析
>newiris<-iris
>newiris$Species<-NULL
>(kc<-kmeans(newiris,3))
K-means clustering with 3 clusters of sizes 62, 50, 38
Clustermeans:
Clusteringvector:
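[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[41] 2 2 2 2 2 2 2 2 2 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1
[81] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 3 3 3 1 3 3 3 3 3 3 1 1 3 3 3 3 1
[121] 3 1 3 1 3 3 1 1 3 3 3 3 3 1 3 3 3 3 1 3 3 3 1 3 3 3 1 3 3 1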
Withinclustersumofsquaresbycluster:
[1]
(between_SS/total_SS=%)
Availablecomponents:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
>table(iris$Species,kc$cluster)
            1  2  3
setosa      0 50  0
versicolor 48  0  2
virginica  14  0 36
>plot(newiris[c("Sepal.Length","Sepal.Width")],col=kc$cluster)
>points(kc$centers[,c("Sepal.Length","Sepal.Width")],col=1:3,pch=8,cex=2)
Q
4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0
Sepal.Length
Sepal.Width
#K-Medoids进行聚类分析
>install.packages("cluster")
>library(cluster)
>iris.pam<-pam(iris,3)
>table(iris$Species,iris.pam$clustering)
            1  2  3
setosa     50  0  0
versicolor  0  3 47
virginica   0 49  1
>layout(matrix(c(1,2),1,2))
>plot(iris.pam)
>
Coirijjonenl1
Tn®牌twocomponertsexplain&&.02%ofmepoiniw
>layout(matrix(1))
Silhouetteplotofpam(x=iris,k=3)nwl503AJSteisCj
j.i^ave^cjs;
l.50|O.6C
2520.41
0.00.20.4D.S0.61.0
SilfKiuelewiddl〒
SiHowHiewidWi-0.57
#hc
>iris.hc<-hclust(dist(iris[,1:4]))
>plot(iris.hc,hang=-1)
>plclust(iris.hc,labels=FALSE,hang=-1)
>re<-rect.hclust(iris.hc,k=3)
>iris.id<-cutree(iris.hc,3)
dist(iris[, 1:4])
hclust (*, "complete")
#利用剪枝函数cutree()参数h控制输出height=18时的系谱类别
>sapply(unique(iris.id),
+function(g)iris$Species[iris.id==g])
[[1]]
[1]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa
[12]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa
[23]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa
[34]setosasetosasetosasetosasetosasetosasetosasetosasetosasetosasetosa
[45]setosasetosasetosasetosasetosasetosa
Levels:
setosa versicolor virginica
[[2]]
[1]versicolorversicolorversicolorversicolorversicolorversicolorversicolor
[8]versicolorversicolorversicolorversicolorversicolorversicolorversicolor
[15]versicolorversicolorversicolorversicolorversicolorversicolorversicolor
[22]versicolorversicolorvirginicavirginicavirginicavirginicavirginica
[29]virginicavirginicavirginicavirginicavirginicavirginicavirginica
[36]virginicavirginicavirginicavirginicavirginicavirginicavirginica
[43]virginicavirginicavirginicavirginicavirginicavirginicavirginica
[50]virginicavirginicavirginicavirginicavirginicavirginicavirginica
[57]virginicavirginicavirginicavirginicavirginicavirginicavirginica
[64]virginicavirginicavirginicavirginicavirginicavirginicavirginica
[71]virginicavirginica
Levels:
setosaversicolorvirginica
[[3]]
[1]versicolorversicolorversicolorversicolorversicolorversicolorversicolor
[8]versicolorversicolorversicolorversicolorversicolorversicolorversicolor
[15]versicolorversicolorversicolorversicolorversicolorversicolorversicolor
[22]versicolorversicolorversicolorversicolorversicolorversicolorvirginica
Levels:
setosaversicolorvirginica
>plot(iris.hc)
>rect.hclust(iris.hc,k=4,border="lightgrey")#用浅灰色矩形框出4分类聚类结果
>rect.hclust(iris.hc,k=3,border="darkgrey")#用深灰色矩形框出3分类聚类结果
>rect.hclust(iris.hc,k=7,which=c(2,6),border="darkgrey")
Cluster Dendrogram
#DBSCAN基于密度的聚类
>install.packages("fpc")
>library(fpc)
>ds1=dbscan(iris[,1:4],eps=1,MinPts=5)#半径参数为1,密度阈值为5
>ds1
dbscan Pts=150 MinPts=5 eps=1
        1   2
border  0   1
seed   50  99
total  50 100
>ds2=dbscan(iris[,1:
4],eps=4,MinPts=5)
>ds3=dbscan(iris[,1:
4],eps=4,MinPts=2)
>ds4=dbscan(iris[,1:
4],eps=8,MinPts=2)
>par(mfcol=c(2,2))
>plot(ds1,iris[,1:
4],main="1:
MinPts=5eps=1")
>plot(ds3,iris[,1:
4],main="3:
MinPts=2eps=4")
>plot(ds2,iris[,1:
4],main="2:
MinPts=5eps=4")
>plot(ds4,iris[,1:
4],main="4:
MinPts=2eps=8")
4:
MinPts=2eps=8
2.G3.GM05IF25
>d=dist(iris[,1:
4])#计算数据集的距离矩阵d
>max(d);min(d)#计算数据集样本的距离的最值
[1]0
>install.packages("ggplot2")
>library(ggplot2)
>interval=cut_interval(d,30)
>table(interval)
interval
[0,],],],],]
]
88
585
876
891
831
688
],],],]
],]
543
369
379
339
335
406
],],],]
],]
458
459
465
480
468
505
],],],]
],]
349
385
321
291
187
138
],],],]
],]
97
92
78
50
18
4
>which.max(table(interval))
]
4
>for(i in 3:5)
+{for(j in 1:10)
+{ds=dbscan(iris[,1:
4],eps=i,M
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 实验 报告 聚类分析