决策树ID3分类算法Word文档格式.docx
- 文档编号:22014957
- 上传时间:2023-02-02
- 格式:DOCX
- 页数:30
- 大小:507.10KB
决策树ID3分类算法Word文档格式.docx
《决策树ID3分类算法Word文档格式.docx》由会员分享,可在线阅读,更多相关《决策树ID3分类算法Word文档格式.docx(30页珍藏版)》请在冰豆网上搜索。
void Generate_decision_tree(Tree_Node*& root, vector<int> Samples, vector<int> attribute_list, int class_id)
该函数由给定的训练数据产生一棵判定树。
完整代码:
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <iostream>
#include <vector>
using namespace std;
/* One text cell: a single token stored as a NUL-terminated C string.
   The loader fills it with fscanf("%s", ...), so it holds one
   whitespace-delimited word (a column header or a field value). */
typedef struct tnode
{
    char tdata[100];  // token text, at most 99 characters plus the terminator
} tnode;
typedefstructTree_Node
charname[100];
boolisLeaf;
//标记是否叶子节点
vector<
tnode>
att_list;
//属性名称列表
Tree_Node*>
child_list;
}Tree_Node,*pTreeNpde;
typedefstructdnode
row;
}dnode;
typedefstructD_Node
dnode>
DB;
attr_name;
tnodeclass_name;
}D_Node;
D_NodeG_DB;
pTreeNpdeRoot=NULL;
/* Frequency record for one distinct value: its text, how many times it
   occurred, and the record IDs that carried it. */
typedef struct FreeQNode
{
    char name[100];           // the value's text
    int count;                // number of occurrences
    std::vector<int> Set_ID;  // IDs of the records in this subset
} FreeQNode;
typedefstructFreeQNodeDouble
row_id;
FreeQNode>
classes;
//存放分类属性列表及相应的出现次数
}FreeQNodeDouble;
typedefstructattr_node
intattr_id;
count_list;
}attr_node;
vector<
attr_node>
G_Attr_List;
/* Node of the binary search tree used to count distinct strings. */
typedef struct binNode
{
    char name[100];           // key: the string being counted
    int count;                // how many times the key was inserted
    std::vector<int> Set_ID;  // record IDs attached to this key (copied out by Bin_tree_inorder)
    struct binNode* lchild;
    struct binNode* rchild;
} binNode;
typedefstructbinNodeDouble
structbinNodeDouble*lchild;
structbinNodeDouble*rchild;
}binNodeDouble;
voidinsert_tree(binNode*&
r,charstr[100])
if(NULL==r)
{
binNode*node=newbinNode;
strcpy(node->
name,str);
node->
count=1;
//printf("
[%s,%d]\n"
node->
name,node->
count);
lchild=node->
rchild=NULL;
r=node;
}
else
if(strcmp(r->
name,str)==0)
{
r->
count++;
}
elseif(strcmp(r->
name,str)<
0)
insert_tree(r->
lchild,str);
else
rchild,str);
}
voiddelete_bin_tree(binNode*&
r)
if(r!
=NULL)
delete_bin_tree(r->
lchild);
rchild);
delete(r);
r=NULL;
voidBin_tree_inorder(binNode*r,vector<
&
Fq)
Bin_tree_inorder(r->
lchild,Fq);
FreeQNodeft;
%s,%d\n"
r->
name,r->
strcpy(ft.name,r->
name);
ft.count=r->
count;
for(inti=0;
i<
r->
Set_ID.size();
i++)
ft.Set_ID.push_back(r->
Set_ID[i]);
//保存子集对应的ID号
Fq.push_back(ft);
//此处少了这条语句,造成结果无法返回
rchild,Fq);
voidGet_attr(binNode*r,attr_node&
attr)
Get_attr(r->
lchild,attr);
tnodet;
strcpy(t.tdata,r->
attr.attr_name.push_back(t);
attr.count_list.push_back(r->
//保存出现次数
rchild,attr);
voidinsert_tree_double(binNodeDouble*&
r,intDB_ID,charattr_name[100],charclass_name[100])
{
binNodeDouble*node=newbinNodeDouble;
name,attr_name);
row_id.push_back(DB_ID);
FreeQNodefq;
strcpy(fq.name,class_name);
fq.count=1;
fq.Set_ID.push_back(DB_ID);
//保存子集所对应的ID号
classes.push_back(fq);
r=node;
name,attr_name)==0)
//这里也需要保存相应的ID号
boolfound=false;
for(inti=0;
i<
classes.size();
{
if(strcmp(r->
classes[i].name,class_name)==0)
{
r->
classes[i].count++;
classes[i].Set_ID.push_back(DB_ID);
//保存子集对应的ID号
found=true;
//发现相同的变量名,计数器增1,
break;
//并退出循环
}
}
if(!
found)
FreeQNodefq;
strcpy(fq.name,class_name);
fq.count=1;
fq.Set_ID.push_back(DB_ID);
//保存子集所对应的ID号
r->
name,attr_name)<
insert_tree_double(r->
lchild,DB_ID,attr_name,class_name);
rchild,DB_ID,attr_name,class_name);
voiddelete_bin_tree_double(binNodeDouble*&
delete_bin_tree_double(r->
voidBin_tree_inorder_double(binNodeDouble*&
r,vector<
FreeQNodeDouble>
Fq)
Bin_tree_inorder_double(r->
FreeQNodeDoubleft;
//保存候属性的名称
for(intk=0;
k<
row_id.size();
k++)
ft.row_id.push_back(r->
row_id[k]);
doubleTree.%s,%d\n"
for(inti=0;
i<
FreeQNodefq;
strcpy(fq.name,r->
classes[i].name);
fq.count=r->
classes[i].count;
for(intj=0;
j<
classes[i].Set_ID.size();
j++)
fq.Set_ID.push_back(r->
classes[i].Set_ID[j]);
ft.classes.push_back(fq);
ft.classes.erase(ft.classes.begin(),ft.classes.end());
//使用完,必须清空
voidgetFqI(vector<
S,intclass_id,vector<
binNode*root=NULL;
for(inti=0;
S.size();
insert_tree(root,G_DB.DB[S[i]].row[class_id].tdata);
Bin_tree_inorder(root,Fq);
delete_bin_tree(root);
voidgetFqIA(vector<
S,intattr_id,intclass_id,vector<
binNodeDouble*root=NULL;
insert_tree_double(root,S[i],G_DB.DB[S[i]].row[attr_id].tdata,G_DB.DB[S[i]].row[class_id].tdata);
Bin_tree_inorder_double(root,Fq);
delete_bin_tree_double(root);
voidreaddata(char*filename)
charstr[1000];
FILE*fp;
fp=fopen(filename,"
r"
);
fgets(str,1000,fp);
intlen=strlen(str);
intattr_no=0;
//属性个数
introw_num=0;
if(str!
row_num=1;
len;
if(str[i]=='
\t'
)
attr_no++;
attr_no++;
//最后一个是回车,整个属性值+1
printf("
%d\n"
attr_no);
while(fgets(str,1000,fp)!
row_num++;
//统计行数
fclose(fp);
fopen(filename,"
tnodet;
for(i=0;
attr_no;
fscanf(fp,"
%s"
t.tdata);
G_DB.attr_name.push_back(t);
printf("
%s\n"
strcpy(G_DB.class_name.tdata,G_DB.attr_name[attr_no-1].tdata);
for(intj=1;
j<
row_num;
dnodedt;
tnodetemp;
fscanf(fp,"
temp.tdata);
dt.row.push_back(temp);
G_DB.DB.push_back(dt);
dt.row.erase(dt.row.begin(),dt.row.end());
G_DB.DB.size());
G_DB.DB.size();
for(intj=0;
G_DB.DB[i].row.size();
printf("
%s\t"
G_DB.DB[i].row[j].tdata);
\n"
doubleFnc_I(vector<
S,intclass_id)
//给定一个子集,计算其按照class_id所对应的分类属性进行分类时的期望I
//printf("
calledFnc_I(%d)\n"
class_id);
Fq;
getFqI(S,class_id,Fq);
//调用getFqI获取按照Class_id为分类标准的分类结果,当Fq中为一条数据时,则子集S都属于一个分类
//否则,从中找到出现此时最大的,作为返回结果
//printf("
begintocomputeI\n"
doubletotal=0;
Fq.size();
total+=Fq[i].count;
Fq[i].name,Fq[i].count);
doubleresult=0;
if(0==total)
return0;
doublep=Fq[i].count/total;
result+=-1*(p*log(p)/log
(2));
FNC_Ireturn\n\n"
returnresult;
doubleFnc_IA(vector<
getFqIA(S,attr_id,class_id,Fq);
boolpr=false;
doublestotal=Fq[i].count;
doublesresult=0;
if(pr)printf("
Fq[i].classes.size();
if(pr)printf("
Fq[i].classes[j].name,Fq[i].classes[j].count);
for(intk=0;
k<
Fq[i].classes[j].count;
//printf("
%d\t"
Fq[i].classes[j].Set_ID[k]+1);
doublesp=Fq[i].classes[j].count/stotal;
//计算子集的频率
sresult+=-1*(sp*log(sp)/log
(2));
result+=(stotal/total)*sresult;
intSelectBestAttribute(vector<
Samples,vector<
//输入训练数据集Samples,候选属性列表attribute_list
//分类属性标记class_id
//返回best_attribute
doublefi=Fnc_I(Samples,5);
%lf\n"
fi);
doubleIA=999999999;
intbest_attrib=-1;
attribute_list.size();
vector<
fqd;
doubletfa=Fnc_IA(Samples,attribute_list[i],class_id,fqd);
%d,FIA=%lf\n"
i,tfa);
if(IA>
tfa)
IA=tfa;
best_attrib=i;
//printf("
IA);
gain(%d)=%lf-%lf=%lf\n"
best_attrib,fi,IA,fi-IA);
returnattribute_list[best_attrib];
voidfnc_getattr(vector<
Samples,intatt_id,attr_node&
at)
Samples.size();
insert_tree(root,G_DB.DB[Samples[i]].row[att_id].tdata);
Get_attr(root,at);
voidget_class_num_and_name(vector<
Samples,intclass_id,int&
class_num,tnode&
class_name)
attr_nodeat;
binNode*ro
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 决策树 ID3 分类 算法