ID3算法源程序Word文档格式.docx
- 文档编号:16625117
- 上传时间:2022-11-24
- 格式:DOCX
- 页数:13
- 大小:18.79KB
ID3算法源程序Word文档格式.docx
《ID3算法源程序Word文档格式.docx》由会员分享,可在线阅读,更多相关《ID3算法源程序Word文档格式.docx(13页珍藏版)》请在冰豆网上搜索。
structnode*on;
/*Addressof'
on'
node*/
structnode*off;
off'
structnode*parent;
/*Addessofparentnode*/
}NODE;
typedefstructne_struct{
REALne;
UINTstatus;
}NEGENTROPY;
typedefstructmatrix{
UINTwidth;
UINTheight;
REAL**data;
}MATRIX;
/*
 * Node/branch states.  ID3() receives ON or OFF to decide which child link
 * of the parent to fill in, and treats a negentropy status of INACTIVE as
 * "not yet at an end-of-branch".
 */
enum UINT { INACTIVE, OFF, ON };

/* Natural logarithm of 2, used to convert log() results to base 2. */
#define LN_2 0.693147180559945309417

/*
 * entropy(x) = x * log2(x) for x > 0, and 0.0 otherwise.
 *
 * The argument is parenthesized so that compound expressions such as
 * entropy(a + b) expand correctly.  It is still evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define entropy(x) ((x) > 0 ? (x) * log(x) / LN_2 : 0.0)
/*
 * FILE: id3.c
 *
 * Author: Andrew Colin
 *
 * DISCLAIMER: No liability is assumed by the author for any use made
 * of this program.
 *
 * DISTRIBUTION: Any use may be made of this program, as long as the
 * clear acknowledgment is made to the author in code and runtime
 * executables
 */
#include<
stdio.h>
stdlib.h>
math.h>
float.h>
limits.h>
string.h>
conio.h>
time.h>
#include"
id3.h"
proto.h"
/*-------------------------------------------------------------------*/
MATRIX*build_matrix(UINTwidth,UINTheight)
{
MATRIX*_matrix;
UINTi;
_matrix=(MATRIX*)malloc(sizeof(MATRIX));
if(!
_matrix)
err_exit(__FILE__,__LINE__);
_matrix->
width
=width;
height=height;
data=(REAL**)malloc(height*sizeof(REAL*));
if(_matrix->
data==NULL)
err_exit(__FILE__,__LINE__);
for(i=0;
i<
height;
i++)
data[i]=(REAL*)malloc(width*sizeof(REAL));
data[i]==NULL)
}
return_matrix;
*Standarderrorhandlerfunction
voiderr_exit(CHAR*file,UINTline)
printf("
\nFatalerrorinfile%s,line%u"
file,line);
exit(0);
/*
 * Given the name of a file of numeric data, this routine counts the
 * numbers of rows and columns.  It's assumed that the number of entries
 * is the same in each row, and an error is flagged if this is not the
 * case.
 *
 *   file_name  Path of the data file to scan.
 *   width      Out: number of entries found in the first row.
 *   height     Out: number of rows; after the first row, lines no longer
 *              than a single newline character are not counted.
 *
 * Both outputs are 0 for an empty file.  A file that cannot be opened, or
 * a row whose entry count differs from the first row, ends the program
 * through err_exit().
 *
 * NOTE(review): the strtok() delimiter is assumed to be a single space;
 * the literal was whitespace-only in the listing this was recovered from.
 * Lines longer than the buffer are read in pieces and therefore counted
 * as several rows.
 */
void file_size(CHAR *file_name, UINT *width, UINT *height)
{
    enum { LINE_BUF_SIZE = 0xFF };
    FILE *f;
    UINT row_width;
    CHAR buffer[LINE_BUF_SIZE];
    CHAR *ptr;

    *width = *height = 0;

    /* Open data file - abort if file name invalid */
    f = fopen(file_name, "r");
    if (f == NULL)
    {
        printf("\nFile not found: %s\n", file_name);
        err_exit(__FILE__, __LINE__);
    }

    /* Get number of entries in first row */
    if (fgets(buffer, LINE_BUF_SIZE, f) != NULL)
    {
        ++*height;
        for (ptr = strtok(buffer, " "); ptr != NULL; ptr = strtok(NULL, " "))
            ++*width;
    }

    /* Count numbers of subsequent rows */
    while (fgets(buffer, LINE_BUF_SIZE, f) != NULL)
    {
        /* Skip lines that hold no more than a newline character */
        if (strlen(buffer) <= strlen("\n"))
            continue;

        ++*height;
        row_width = 0;
        for (ptr = strtok(buffer, " "); ptr != NULL; ptr = strtok(NULL, " "))
            ++row_width;

        if (*width != row_width)
        {
            printf("\nNumber of entries in file %s did not agree", file_name);
            err_exit(__FILE__, __LINE__);
        }
    }

    /* The original never closed the stream. */
    fclose(f);
}
/*
 * Releases a matrix created by build_matrix(): every row, then the array
 * of row pointers, then the MATRIX structure itself.
 *
 * A NULL argument is ignored.
 */
void free_matrix(MATRIX *_matrix)
{
    UINT row;

    if (_matrix == NULL)
        return;

    if (_matrix->data != NULL)
    {
        for (row = 0; row < _matrix->height; row++)
            free(_matrix->data[row]);
    }
    free(_matrix->data);
    free(_matrix);
}
voidfree_tags(CHAR**varname,UINTwidth)
width;
free(varname[i]);
free(varname);
*node)
*
Freesthememoryallocatedtoatreestructure
*/
if(node==NULL)
return;
else
free_tree(node->
on);
off);
free(node);
NODE*ID3(MATRIX*matrix,NODE*parent,UINTtarget,UINTstate)
/*Routinetobuildadecisiontree,basedonQuinlan'
sID3algorithm.*/
NEGENTROPYnegentropy_struct;
NODE*node;
UINTn_vars=matrix->
width,n_samples=matrix->
height,i,j,split;
REAL**data=matrix->
data;
REALbest_threshold,min_negentropy,_negentropy;
/*Allocatememoryforthisnode*/
node=(NODE*)malloc(sizeof(NODE));
node)
/*Setuplinksindecisiontree*/
node->
parent=parent;
/*Setaddressofparentnode*/
if(parent!
=NULL)/*parenttochild;
notrelevantforrootnode*/
/*Passaddressofthisnodetotheparentnode*/
if(state==ON)
parent->
on=node;
if(state==OFF)
off=node;
*Selectattributewithlowestnegentropyforsplitting.Scanthrough
*ALLattributes(exceptthetarget)andALLdatasamples.Thisis
*prettyinefficientfordatasetswithrepeatedvalues,butwilldo
*forillustrativepurposes
min_negentropy=1.0;
n_vars;
for(j=0;
j<
n_samples;
j++)
if(i!
=target)
/*Settrialvaluesforthisnode...*/
idx=i;
threshold=data[j][i];
/*...andcalculatethenegentropyofthispartition*/
negentropy_struct=negentropy(data,n_samples,node,target);
_negentropy=negentropy_struct.ne;
/*Ifthisnegentropyislowerthananyother,retainthe
indexandthresholdforfutureuse*/
if(_negentropy<
min_negentropy)
min_negentropy=_negentropy;
split=i;
best_threshold=data[j][i];
}/*if(i!
=target)*/
}/*for(j=0;
j++)*/
}/*for(i=0;
i++)*/
/*Savethecombinationofbestattributeandthresholdvalue*/
idx=split;
threshold=best_threshold;
*Ifthenegentropyroutinefounditselfatanend-of-branch
*forthedecisiontree,the'
status'
flagin'
negentropy_struct'
*issettoONorOFFandthenodelabelledaccordingly.Otherwise,
*ID3continuestocallitselfuntilallend-of-branchnodesare
*found.
if
(negentropy_struct.status!
=INACTIVE)
on=node->
off=NULL;
idx=negentropy_struct.status;
on
=ID3(matrix,node,target,ON);
off=ID3(matrix,node,target,OFF);
returnnode;
voidmain(intargv,char*argc[])
MATRIX*matrix;
UINTtarget,n_vars,n_samples;
CHARdata_file[13],tag_file[13];
/*LongestfilenameinDOS*/
CHAR**tag_names;
/*Setupfilenames*/
if(argv!
=2)
\nUsage:
id3[datafile]"
\nWelcometoID3"
\nLastcompiledon%s,%s"
__DATE__,__TIME__);
strcpy(data_file,argc[1]);
strcpy(tag_file,
argc[1]);
strcat(data_file,"
.dat"
strcat(tag_file,
"
.tag"
/*Readdimensionsofdatafile*/
file_size(data_file,&
n_vars,&
n_samples);
/*Readlabelsforcolumnsofdata*/
tag_names=read_tags(tag_file,n_vars);
/*Allocatestoragefordata...*/
matrix=build_matrix(n_vars,n_samples);
/*...andreaditfromdisk*/
read_matrix(data_file,matrix);
/*Classificationtargetislastcolumn*/
target=n_vars-1;
/*Returnrootofdecisiontree-ID3continuestocallitself
recursively*/
node=ID3(matrix,NULL,target,0);
print_tree(node,tag_names);
free_tags(tag_names,n_vars);
free_matrix(matrix);
free_tree(node);
NEGENTROPYnegentropy(REAL**data,
UINT
n_samples,
NODE
*local,
target)
*Calculatestheentropyofclassif
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- ID3 算法 源程序