HITS算法的二部图实现.docx
- 文档编号:7403513
- 上传时间:2023-01-23
- 格式:DOCX
- 页数:18
- 大小:20.30KB
HITS算法的二部图实现.docx
《HITS算法的二部图实现.docx》由会员分享,可在线阅读,更多相关《HITS算法的二部图实现.docx(18页珍藏版)》请在冰豆网上搜索。
HITS算法的二部图实现
packageextrcting;
importjava.io.*;
importjava.util.ArrayList;
importjava.util.HashMap;
importjava.util.HashSet;
importjava.util.Iterator;
publicclassHITS
{
//publicArrayList
publicHashMap
publicHashMap
publicHashMap
publicHashMap
publicHashMap
publicHashMap
publicHashSet
publicHashMap
publicHashMap
publicHashMap
publicHashMap
publicfinalStringdata="20110317";
publicIntegernounCount=0;
publicIntegeremoCount=0;
publicIntegernounCountEmo=0;
publicIntegeremoCountEmo=0;
publicintrowNumber;//所有行
publicintrowNumberEmo=0;;//所有含有情感词的行
/**
 * Creates the analyzer: initializes the lexicon set and every score/graph map,
 * loads the lexicon file, and then runs {@link #extract()}.
 *
 * <p>Each line of {@code Lexicon.txt} (resolved against the working directory and
 * decoded with the platform default charset) is stored in {@code Lexicon} with the
 * literal suffix {@code "/n"} appended.
 * NOTE(review): the suffix looks like a part-of-speech tag rather than a mistyped
 * newline, since other code in this class compares words such as "盐/n" - confirm.
 *
 * <p>NOTE(review): {@code extract()} is a public method invoked from the constructor;
 * a subclass override would run before the subclass is fully initialized.
 *
 * @throws IOException if {@code Lexicon.txt} cannot be opened or read, or if
 *                     {@code extract()} fails
 */
public HITS() throws IOException
{
    rowNumber = 0;

    // Type arguments are inferred from the field declarations (diamond operator).
    Lexicon = new HashSet<>();
    authScore = new HashMap<>();
    centerScore = new HashMap<>();
    graphFacet = new HashMap<>();
    graphEmo = new HashMap<>();
    wordLex = new HashMap<>();
    facetScoreA = new HashMap<>();
    facetScoreC = new HashMap<>();
    emoScoreA = new HashMap<>();
    emoScoreC = new HashMap<>();

    // try-with-resources: the original never closed this reader, leaking the file
    // handle on both the normal path and when readLine() throws.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream("Lexicon.txt"))))
    {
        String words;
        while ((words = reader.readLine()) != null)
        {
            Lexicon.add(words + "/n");
        }
    }

    extract();

    // The iteration and final scoring steps are currently disabled:
    // doHITS();
    // finalScore();
    //
    // Also disabled: printing of corpus statistics (noun / emotion-word totals, their
    // per-row averages over all rows and over rows containing emotion words, and the
    // two row counts), derived from nounCount, emoCount, nounCountEmo, emoCountEmo,
    // rowNumber and rowNumberEmo.
}
publicvoiddoHITS()throwsIOException
{
intiterOver=0;//代表迭代结束
finalintiterTime=1000;//迭代次数
intcount=0;//计数,迭代次数
//Filelog=newFile("E:
\\HITSlog");//日志主要是查看盐的相关信息的
//if(log.exists())
//{
//log.delete();
//}
//log.createNewFile();
//FileWriterwrite=newFileWriter(log);
//BufferedWriterbw=newBufferedWriter(write);
intcodeNumber=authScore.size();
//while(iterOver!
=1)
while(iterOver!
=1&&count { doubleauthMax=.0; doublecenterMax=.0;//新的归一化算法 doubleauthSum=.0; doublecenterSum=.0;//用于归一化 HashMap //做实验表明,必须new,不new存的是地址 HashMap System.out.println("第"+count+"迭代"+"节点数"+codeNumber+"个"); IteratoriteratorFacet=graphFacet.keySet().iterator();//对属性部分遍历 while(iteratorFacet.hasNext()) { StringsFacet=""; sFacet=(String)iteratorFacet.next(); ArrayList DoubleauthScoreArray=.0; DoublecenterScoreArray=.0; for(inti=0;i /* *每个点的中心度是各个点的权威度之和 *每个点的权威度是各个点的中心度之和 */ { //if(sFacet.equals("盐/n")) //{ //bw.write(iterEmoArray.get(i)); //bw.newLine(); //bw.flush(); //} authScoreArray=authScoreArray+centerScoreLast.get(iterEmoArray.get(i)); authSum=authSum+centerScoreLast.get(iterEmoArray.get(i)); centerScoreArray=centerScoreArray+authScoreLast.get(iterEmoArray.get(i)); centerSum=centerSum+authScoreLast.get(iterEmoArray.get(i)); } authScore.put(sFacet,authScoreArray); centerScore.put(sFacet,centerScoreArray); if(authScoreArray>authMax)//新归一化 { authMax=authScoreArray; } if(centerScoreArray>centerMax) { centerMax=centerScoreArray; } } IteratoriteratorEmo=graphEmo.keySet().iterator();//对情感部分遍历 while(iteratorEmo.hasNext()) { StringsEmo=""; sEmo=(String)iteratorEmo.next(); ArrayList DoubleauthScoreArray=.0; DoublecenterScoreArray=.0; for(inti=0;i /* *每个点的中心度是各个点的权威度之和 *每个点的权威度是各个点的中心度之和 */ { authScoreArray=authScoreArray+centerScoreLast.get(iterFacetArray.get(i)); authSum=authSum+centerScoreLast.get(iterFacetArray.get(i)); centerScoreArray=centerScoreArray+authScoreLast.get(iterFacetArray.get(i)); authScoreLast.get(iterFacetArray.get(i)); } authScore.put(sEmo,authScoreArray); centerScore.put(sEmo,centerScoreArray); if(authScoreArray>authMax)//新归一化 { authMax=authScoreArray; } if(centerScoreArray>centerMax) { centerMax=centerScoreArray; } } /* *归一化机制 */ IteratoriteratorAuthNorm=authScore.keySet().iterator(); while(iteratorAuthNorm.hasNext()) { StringitFacet=(String)iteratorAuthNorm.next(); //DoubleauthScoreNorm=authScore.get(itFacet)/authSum; 
DoubleauthScoreNorm=authScore.get(itFacet)/authMax;//新归一化 authScore.put(itFacet,authScoreNorm); } IteratoriteratorCenterNorm=centerScore.keySet().iterator(); while(iteratorCenterNorm.hasNext()) { StringitEmo=(String)iteratorCenterNorm.next(); //DoublecenterScoreNorm=centerScore.get(itEmo)/centerSum; DoublecenterScoreNorm=centerScore.get(itEmo)/centerMax;//新归一化 centerScore.put(itEmo,centerScoreNorm); } /* *归一化机制 */ IteratoriteratorAuth=authScore.keySet().iterator();//遍历中心度 intcountJudge=0; while(iteratorAuth.hasNext()) { Stringword=""; word=(String)iteratorAuth.next(); if(((authScore.get(word)-authScoreLast.get(word)<0.00001)&& (authScore.get(word)-authScoreLast.get(word)>-0.00001)) &&((centerScore.get(word)-centerScoreLast.get(word)<0.00001)&& centerScore.get(word)-centerScoreLast.get(word)>-0.00001)) { countJudge++; } } //System.out.println(authScore); //System.out.println(authScoreLast); //System.out.println("查看迭代终止的另一个条件countjudge"+countJudge); if(countJudge==authScore.size()) { iterOver=1; } count++; } /* *迭代结束后的赋值阶段,最终得分阶段 */ System.out.println(graphFacet); System.out.println(graphEmo); } publicvoidfinalScore() { IteratoriteFacet=graphFacet.keySet().iterator();//把得分乘以出现的次数 while(iteFacet.hasNext()) { Stringfacet=""; facet=(String)iteFacet.next(); doublescoreFA=.0; doublescoreFC=.0; doubletimeF=Math.log10(wordLex.get(facet)); scoreFA=authScore.get(facet)*timeF; facetScoreA.put(facet,scoreFA); scoreFC=centerScore.get(facet)*timeF; facetScoreC.put(facet,scoreFC); } IteratoriteEmo=graphEmo.keySet().iterator(); while(iteEmo.hasNext()) { Stringemo=""; emo=(String)iteEmo.next(); doublescoreEA=.0; doublescoreEC=.0; doubletimeE=Math.log10(wordLex.get(emo)); scoreEA=authScore.get(emo)*timeE; emoScoreA.put(emo,scoreEA); scoreEC=authScore.get(emo)*timeE; emoScoreC.put(emo,scoreEC); } } publicvoidextract()throwsIOException { System.out.println("开始抽取"); StringinputDataPath="E: \\日本地震_2\\corpus_sum_split\\sum_split"+data; //StringinputDataPath="E: 
\\日本地震_2\\corpus_sum_split\\sum_splitSum"; FileinputFile=newFile(inputDataPath); FileReaderread=newFileReader(inputFile); BufferedReaderre=newBufferedReader(read); Stringrow=""; while((row=re.readLine())! =null) { intemoSig=0; intemoEmoCountRow=0; intemoNounCountRow=0; System.out.println("处理第"+rowNumber+"行"); //System.out.println(row); ArrayList //一句话的信息,每一个Arraylist为子句的信息,就是用,分格出来的字句 for(inti=0;i<50;i++) { sentenceInfo[i]=newArrayList } intcount=0; intcontinueNoun=0;//记录名词后面是否有其他词,如果有的话,就不考虑“的”的问题了。 String[]words=newString[100]; words=row.split(""); for(inti=0;i /* *for循环的作用是遍历每一个词,加入到句子信息中 */ { if(words[i].indexOf("/nl")! =-1||words[i].indexOf("/nr")! =-1 ||words[i].indexOf("/nrf")! =-1||words[i].indexOf("/nrj")! =-1 ||words[i].indexOf("/nr1")! =-1||words[i].indexOf("/nr2")! =-1 ||words[i].indexOf("/nsf")! =-1||words[i].indexOf("/ns")! =-1 ||words[i].indexOf("/nt")! =-1||words[i].indexOf("/nz")! =-1 ||words[i].indexOf("/n")! =-1) /* *ng不要,是两次神马的 */ { if(words[i]! =null) //System.out.println(words[i]); { if(words[i].substring(0,words[i].indexOf("/")).matches("[\u4e00-\u9fa5]+") ||words[i].substring(0,words[i].indexOf("/")).matches("[a-zA-Z]+") ||words[i].substring(0,words[i].indexOf("/")).matches("[0-9]+")) { //if(rowNumber>11183) //{ //System.out.println(words[i]); //System.out.println(row); //System.out.println(count); //Sy
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- HITS 算法 二部 实现
![提示](https://static.bdocx.com/images/bang_tan.gif)