educoder Platform: HDFS and MapReduce Comprehensive Experiment
========================= Level 1 =======================
Level 1:
Practice task completed
WordCount word frequency statistics
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Mapper: split each input line into tokens and emit (word, 1) for every token.
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts for each word.
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
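As a hedged illustration (the sample line below is hypothetical, not part of the exercise data): for an input line "hello world hello", TokenizerMapper emits (hello,1), (world,1), (hello,1); IntSumReducer, which also runs as the combiner, sums the counts per word, so the final output contains roughly:

hello	2
world	1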
========================= Level 2 =======================
Level 2:
Practice task completed
HDFS file read and write
import java.io.IOException;
import java.sql.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class hdfs {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.getUri());

        // Create the file and write a UTF string into it if it does not exist yet.
        Path file = new Path("/user/hadoop/myfile");
        if (fs.exists(file)) {
            System.out.println("File exists.");
        } else {
            FSDataOutputStream outStream = fs.create(file);
            outStream.writeUTF("china cstor cstor cstor china");
            outStream.close();
        }

        // Read the string back.
        FSDataInputStream inStream = fs.open(file);
        String data = inStream.readUTF();

        // Print the file's metadata: owner, replication, modification time, block size.
        FileSystem hdfs = file.getFileSystem(conf);
        FileStatus[] fileStatus = hdfs.listStatus(file);
        for (FileStatus status : fileStatus) {
            System.out.println("File Owner: " + status.getOwner());
            System.out.println("File Replication: " + status.getReplication());
            System.out.println("File Modification Time: " + new Date(status.getModificationTime()));
            System.out.println("File Block Size: " + status.getBlockSize());
        }

        System.out.println(data);
        System.out.println("File name: " + file.getName());
        inStream.close();
        fs.close();
    }
}
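A minimal companion sketch, not part of the level (the class name HdfsCleanup and the reuse of the default Configuration are assumptions): it deletes /user/hadoop/myfile so that the create/writeUTF branch above runs again on the next execution. FileSystem.delete(Path, boolean) is the standard Hadoop call for this.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: remove the test file written by hdfs.main().
public class HdfsCleanup {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/user/hadoop/myfile");
        if (fs.exists(file)) {
            fs.delete(file, false);   // false = non-recursive; this is a single file
        }
        fs.close();
    }
}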
========================= Level 3 =======================
Level 3:
Practice task completed
Inverted index
import java.io.IOException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.util.Iterator;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.GenericOptionsParser;

public class InvertedIndex {

    // Mapper: count word occurrences within each input line and emit (word, "fileName@count").
    public static class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String fileName = fileSplit.getPath().getName();

            String word;
            IntWritable frequence = new IntWritable();
            int one = 1;
            Hashtable<String, Integer> hashmap = new Hashtable<String, Integer>();
            StringTokenizer itr = new StringTokenizer(value.toString());

            // Count how many times each word occurs in this line.
            for (; itr.hasMoreTokens(); ) {
                word = itr.nextToken();
                if (hashmap.containsKey(word)) {
                    hashmap.put(word, hashmap.get(word) + 1);
                } else {
                    hashmap.put(word, one);
                }
            }

            // Emit one (word, fileName@count) pair per distinct word.
            for (Iterator<String> it = hashmap.keySet().iterator(); it.hasNext(); ) {
                word = it.next();
                frequence = new IntWritable(hashmap.get(word));
                Text fileName_frequence = new Text(fileName + "@" + frequence.toString());
                context.write(new Text(word), fileName_frequence);
            }
        }
    }

    // Combiner: sum the counts of a word coming from the same file.
    public static class InvertedIndexCombiner extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String fileName = "";
            int sum = 0;
            String num;
            String s;
            for (Text val : values) {
                s = val.toString();
                fileName = s.substring(0, val.find("@"));
                num = s.substring(val.find("@") + 1, val.getLength());
                sum += Integer.parseInt(num);
            }
            IntWritable frequence = new IntWritable(sum);
            context.write(key, new Text(fileName + "@" + frequence.toString()));
        }
    }

    // Reducer: join all "fileName@count" postings of a word with ";".
    public static class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Iterator<Text> it = values.iterator();
            StringBuilder all = new StringBuilder();
            if (it.hasNext()) all.append(it.next().toString());
            for (; it.hasNext(); ) {
                all.append(";");
                all.append(it.next().toString());
            }
            context.write(key, new Text(all.toString()));
        }
    }

    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: InvertedIndex <in> <out>");
            System.exit(2);
        }
        try {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            Job job = new Job(conf, "invertedindex");
            job.setJarByClass(InvertedIndex.class);
            job.setMapperClass(InvertedIndexMapper.class);
            job.setCombinerClass(InvertedIndexCombiner.class);
            job.setReducerClass(InvertedIndexReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
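As a hedged illustration (file names and counts are hypothetical, and each file is assumed to fit in a single split): if "hadoop" occurs twice in a.txt and once in b.txt, InvertedIndexMapper emits (hadoop, a.txt@2) and (hadoop, b.txt@1); per-line counts are first aggregated in the mapper's Hashtable and then summed per file by InvertedIndexCombiner; finally InvertedIndexReducer joins the postings with ";", producing an output line of the form:

hadoop	a.txt@2;b.txt@1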
========================= Level 4 =======================
Level 4:
Practice task not completed
Web page ranking: the PageRank algorithm
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.StringTokenizer;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class PageRank {

    // Mapper: distribute each page's current rank evenly over its outlinks
    // and re-emit the outlink list (prefixed with "_") for the next iteration.
    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        private Text id = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Only process lines that start with a page id (a digit).
            if (line.substring(0, 1).matches("[0-9]{1}")) {
                boolean flag = false;
                if (line.contains("_")) {
                    line = line.replace("_", "");
                    flag = true;
                }
                String[] values = line.split("\t");
                Text t = new Text(values[0]);
                String[] vals = values[1].split(" ");
                String url = "_";   // collects the outlink list for the next round
                double pr = 0;      // current rank of this page
                int i = 0;
                int num = 0;        // number of outlinks
                if (flag) {
                    i = 2;
                    pr = Double.valueOf(vals[1]);
                    num = vals.length - 2;
                } else {
                    i = 1;
                    pr = Double.valueOf(vals[0]);
                    num = vals.length - 1;
                }
                for (; i < vals.length; i++) {
                    url = url + vals[i] + " ";
                    id.set(vals[i]);
                    Text prt = new Text(String.valueOf(pr / num));
                    context.write(id, prt);
                }
                context.write(t, new Text(url));
            }
        }
    }

    // Reducer: sum the rank contributions and apply PR = 0.15 + 0.85 * sum,
    // keeping the outlink list (the value containing "_") for the next iteration.
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        private Text result = new Text();
        private Double pr = new Double(0);

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            double sum = 0;
            String url = "";
            for (Text val : values) {
                if (!val.toString().contains("_")) {
                    sum = sum + Double.valueOf(val.toString());
                } else {
                    url = val.toString();
                }
            }
            pr = 0.15 + 0.85 * sum;
            String str = String.format("%.3f", pr);
            result.set(new Text(str + " " + url));
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        String paths = "file:///tmp/input/Wiki0";
        String path1 = paths;
        String path2 = "";
        // Run 20 iterations, feeding each round's output into the next.
        for (int i = 1; i <= 20; i++) {
            System.out.println("This is the " + i + "th job!");
            System.out.println("path1: " + path1);
            System.out.println("path2: " + path2);
            Configuration conf = new Configuration();
            Job job = new Job(