educoder Platform: HDFS and MapReduce Comprehensive Experiment
========================= Level 1 =======================
Level 1:
Practice task completed
WordCount word frequency statistics
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Mapper: split each input line into tokens and emit (word, 1) for every token.
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts for each word.
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
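As a hedged illustration (the sample line below is hypothetical, not part of the exercise data): for an input line "hello world hello", TokenizerMapper emits (hello,1), (world,1), (hello,1); IntSumReducer, which also runs as the combiner, sums the counts per word, so the final output contains roughly:

hello	2
world	1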
========================= Level 2 =======================
Level 2:
Practice task completed
HDFS file read and write
import java.io.IOException;
import java.sql.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class hdfs {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.getUri());

        // Create the file and write a UTF string into it if it does not exist yet.
        Path file = new Path("/user/hadoop/myfile");
        if (fs.exists(file)) {
            System.out.println("File exists.");
        } else {
            FSDataOutputStream outStream = fs.create(file);
            outStream.writeUTF("china cstor cstor cstor china");
            outStream.close();
        }

        // Read the string back.
        FSDataInputStream inStream = fs.open(file);
        String data = inStream.readUTF();

        // Print the file's metadata: owner, replication, modification time, block size.
        FileSystem hdfs = file.getFileSystem(conf);
        FileStatus[] fileStatus = hdfs.listStatus(file);
        for (FileStatus status : fileStatus) {
            System.out.println("File Owner: " + status.getOwner());
            System.out.println("File Replication: " + status.getReplication());
            System.out.println("File Modification Time: " + new Date(status.getModificationTime()));
            System.out.println("File Block Size: " + status.getBlockSize());
        }

        System.out.println(data);
        System.out.println("File name: " + file.getName());
        inStream.close();
        fs.close();
    }
}
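A minimal companion sketch, not part of the level (the class name HdfsCleanup and the reuse of the default Configuration are assumptions): it deletes /user/hadoop/myfile so that the create/writeUTF branch above runs again on the next execution. FileSystem.delete(Path, boolean) is the standard Hadoop call for this.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: remove the test file written by hdfs.main().
public class HdfsCleanup {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/user/hadoop/myfile");
        if (fs.exists(file)) {
            fs.delete(file, false);   // false = non-recursive; this is a single file
        }
        fs.close();
    }
}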
========================= Level 3 =======================
Level 3:
Practice task completed
Inverted index
import java.io.IOException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.util.Iterator;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.GenericOptionsParser;

public class InvertedIndex {

    // Mapper: count word occurrences within each input line and emit (word, "fileName@count").
    public static class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String fileName = fileSplit.getPath().getName();

            String word;
            IntWritable frequence = new IntWritable();
            int one = 1;
            Hashtable<String, Integer> hashmap = new Hashtable<String, Integer>();
            StringTokenizer itr = new StringTokenizer(value.toString());

            // Count how many times each word occurs in this line.
            for (; itr.hasMoreTokens(); ) {
                word = itr.nextToken();
                if (hashmap.containsKey(word)) {
                    hashmap.put(word, hashmap.get(word) + 1);
                } else {
                    hashmap.put(word, one);
                }
            }

            // Emit one (word, fileName@count) pair per distinct word.
            for (Iterator<String> it = hashmap.keySet().iterator(); it.hasNext(); ) {
                word = it.next();
                frequence = new IntWritable(hashmap.get(word));
                Text fileName_frequence = new Text(fileName + "@" + frequence.toString());
                context.write(new Text(word), fileName_frequence);
            }
        }
    }

    // Combiner: sum the counts of a word coming from the same file.
    public static class InvertedIndexCombiner extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String fileName = "";
            int sum = 0;
            String num;
            String s;
            for (Text val : values) {
                s = val.toString();
                fileName = s.substring(0, val.find("@"));
                num = s.substring(val.find("@") + 1, val.getLength());
                sum += Integer.parseInt(num);
            }
            IntWritable frequence = new IntWritable(sum);
            context.write(key, new Text(fileName + "@" + frequence.toString()));
        }
    }

    // Reducer: join all "fileName@count" postings of a word with ";".
    public static class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Iterator<Text> it = values.iterator();
            StringBuilder all = new StringBuilder();
            if (it.hasNext()) all.append(it.next().toString());
            for (; it.hasNext(); ) {
                all.append(";");
                all.append(it.next().toString());
            }
            context.write(key, new Text(all.toString()));
        }
    }

    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: InvertedIndex <in> <out>");
            System.exit(2);
        }
        try {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            Job job = new Job(conf, "invertedindex");
            job.setJarByClass(InvertedIndex.class);
            job.setMapperClass(InvertedIndexMapper.class);
            job.setCombinerClass(InvertedIndexCombiner.class);
            job.setReducerClass(InvertedIndexReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
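As a hedged illustration (file names and counts are hypothetical, and each file is assumed to fit in a single split): if "hadoop" occurs twice in a.txt and once in b.txt, InvertedIndexMapper emits (hadoop, a.txt@2) and (hadoop, b.txt@1); per-line counts are first aggregated in the mapper's Hashtable and then summed per file by InvertedIndexCombiner; finally InvertedIndexReducer joins the postings with ";", producing an output line of the form:

hadoop	a.txt@2;b.txt@1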
========================= Level 4 =======================
Level 4:
Practice task not completed
Web page ranking: the PageRank algorithm
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.StringTokenizer;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class PageRank {

    // Mapper: distribute each page's current rank evenly over its outlinks
    // and re-emit the outlink list (prefixed with "_") for the next iteration.
    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        private Text id = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Only process lines that start with a page id (a digit).
            if (line.substring(0, 1).matches("[0-9]{1}")) {
                boolean flag = false;
                if (line.contains("_")) {
                    line = line.replace("_", "");
                    flag = true;
                }
                String[] values = line.split("\t");
                Text t = new Text(values[0]);
                String[] vals = values[1].split(" ");
                String url = "_";   // collects the outlink list for the next round
                double pr = 0;      // current rank of this page
                int i = 0;
                int num = 0;        // number of outlinks
                if (flag) {
                    i = 2;
                    pr = Double.valueOf(vals[1]);
                    num = vals.length - 2;
                } else {
                    i = 1;
                    pr = Double.valueOf(vals[0]);
                    num = vals.length - 1;
                }
                for (; i < vals.length; i++) {
                    url = url + vals[i] + " ";
                    id.set(vals[i]);
                    Text prt = new Text(String.valueOf(pr / num));
                    context.write(id, prt);
                }
                context.write(t, new Text(url));
            }
        }
    }

    // Reducer: sum the rank contributions and apply PR = 0.15 + 0.85 * sum,
    // keeping the outlink list (the value containing "_") for the next iteration.
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        private Text result = new Text();
        private Double pr = new Double(0);

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            double sum = 0;
            String url = "";
            for (Text val : values) {
                if (!val.toString().contains("_")) {
                    sum = sum + Double.valueOf(val.toString());
                } else {
                    url = val.toString();
                }
            }
            pr = 0.15 + 0.85 * sum;
            String str = String.format("%.3f", pr);
            result.set(new Text(str + " " + url));
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        String paths = "file:///tmp/input/Wiki0";
        String path1 = paths;
        String path2 = "";
        // Run 20 iterations, feeding each round's output into the next.
        for (int i = 1; i <= 20; i++) {
            System.out.println("This is the " + i + "th job!");
            System.out.println("path1: " + path1);
            System.out.println("path2: " + path2);
            Configuration conf = new Configuration();
            Job job = new Job(