分布式计算、云计算与大数据 第2版 课件汇总 第6–11章 云存储技术——保险大数据分析案例.ppt
基于MAPREDUCE程序实例(HDFS)基于新API的WordCount分析基于MAPREDUCE程序实例(HDFS)基于新API的WordCount分析1.源代码程序publicclassWordCount{??publicstaticclassTokenizerMapper??extendsMapperObject,Text,Text,IntWritable{????privatefinalstaticIntWritableone=newIntWritable(1);????privateTextword=newText();????publicvoidmap(Objectkey,Textvalue,Contextcontext)????throwsIOException,InterruptedException{??????StringTokenizeritr=newStringTokenizer(value.toString());??????while(itr.hasMoreTokens()){????????this.word.set(itr.nextToken());????????context.write(this.word,one);??????}????}??}??publicstaticclassIntSumReducer??extendsReducerText,IntWritable,Text,IntWritable{????privateIntWritableresult=newIntWritable();????publicvoidreduce(Textkey,IterableIntWritablevalues,Contextcontext)????throwsIOException,InterruptedException{??????intsum=0;??????for(Iteratori=values.iterator();i.hasNext();sum+=val.get()){????????val=(IntWritable)i.next();??????}????????this.result.set(sum);??????context.write(key,this.result);????}??}基于MAPREDUCE程序实例(HDFS)基于新API的WordCount分析1.源代码程序??publicstaticvoidmain(String[]args)throwsIOException,ClassNotFoundException,InterruptedException{??Configurationconf=newConfiguration();??String[]otherArgs=newGenericOptionsParser(conf,args).getRemainingArgs();??if(otherArgs.length!=2){????System.err.println(Usage:wordcountinout);????System.exit(2);??}??Jobjob=Job.getInstance(conf,wordcount);??job.setJarByClass(WordCount.class);??job.setMapperClass(WordCount.TokenizerMapper.class);??job.setCombinerClass(WordCount.IntSumReducer.class);??job.setReducerClass(WordCount.IntSumReducer.class);??job.setOutputKeyClass(Text.class);??job.setOutputValueClass(IntWritable.class);??FileInputFormat.addInpu