<ruby id="bdb3f"></ruby>

    <p id="bdb3f"><cite id="bdb3f"></cite></p>

      <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
        <p id="bdb3f"><cite id="bdb3f"></cite></p>

          <pre id="bdb3f"></pre>
          <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

          <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
          <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

          <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                <ruby id="bdb3f"></ruby>

                ??碼云GVP開源項目 12k star Uniapp+ElementUI 功能強大 支持多語言、二開方便! 廣告
[TOC]

# Analysis

The inverted index is built with two MapReduce jobs: step one counts how many times each word appears in each file, and step two groups those per-file counts under each word.

![](https://box.kancloud.cn/ef92484a827af35468ead934d9977907_301x396.png)

# Preparing the data

~~~
hello--a.txt 1
hello--b.txt 2
hello--c.txt 1
allen--b.txt 2
jerry--a.txt 2
allen--a.txt 1
jerry--c.txt 2
~~~

# Code

~~~
package com.index;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class IndexStepTwo {

    public static class IndexStepTwoMapper extends Mapper<LongWritable, Text, Text, Text> {

        Text k = new Text();
        Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            // step one's output is tab-separated (TextOutputFormat's default); \s+ also tolerates space-separated test data
            String[] fields = line.split("\\s+");
            String word_file = fields[0];
            String count = fields[1];
            String[] split = word_file.split("--");
            String word = split[0];
            String file = split[1];
            k.set(word);
            v.set(file + "--" + count);
            // k: hello   v: a.txt--1
            context.write(k, v);
        }
    }

    public static class IndexStepTwoReduce extends Reducer<Text, Text, Text, Text> {

        Text v = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuffer sBuffer = new StringBuffer();
            for (Text value : values) {
                // concatenate the values into "file--count file--count ..."
                sBuffer.append(value.toString()).append(" ");
            }
            v.set(sBuffer.toString());
            context.write(key, v);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(IndexStepTwo.class);

        // tell the framework which mapper and reducer classes to use
        job.setMapperClass(IndexStepTwoMapper.class);
        job.setReducerClass(IndexStepTwoReduce.class);

        // tell the framework the output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // a combiner can be configured here
        job.setCombinerClass(IndexStepTwoReduce.class);

        // tell the framework which input/output components to use;
        // TextInputFormat is the built-in component for reading plain text files
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // tell the framework where the input data lives
        FileInputFormat.setInputPaths(job, new Path("/Users/jdxia/Desktop/website/hdfs/index/input/"));

        // delete the output directory if it already exists
        Path out = new Path("/Users/jdxia/Desktop/website/hdfs/index/output/");
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(out)) {
            fileSystem.delete(out, true);
        }

        // tell the framework where to write the results
        FileOutputFormat.setOutputPath(job, out);

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
~~~
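For intuition, here is a minimal standalone sketch (an illustration, not part of the original job) of what `IndexStepTwoReduce` produces for a single key. The hard-coded values are what the shuffle would deliver for `hello` given the sample data above; their order is not guaranteed.

~~~
import java.util.Arrays;
import java.util.List;

public class StepTwoReduceSketch {
    public static void main(String[] args) {
        // values delivered to the reducer for key "hello" (order not guaranteed)
        List<String> values = Arrays.asList("c.txt--1", "b.txt--2", "a.txt--1");

        // the same concatenation the reducer performs
        StringBuilder sb = new StringBuilder();
        for (String v : values) {
            sb.append(v).append(" ");
        }

        // prints: hello	c.txt--1 b.txt--2 a.txt--1
        System.out.println("hello" + "\t" + sb);
    }
}
~~~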
# Results

The .crc files in the output directory are just checksum files. The actual output looks like this:

~~~
allen	a.txt--1 b.txt--2 
hello	c.txt--1 b.txt--2 a.txt--1 
jerry	c.txt--2 a.txt--2 
~~~

# Prerequisite for this code

If the raw data instead looks like this:

![](https://box.kancloud.cn/6f75bbd78ca78bd88029d4e9b9ef0eda_169x174.png)

it first has to be turned into this form:

~~~
hello--a.txt 1
hello--b.txt 2
hello--c.txt 1
allen--b.txt 2
jerry--a.txt 2
allen--a.txt 1
jerry--c.txt 2
~~~

Code:

~~~
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class IndexStepOne {

    public static class IndexStepOneMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        Text k = new Text();
        IntWritable v = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] words = line.split(" ");

            // the file name comes from the input split this mapper is processing
            FileSplit split = (FileSplit) context.getInputSplit();
            String filename = split.getPath().getName();

            // output key: word--filename, value: 1
            for (String word : words) {
                k.set(word + "--" + filename);
                context.write(k, v);
            }
        }
    }

    public static class IndexStepOneReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        IntWritable v = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            v.set(count);
            context.write(key, v);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(IndexStepOne.class);

        // tell the framework which mapper and reducer classes to use
        job.setMapperClass(IndexStepOneMapper.class);
        job.setReducerClass(IndexStepOneReducer.class);

        // tell the framework the output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // a combiner can be configured here
        job.setCombinerClass(IndexStepOneReducer.class);

        // tell the framework which input/output components to use;
        // TextInputFormat is the built-in component for reading plain text files
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // tell the framework where the input data lives
        FileInputFormat.setInputPaths(job, new Path("D:/index/input"));

        // tell the framework where to write the results
        FileOutputFormat.setOutputPath(job, new Path("D:/index/output-1"));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
~~~
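To run the two steps end to end, a small driver can chain them so that step one's output directory becomes step two's input. The following is a minimal sketch, not part of the original article: it assumes both classes live in the same package, uses the local paths above as placeholders, and omits the combiner setup for brevity.

~~~
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IndexDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path rawInput   = new Path("D:/index/input");     // original text files (example path)
        Path stepOneOut = new Path("D:/index/output-1");  // word--file <tab> count
        Path stepTwoOut = new Path("D:/index/output-2");  // word <tab> file--count file--count ...

        // step one: count each word per file
        Job job1 = Job.getInstance(conf, "index-step-one");
        job1.setJarByClass(IndexStepOne.class);
        job1.setMapperClass(IndexStepOne.IndexStepOneMapper.class);
        job1.setReducerClass(IndexStepOne.IndexStepOneReducer.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(IntWritable.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job1, rawInput);
        FileOutputFormat.setOutputPath(job1, stepOneOut);
        if (!job1.waitForCompletion(true)) {
            System.exit(1);
        }

        // step two: group the per-file counts under each word
        Job job2 = Job.getInstance(conf, "index-step-two");
        job2.setJarByClass(IndexStepTwo.class);
        job2.setMapperClass(IndexStepTwo.IndexStepTwoMapper.class);
        job2.setReducerClass(IndexStepTwo.IndexStepTwoReduce.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job2, stepOneOut);
        FileOutputFormat.setOutputPath(job2, stepTwoOut);
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}
~~~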
                  <ruby id="bdb3f"></ruby>

                  <p id="bdb3f"><cite id="bdb3f"></cite></p>

                    <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
                      <p id="bdb3f"><cite id="bdb3f"></cite></p>

                        <pre id="bdb3f"></pre>
                        <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

                        <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
                        <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

                        <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                              <ruby id="bdb3f"></ruby>

                              哎呀哎呀视频在线观看