Hadoop Summary

1. Hadoop configuration files: core-site.xml, hdfs-site.xml, yarn-site.xml, workers (see the sketch after this list)

  1. Basic architecture: ResourceManager, NodeManager and ApplicationMaster on the YARN side, NameNode and DataNode on the HDFS side, plus the main job-submission and read/write flows
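
As a quick illustration of how these configuration files are consumed, here is a minimal sketch (file paths and host names are assumptions, not taken from this post) that loads them through Hadoop's Configuration class and reads back one typical key from each file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class ConfigFilesDemo {
    public static void main(String[] args) {
        // new Configuration() loads core-default.xml and core-site.xml from the classpath;
        // additional *-site.xml resources can be added explicitly.
        Configuration conf = new Configuration();
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));   // assumed path
        conf.addResource(new Path("/etc/hadoop/conf/yarn-site.xml"));   // assumed path

        System.out.println(conf.get("fs.defaultFS"));                   // core-site.xml, e.g. hdfs://namenode:8020
        System.out.println(conf.get("dfs.replication", "3"));           // hdfs-site.xml
        System.out.println(conf.get("yarn.resourcemanager.hostname"));  // yarn-site.xml
    }
}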

2. Hadoop command-line operations

hdfs dfs -put [-f] [-p] <localsrc> ... <dst>
hdfs dfs -get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>
hadoop fs -put [local path] [HDFS path]
hadoop fs -mkdir -p <hdfs dir>

3. Hadoop Java API operations

Mapper, Reducer, InputFormat, OutputFormat, Comparator, Partitioner, compression
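
The examples below cover most of these pieces, but no Partitioner is shown, so here is a minimal sketch of one (the class name and the "137"-prefix rule are assumptions for illustration): it routes keys to reducers by their prefix and would be registered with job.setPartitionerClass(PhonePartitioner.class) together with job.setNumReduceTasks(2).

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class PhonePartitioner extends Partitioner<Text, LongWritable> {

    @Override
    public int getPartition(Text key, LongWritable value, int numPartitions) {
        // Guard against a single-reducer job
        if (numPartitions < 2) {
            return 0;
        }
        // Keys starting with "137" go to partition 0, everything else to partition 1
        return key.toString().startsWith("137") ? 0 : 1;
    }
}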

Mapper example:
public class WordCountMapper  extends Mapper<LongWritable,Text,Text,LongWritable> {

    /**
     * Initialization: called once before any map() calls.
     *
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
    }

    /**
     * User business logic: called once per input record (one line of text),
     * emitting (word, 1) pairs.
     *
     * @param key
     * @param value
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String str = value.toString();
        String [] words  = StringUtils.split(str);
        for(String word:words){
            context.write(new Text(word),new LongWritable(1));
        }
    }

    /**
     * Clean up resources: called once after all map() calls have finished.
     *
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        super.cleanup(context);
    }
}
Reducer example:
public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long count =0;
        for(LongWritable value:values){
            count += value.get();
        }
        context.write(key,new LongWritable(count));
    }
}
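
The driver below also registers a WordCountCombiner that is not listed in this post. Because word counting is associative and commutative, a minimal sketch of such a combiner can simply repeat the reducer logic (registering WordCountReducer itself as the combiner would work just as well):

public class WordCountCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Pre-aggregate counts on the map side to cut down shuffle traffic
        long count = 0;
        for (LongWritable value : values) {
            count += value.get();
        }
        context.write(key, new LongWritable(count));
    }
}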
Driver example:
public class WordCountDriver {
    public static void main(String[] args) {
        Configuration config = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "xiemeng");
        config.set("fs.defaultFS","hdfs://192.168.64.128:9870");
        config.set("mapreduce.framework.name","yarn");
        config.set("yarn.resourcemanager.hostname","192.168.64.128");
        config.set("mapreduce.app-submission.cross-platform", "true");
        config.set("mapreduce.job.jar","file:/D:/code/hadoop-start-demo/target/hadoop-start-demo-1.0-SNAPSHOT.jar");
        try {
            Job job = Job.getInstance(config);
            job.setJarByClass(WordCountDriver.class);
            job.setMapperClass(WordCountMapper.class);
            job.setCombinerClass(WordCountCombiner.class);
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);


            FileInputFormat.setInputPaths(job,new Path("/wordcount/input"));
            FileOutputFormat.setOutputPath(job,new Path("/wordcount2/output"));

            // Only applicable when the map output key is OrderBean (shown here to illustrate the API)
            // job.setGroupingComparatorClass(OrderGroupingComparator.class);

            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);


            boolean complete = job.waitForCompletion(true);
            System.exit(complete ? 0:1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Grouping comparator example:
public class OrderGroupingComparator extends WritableComparator {

    public OrderGroupingComparator() {
        // Register the key class; "true" makes the comparator create instances for deserialization
        super(OrderBean.class,true);
    }

    // Group records that share the same orderId into a single reduce() call
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean orderBean = (OrderBean) a;
        OrderBean orderBean2 = (OrderBean) b;
        if(orderBean.getOrderId() > orderBean2.getOrderId()){
            return 1;
        }else if(orderBean.getOrderId() < orderBean2.getOrderId()){
            return -1;
        }else {
            return 0;
        }
    }
}
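
Both the comparator above and the driver line that registers it assume an OrderBean key class that is not listed in this post. A minimal sketch of such a bean (the field names and types are assumptions) keys by orderId with price as the secondary sort field:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

public class OrderBean implements WritableComparable<OrderBean> {

    private long orderId;
    private double price;

    public long getOrderId() { return orderId; }
    public void setOrderId(long orderId) { this.orderId = orderId; }
    public double getPrice() { return price; }
    public void setPrice(double price) { this.price = price; }

    // Full sort order: by orderId ascending, then by price descending,
    // so the first record seen in reduce() is the highest-priced item of the order
    @Override
    public int compareTo(OrderBean o) {
        int cmp = Long.compare(this.orderId, o.orderId);
        return cmp != 0 ? cmp : Double.compare(o.price, this.price);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(orderId);
        out.writeDouble(price);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        orderId = in.readLong();
        price = in.readDouble();
    }
}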
Custom OutputFormat example:
public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable> {
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new CustomWriter(taskAttemptContext);
    }

    protected static class CustomWriter extends RecordWriter<Text, NullWritable> {

        private FSDataOutputStream fos137;

        private FSDataOutputStream fos138;

        public CustomWriter(TaskAttemptContext context) throws IOException {
            // Open each target file once; creating the file inside write() would
            // overwrite it on every record and keep only the last line.
            FileSystem fs = FileSystem.get(context.getConfiguration());
            fos137 = fs.create(new Path("file:/D:/hadoop/output/format/out/137"), true);
            fos138 = fs.create(new Path("file:/D:/hadoop/output/format/out/138"), true);
        }

        @Override
        public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
            // Route keys starting with "137" to one file and everything else to the other
            String key = text.toString();
            FSDataOutputStream fos = StringUtils.startsWith(key, "137") ? fos137 : fos138;
            fos.write(key.getBytes());
            fos.write("\n".getBytes());
        }

        @Override
        public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            IOUtils.closeQuietly(fos137);
            IOUtils.closeQuietly(fos138);
        }
    }
}
Custom InputFormat example (whole-file input):
public class WholeFileInputFormat extends FileInputFormat<Text, BytesWritable> {

    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        // Each file is consumed as a single record, so it must never be split
        return false;
    }

    @Override
    public RecordReader<Text, BytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        WholeRecordReader reader  = new WholeRecordReader();
        reader.initialize(inputSplit, taskAttemptContext);
        return reader;
    }
}
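
To actually use these custom formats, they are registered on the job in the driver; a minimal wiring sketch (the job variable and the output path are assumptions, matching the style of the driver above):

// Sketch: plug the custom input/output formats into the job
job.setInputFormatClass(WholeFileInputFormat.class);
job.setOutputFormatClass(FilterOutputFormat.class);
// FileOutputFormat still needs an output directory for its committer (_SUCCESS marker etc.)
FileOutputFormat.setOutputPath(job, new Path("/format/output"));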
Writable bean example:
@Data
public class FlowBeanObj implements WritableComparable<FlowBeanObj> {

    private long upFlow;

    private long downFlow;

    private long sumFlow;

    // Serialization: the field order here must match readFields()
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    // Sort ascending by total flow
    @Override
    public int compareTo(FlowBeanObj o) {
        return Long.compare(this.getSumFlow(), o.getSumFlow());
    }
}
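
When FlowBeanObj is used as the map output key, the shuffle phase sorts records with the compareTo() above and no extra sorting code is needed; a minimal wiring sketch (the value type is an assumption):

// Sketch: sorting by total flow is driven entirely by FlowBeanObj.compareTo()
job.setMapOutputKeyClass(FlowBeanObj.class);
job.setMapOutputValueClass(Text.class);   // e.g. the phone number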
Custom RecordReader example:
public class WholeRecordReader extends RecordReader<Text, BytesWritable> {

    private Configuration config;

    private FileSplit fileSplit;

    private boolean isProgress = true;

    private BytesWritable value = new BytesWritable();

    private Text k = new Text();

    private FileSystem fs;

    private FSDataInputStream fis;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
        fileSplit = (FileSplit) inputSplit;
        this.config = context.getConfiguration();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        try {
            if (isProgress) {
                byte[] contents = new byte[(int) fileSplit.getLength()];
                Path path = fileSplit.getPath();
                fs = path.getFileSystem(config);
                fis = fs.open(path);
                IOUtils.readFully(fis,contents, 0,contents.length);
                value.set(contents, 0, contents.length);
                k.set(fileSplit.getPath().toString());
                isProgress = false;
                return true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }finally {
            IOUtils.closeQuietly(fis);
        }
        return false;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return k;
    }

    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // 0 before the single record has been read, 1 afterwards
        return isProgress ? 0.0f : 1.0f;
    }

    @Override
    public void close() throws IOException {
        // fis was already closed in nextKeyValue(); closeQuietly tolerates that and null.
        // Do not close the FileSystem here: FileSystem.get() returns a shared, cached instance.
        IOUtils.closeQuietly(fis);
    }
}
HDFS Java client example:
public class HdfsClient {
    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {
        Configuration config = new Configuration();
        config.set("fs.defaultFS","hdfs://localhost:9000");
        config.set("dfs.replication","2");
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"),config,"xieme");
        // Create a directory, then upload, download and rename a file
        fs.mkdirs(new Path("/hive3"));
        fs.copyFromLocalFile(new Path("file:/d:/elasticsearch.txt") ,new Path("/hive3"));
        fs.copyToLocalFile(false,new Path("/hive3/elasticsearch.txt"), new Path("file:/d:/hive3/elasticsearch2.txt"));
        fs.rename(new Path("/hive3/elasticsearch.txt"),new Path("/hive3/elasticsearch2.txt"));
        // Recursively list all files under the root path, including block locations
        RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs.listFiles(new Path("/"), true);
        while(locatedFileStatusRemoteIterator.hasNext()){
            LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
            System.out.print(next.getPath().getName()+"\t");
            System.out.print(next.getLen()+"\t");
            System.out.print(next.getGroup()+"\t");
            System.out.print(next.getOwner()+"\t");
            System.out.print(next.getPermission()+"\t");
            System.out.print(next.getPath()+"\t");
            BlockLocation[] blockLocations = next.getBlockLocations();
            for(BlockLocation queue: blockLocations){
                for(String host :queue.getHosts()){
                    System.out.print(host+"\t");
                }
            }
            System.out.println("");
        }
        //fs.delete(new Path("/hive3"),true);
        /*FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
        for(FileStatus fileStatus:fileStatuses){
            if(fileStatus.isDirectory()){
                System.out.println(fileStatus.getPath().getName());
            }
        }*/

        // Stream copy: upload a local file to HDFS through raw streams
        FileInputStream fis = new FileInputStream("d:/elasticsearch.txt");
        FSDataOutputStream fos = fs.create(new Path("/hive/elasticsearch.txt"));
        IOUtils.copyBytes(fis,fos, config);
        IOUtils.closeStream(fis);
        IOUtils.closeStream(fos);

        // Stream copy: download from HDFS, seeking past the first byte to demonstrate seek()
        FSDataInputStream fis2 = fs.open(new Path("/hive/elasticsearch.txt"));
        FileOutputStream fos2 = new FileOutputStream("d:/elasticsearch.tar.gz.part1");
        fis2.seek(1);
        IOUtils.copyBytes(fis2,fos2,config);
        /*byte [] buf = new byte[1024];
        for(int i=0; i<128;i++){
            while(fis2.read(buf)!=-1){
                fos2.write(buf);
            }
        }*/
        IOUtils.closeStream(fis2);
        IOUtils.closeStream(fos2);
        fs.close();
    }
}
4. Hadoop optimization