Hadoop: HDFS Shell Operations via the Java API

I. Managing HDFS Remotely from an IDE

1. Overview

In practice, we usually operate on Hadoop from an IDE, through code written in Java, Python, or another language. In this section, we will learn how to work with HDFS from an IDE.

2. Adding the Dependencies

The versions of the Hadoop dependencies you add should match the version of your own Hadoop server as closely as possible.
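
As a sketch, the core client dependency in Maven looks like the following; the version shown here is an assumption, so substitute whatever version your cluster actually runs:

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <!-- Assumed version; match your Hadoop server. -->
    <version>3.3.6</version>
</dependency>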

3. Getting a FileSystem Object

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;

public class TestGetFileSystem {

    public void testGetFileSystem() throws IOException {
        // 1. Create a Configuration object, which loads the four default
        //    configuration files: core-default.xml, hdfs-default.xml,
        //    mapred-default.xml, and yarn-default.xml.
        Configuration conf = new Configuration();
        // 2. Override the fs.defaultFS property.
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        // 3. The static method FileSystem.get(Configuration) returns the
        //    file system that fs.defaultFS points to.
        FileSystem fs = FileSystem.get(conf);
        System.out.println("File system class: " + fs.getClass().getName());
        fs.close();
    }

    public static void main(String[] args) throws IOException {
        TestGetFileSystem test = new TestGetFileSystem();
        test.testGetFileSystem();
    }
}
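
As an aside, FileSystem also has a three-argument overload, get(URI, Configuration, String), that names the target cluster and the acting user in a single call, avoiding the System.setProperty("HADOOP_USER_NAME", ...) workaround used in the examples below. A minimal sketch, with the same address and user assumptions as above:

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.net.URI;

public class TestGetFileSystemAsUser {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // get(URI, Configuration, String) specifies the cluster and the user
        // directly, so neither fs.defaultFS nor HADOOP_USER_NAME must be set.
        FileSystem fs = FileSystem.get(
                URI.create("hdfs://192.168.215.130:9820"), conf, "root");
        System.out.println("File system class: " + fs.getClass().getName());
        fs.close();
    }
}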

4. Uploading a File

fs.copyFromLocalFile(localPath, hdfsPath);

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestFileUpload {

    public void testFileUpload() throws IOException {
        // If an HDFS write fails with a permission error, you can change
        // the user the client acts as.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        FileSystem fs = FileSystem.get(conf);
        // Upload the local file F:/file1.txt to /file1 on HDFS.
        // 1. Describe both files with Path objects.
        Path localPath = new Path("F:/file1.txt");
        Path hdfsPath = new Path("/file1");
        // 2. Call the upload method.
        fs.copyFromLocalFile(localPath, hdfsPath);
        // 3. Close the file system.
        fs.close();
        System.out.println("Upload finished");
    }

    public static void main(String[] args) throws IOException {
        TestFileUpload test = new TestFileUpload();
        test.testFileUpload();
    }
}

5. Downloading a File

fs.copyToLocalFile(hdfsfile, local);

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestFileDownload {

    public void testFileDownload() throws IOException {
        // If an HDFS operation fails with a permission error, you can change
        // the user the client acts as.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        FileSystem fs = FileSystem.get(conf);
        // Download /file1 from HDFS to the local file F:/file2.txt.
        // 1. Describe both files with Path objects.
        Path hdfsfile = new Path("/file1");
        Path local = new Path("F:/file2.txt");
        // 2. Call the download method.
        fs.copyToLocalFile(hdfsfile, local);
        fs.close();
        System.out.println("Download finished");
    }

    public static void main(String[] args) throws IOException {
        TestFileDownload test = new TestFileDownload();
        test.testFileDownload();
    }
}

6. Creating a Directory

fs.mkdirs(hdfsfile);

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestMkdir {

    public void testMkdir() throws IOException {
        // If an HDFS operation fails with a permission error, you can change
        // the user the client acts as.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        FileSystem fs = FileSystem.get(conf);

        // 1. Describe the directory to create.
        Path hdfsfile = new Path("/dir1");
        // 2. Call mkdirs(), which also creates any missing parent
        //    directories, like 'mkdir -p'.
        fs.mkdirs(hdfsfile);
        fs.close();
        System.out.println("Directory created");
    }

    public static void main(String[] args) throws IOException {
        TestMkdir test = new TestMkdir();
        test.testMkdir();
    }
}

7. Deleting a Directory

fs.delete(hdfsfile, true);

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestDelete {

    public void testDelete() throws IOException {
        // If an HDFS operation fails with a permission error, you can change
        // the user the client acts as.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        FileSystem fs = FileSystem.get(conf);

        // 1. Describe the directory to delete.
        Path hdfsfile = new Path("/dir1");
        // 2. Call the delete method; 'true' deletes the directory recursively.
        fs.delete(hdfsfile, true);
        fs.close();
        System.out.println("Deleted");
    }

    public static void main(String[] args) throws IOException {
        TestDelete test = new TestDelete();
        test.testDelete();
    }
}
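
Note that delete() reports failure through its boolean return value rather than by throwing when nothing was removed. A small sketch of a more defensive variant, under the same cluster-address and user assumptions as above:

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestSafeDelete {

    public static void main(String[] args) throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        try (FileSystem fs = FileSystem.get(conf)) {
            Path target = new Path("/dir1");
            // Check existence first, then inspect delete()'s return value.
            if (!fs.exists(target)) {
                System.out.println(target + " does not exist");
            } else if (fs.delete(target, true)) { // true = delete recursively
                System.out.println("Deleted " + target);
            } else {
                System.out.println("Failed to delete " + target);
            }
        }
    }
}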

8. Renaming a File

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestRename {

    public void testRename() throws IOException {
        // If an HDFS operation fails with a permission error, you can change
        // the user the client acts as.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        FileSystem fs = FileSystem.get(conf);

        // 1. Rename /file1 to /file01.
        Path oldName = new Path("/file1");
        Path newName = new Path("/file01");
        // 2. Call the rename method.
        fs.rename(oldName, newName);
        fs.close();
        System.out.println("Renamed");
    }

    public static void main(String[] args) throws IOException {
        TestRename test = new TestRename();
        test.testRename();
    }
}
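
Like delete(), rename() signals failure through its boolean return value, for example when the source is missing or the destination already exists, so the result is worth checking. A brief sketch under the same assumptions:

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class TestCheckedRename {

    public static void main(String[] args) throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        try (FileSystem fs = FileSystem.get(conf)) {
            // rename() returns false on failure, e.g. if /file01 already exists.
            boolean renamed = fs.rename(new Path("/file1"), new Path("/file01"));
            System.out.println(renamed ? "Renamed" : "Rename failed");
        }
    }
}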

9. Uploading a File with IOUtils

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class TestPutFile {

    public void uploadFileToHdfs() throws IOException {
        // Set the user for HDFS operations.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");

        // try-with-resources closes the streams and the file system automatically.
        try (FileSystem fs = FileSystem.get(conf);
             InputStream input = Files.newInputStream(Paths.get("F:/a.txt"));
             FSDataOutputStream out = fs.create(new Path("/gg.txt"))) {

            // Copy the stream in 4096-byte chunks; 'false' leaves closing
            // to the try-with-resources block.
            IOUtils.copyBytes(input, out, 4096, false);
            System.out.println("Upload finished");
        }
    }

    public static void main(String[] args) {
        TestPutFile uploader = new TestPutFile();
        try {
            uploader.uploadFileToHdfs();
        } catch (IOException e) {
            System.err.println("File upload failed: " + e.getMessage());
            e.printStackTrace();
        }
    }
}

10. Downloading a File with IOUtils

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class TestGetFile {

    public void testGetFile() throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");

        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream input = fs.open(new Path("/file01"));
             OutputStream output = Files.newOutputStream(Paths.get("F:/gg.txt"))) {

            // This overload takes the buffer size from io.file.buffer.size in
            // the configuration and closes both streams when it finishes; the
            // extra close from try-with-resources is then a harmless no-op.
            IOUtils.copyBytes(input, output, conf);
            System.out.println("Download finished");
        }
    }

    public static void main(String[] args) {
        TestGetFile test = new TestGetFile();
        try {
            test.testGetFile();
        } catch (IOException e) {
            System.err.println("File download failed: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
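
The same stream-copy approach can also print an HDFS file straight to the console, much like hdfs dfs -cat, without writing a local copy. A minimal sketch, with the file path and cluster address being the same assumptions as above:

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.IOException;

public class TestCatFile {

    public static void main(String[] args) throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream input = fs.open(new Path("/file01"))) {
            // 'false' keeps System.out open; only the HDFS stream is ours to close.
            IOUtils.copyBytes(input, System.out, 4096, false);
        }
    }
}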

11. Viewing File Status

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import java.io.IOException;
import java.util.Arrays;

public class TestFileStatus {

    public void testFileStatus() throws IOException {
        // If an HDFS operation fails with a permission error, you can change
        // the user the client acts as.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        FileSystem fs = FileSystem.get(conf);
        // 1. Describe the file to inspect: /file01.
        Path path = new Path("/file01");
        // 2. Get the file's status information, including block locations.
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(path);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println("name: " + status.getPath());
            // Block locations of the file.
            BlockLocation[] locate = status.getBlockLocations();
            for (BlockLocation bl : locate) {
                System.out.println("Hosts holding replicas of this block: " + Arrays.toString(bl.getHosts()));
                System.out.println("Size of this block: " + bl.getLength());
                System.out.println("Host:port of each replica: " + Arrays.toString(bl.getNames()));
            }
            System.out.println("Configured block size: " + status.getBlockSize());
            System.out.println("Total file length: " + status.getLen());
        }
        fs.close();
    }

    public static void main(String[] args) throws IOException {
        TestFileStatus test = new TestFileStatus();
        test.testFileStatus();
    }
}
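
To walk a whole directory tree instead of a single file, listFiles(path, true) iterates recursively over every file beneath the path. A short sketch under the same assumptions:

package org.shell;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import java.io.IOException;

public class TestListFiles {

    public static void main(String[] args) throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.215.130:9820");
        try (FileSystem fs = FileSystem.get(conf)) {
            // true = recurse into subdirectories; only files are returned.
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/"), true);
            while (it.hasNext()) {
                LocatedFileStatus status = it.next();
                System.out.println(status.getPath() + "  " + status.getLen() + " bytes");
            }
        }
    }
}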