Hadoop学习之HDFS的常用操作
1.将HDFS中的文件复制到本地
java
复制代码
package com.shujia.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
 * JUnit example that copies a file from HDFS to the local file system.
 *
 * <p>The HDFS client is opened in a {@code @Before} fixture and released in an
 * {@code @After} fixture, so the test method only contains the copy itself.
 */
public class Demo02Download {
    /** HDFS client used by the test; assigned by {@link #getFileSystem()}. */
    FileSystem fileSystem;

    /**
     * Opens the HDFS client before each test method runs.
     *
     * @throws IOException if the file system cannot be obtained
     */
    @Before
    public void getFileSystem() throws IOException {
        Configuration entries = new Configuration();
        entries.set("fs.defaultFS", "hdfs://master:9000");
        fileSystem = FileSystem.get(entries);
    }

    /**
     * Copies {@code /NOTICE.txt} from HDFS to the local {@code data/} path.
     *
     * <p>{@code @Test} lets JUnit invoke this method directly, so no
     * {@code main} method is needed.
     *
     * @throws IOException if the copy fails
     */
    @Test
    public void getData() throws IOException {
        String hdfsPath = "/NOTICE.txt";
        String localPath = "data/";
        // Copy the HDFS file to the local file system.
        fileSystem.copyToLocalFile(new Path(hdfsPath), new Path(localPath));
    }

    /**
     * Closes the HDFS client after each test method.
     *
     * <p>JUnit runs {@code @After} methods even when the {@code @Before} method
     * threw, in which case {@code fileSystem} was never assigned. The null guard
     * keeps the original setup failure from being masked by a
     * {@link NullPointerException}.
     *
     * @throws IOException if closing the client fails
     */
    @After
    public void close() throws IOException {
        if (fileSystem != null) {
            fileSystem.close();
        }
    }
}
2.上传数据到HDFS中
java
复制代码
package com.shujia.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Uploads a local file to HDFS, once without and once with an explicit user.
 */
public class Demo04PutData {
    /**
     * Runs both upload variants in sequence.
     */
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        putData();
        putData2();
    }

    /**
     * Uploads {@code hadoop/data/students.txt} to {@code /data/} on HDFS
     * without passing an explicit user to {@code FileSystem.get}.
     *
     * @throws IOException if the connection or the upload fails
     */
    public static void putData() throws IOException {
        Configuration entries = new Configuration();
        entries.set("fs.defaultFS", "hdfs://master:9000");
        // try-with-resources closes the client even when the upload throws.
        try (FileSystem fileSystem = FileSystem.get(entries)) {
            // Upload the local file to HDFS.
            fileSystem.copyFromLocalFile(new Path("hadoop/data/students.txt"), new Path("/data/"));
        }
    }

    /**
     * Uploads {@code hadoop/data/students.txt} to {@code /data/} on HDFS,
     * passing the user name "root" to
     * {@code FileSystem.get(URI uri, Configuration conf, String user)}.
     *
     * @throws IOException          if the connection or the upload fails
     * @throws URISyntaxException   if the HDFS URI is malformed
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void putData2() throws IOException, URISyntaxException, InterruptedException {
        Configuration entries = new Configuration();
        entries.set("fs.defaultFS", "hdfs://master:9000");
        URI uri = new URI("hdfs://master:9000");
        // Passing a user here changes the user recorded for the uploaded file.
        try (FileSystem fileSystem = FileSystem.get(uri, entries, "root")) {
            fileSystem.copyFromLocalFile(new Path("hadoop/data/students.txt"), new Path("/data/"));
        }
    }
}
3.在HDFS上创建文件目录
java
复制代码
package com.shujia.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Creates a directory on HDFS.
 */
public class Demo05MakeDir {
    /**
     * Creates the example directory on HDFS.
     */
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        mkdir();
    }

    /**
     * Creates the directory {@code /api} on HDFS as user "root".
     *
     * @throws IOException          if the connection fails or the directory
     *                              could not be created
     * @throws URISyntaxException   if the HDFS URI is malformed
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void mkdir() throws IOException, URISyntaxException, InterruptedException {
        Configuration entries = new Configuration();
        entries.set("fs.defaultFS", "hdfs://master:9000");
        URI uri = new URI("hdfs://master:9000");
        Path directory = new Path("/api");
        // try-with-resources closes the client even when mkdirs throws.
        try (FileSystem fileSystem = FileSystem.get(uri, entries, "root")) {
            // A nested path such as "/api/1/2" can be passed to mkdirs in the same way.
            if (!fileSystem.mkdirs(directory)) {
                // Surface the failure instead of silently dropping the result.
                throw new IOException("Failed to create directory: " + directory);
            }
        }
    }
}
4.删除HDFS上的文件目录
java
复制代码
package com.shujia.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Deletes a directory on HDFS.
 */
public class Demo06Delete {
    /**
     * Deletes the example directory on HDFS.
     */
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        delete();
    }

    /**
     * Recursively deletes {@code /api} on HDFS as user "root".
     *
     * @throws IOException          if the connection or the delete fails
     * @throws URISyntaxException   if the HDFS URI is malformed
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void delete() throws IOException, URISyntaxException, InterruptedException {
        Configuration entries = new Configuration();
        entries.set("fs.defaultFS", "hdfs://master:9000");
        URI uri = new URI("hdfs://master:9000");
        // try-with-resources closes the client even when delete throws.
        try (FileSystem fileSystem = FileSystem.get(uri, entries, "root")) {
            // recursive = true: if the path is a directory, it is deleted together
            // with its contents; with false, a directory delete raises an exception.
            // For a plain file, recursive may be either true or false.
            fileSystem.delete(new Path("/api"), true);
        }
    }
}
5.查看HDFS文件系统中文件和目录的元数据
java
复制代码
package com.shujia.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Prints metadata of files and directories stored on HDFS.
 */
public class Demo07Liststatus {
    /**
     * Runs the block-location example.
     */
    public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException {
        getBlockLocation();
    }

    /**
     * Prints the status of {@code /hadoop-3.1.3.tar.gz} followed by the length,
     * offset, hosts and names of every block covering the whole file.
     *
     * @throws IOException          if the connection or a metadata lookup fails
     * @throws URISyntaxException   if the HDFS URI is malformed
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void getBlockLocation() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = openFileSystem()) {
            FileStatus fileStatus = fileSystem.getFileStatus(new Path("/hadoop-3.1.3.tar.gz"));
            System.out.println("路径:" + fileStatus.getPath());
            System.out.println("长度:" + fileStatus.getLen());
            System.out.println("副本数:" + fileStatus.getReplication());
            // getFileBlockLocations(file, start, len) returns the blocks that hold
            // the byte range [start, start + len) of the file; here, the whole file.
            BlockLocation[] fileBlockLocations =
                    fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
            for (BlockLocation fileBlockLocation : fileBlockLocations) {
                System.out.println("整个长度:" + fileBlockLocation.getLength());
                System.out.println("偏移量,从文件的什么位置开始:" + fileBlockLocation.getOffset());
                // getHosts()/getNames() return String[]; join them so the values are
                // printed rather than the array's identity string.
                System.out.println("整个主机:" + String.join(", ", fileBlockLocation.getHosts()));
                System.out.println("整个名称:" + String.join(", ", fileBlockLocation.getNames()));
            }
        }
    }

    /**
     * Prints the path, length and replication of {@code /hadoop-3.1.3.tar.gz}.
     *
     * @throws IOException          if the connection or the metadata lookup fails
     * @throws URISyntaxException   if the HDFS URI is malformed
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void getFileStatus() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = openFileSystem()) {
            // getFileStatus() returns a FileStatus object carrying the metadata of
            // a single file or directory.
            FileStatus fileStatus = fileSystem.getFileStatus(new Path("/hadoop-3.1.3.tar.gz"));
            System.out.println("路径:" + fileStatus.getPath());
            System.out.println("长度:" + fileStatus.getLen());
            System.out.println("副本数:" + fileStatus.getReplication());
        }
    }

    /**
     * Lists the entries directly under the HDFS root. For files it prints the
     * path, block size and length; for everything else only the path.
     *
     * @throws IOException          if the connection or the listing fails
     * @throws URISyntaxException   if the HDFS URI is malformed
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void listStatus() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = openFileSystem()) {
            // listStatus() returns one FileStatus per entry under the given path.
            FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/"));
            for (FileStatus fileStatus : fileStatuses) {
                System.out.println(fileStatus.getPath());
                if (fileStatus.isFile()) {
                    System.out.println("Block块大小:" + fileStatus.getBlockSize());
                    System.out.println("长度:" + fileStatus.getLen());
                }
            }
        }
    }

    /** Opens an HDFS client for hdfs://master:9000 as user "root"; the caller must close it. */
    private static FileSystem openFileSystem() throws IOException, URISyntaxException, InterruptedException {
        Configuration entries = new Configuration();
        entries.set("fs.defaultFS", "hdfs://master:9000");
        URI uri = new URI("hdfs://master:9000");
        return FileSystem.get(uri, entries, "root");
    }
}