HDFS java API操作

it2023-08-20  66

HDFS的javaAPI操作

     目标:掌握如何使用API对HDFS上的目录和数据进行增、删、改、查操作。

1.idea创建maven工程

2.修改pom.xml文件如下:

(需要下载jar包,时间可能稍长)

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com</groupId>          <!-- your own group id -->
    <artifactId>aa</artifactId>     <!-- your own project name -->
    <version>1.0-SNAPSHOT</version>

    <!-- CDH artifacts are not in Maven Central; the Cloudera repo is required. -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <dependencies>
        <!-- NOTE: Maven coordinates are case-sensitive. The groupId must be
             org.apache.hadoop (lowercase) and the artifactIds hadoop-*,
             otherwise the download will fail with "artifact not found". -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-mr1-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0-cdh5.14.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/junit/junit -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <!-- Pin a concrete version: <version>RELEASE</version> floats to
             whatever is newest and makes the build non-reproducible. -->
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>6.14.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.9</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <minimizeJar>true</minimizeJar>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

3.下载完成后编写java类。(下面的代码段可以逐个取消注释,一个一个尝试)

import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import java.net.URI; public class HDFSDemo { public static void main(String[] args) throws Exception { /* //创建文件夹 Configuration configuration = new Configuration(); FileSystem fs=FileSystem.get(new URI("hdfs://192.168.10.101:8020"),configuration); boolean b = fs.mkdirs(new Path("/003")); if (b){ System.out.println("成功!"); }else { System.out.println("失败!"); } */ /* //删除文件夹 Configuration configuration = new Configuration(); configuration.set("fs.defaultFS","hdfs://192.168.10.101:8020"); FileSystem fs = FileSystem.newInstance(new URI("/"), configuration); boolean b = fs.delete(new Path("/003"), true); if (b){ System.out.println("成功!"); }else { System.out.println("失败!"); } */ /* //修改文件夹 Configuration configuration = new Configuration(); FileSystem fs= FileSystem.get(new URI("hdfs://192.168.10.101:8020"),configuration); boolean b = fs.rename(new Path("/002"),new Path("/003")); if (b){ System.out.println("成功!"); }else { System.out.println("失败!"); } */ /* //查看文件夹 Configuration configuration = new Configuration(); FileSystem fs= FileSystem.get(new URI("hdfs://192.168.10.101:8020"),configuration); FileStatus[] fi = fs.listStatus(new Path("/")); for (FileStatus f : fi) { System.out.println("path:"+f.getPath()); System.out.println("name:"+f.getPath().getName()); } */ /* //上传数据 Configuration configuration = new Configuration(); FileSystem fs= FileSystem.get(new URI("hdfs://192.168.10.101:8020"),configuration); fs.copyFromLocalFile(new Path(""),new Path("/")); fs.close(); */ //下载数据 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://192.168.10.101:8020"), configuration); fs.copyToLocalFile(new Path(""), new Path("/")); fs.close(); } }

如果执行时出现以下错误,可以不用理会,不会影响程序的执行;配置好hadoop环境变量之后重启开发工具即可消除。

获取FileSystem的几种方式 

 第一种: @Test public void getFileSystem() throws URISyntaxException, IOException { Configuration configuration = new Configuration(); FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.52.100:8020"), configuration); System.out.println(fileSystem.toString()); }  第二种: @Test public void getFileSystem2() throws URISyntaxException, IOException { Configuration configuration = new Configuration(); configuration.set("fs.defaultFS","hdfs://192.168.52.100:8020"); FileSystem fileSystem = FileSystem.get(new URI("/"), configuration); System.out.println(fileSystem.toString()); }

3. 第三种:

/**
 * Way 3: like way 1, but FileSystem.newInstance() always returns a fresh,
 * uncached FileSystem object instead of the shared cached one.
 */
@Test
public void getFileSystem3() throws URISyntaxException, IOException {
    Configuration conf = new Configuration();
    FileSystem fileSystem = FileSystem.newInstance(new URI("hdfs://192.168.52.100:8020"), conf);
    System.out.println(fileSystem.toString());
}

 4.第四种:

/**
 * Way 4: configure fs.defaultFS and call FileSystem.newInstance() with the
 * Configuration alone — no URI argument needed.
 */
@Test
public void getFileSystem4() throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://192.168.52.100:8020");
    FileSystem fileSystem = FileSystem.newInstance(conf);
    System.out.println(fileSystem.toString());
}

 递归遍历文件系统当中的所有文件

通过递归遍历hdfs文件系统

/**
 * Recursively walks the HDFS file system starting at "/" and prints the
 * path of every file found, using hand-written recursion over listStatus().
 *
 * <p>Fix over the original: the FileSystem is now closed when the walk
 * finishes, so the connection is not leaked.
 */
@Test
public void listFile() throws Exception {
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.52.100:8020"), new Configuration());
    try {
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/"));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isDirectory()) {
                // Descend into sub-directories recursively.
                listAllFiles(fileSystem, fileStatus.getPath());
            } else {
                System.out.println("文件路径为" + fileStatus.getPath().toString());
            }
        }
    } finally {
        fileSystem.close();
    }
}

/**
 * Helper: prints every file under {@code path}, recursing into directories.
 *
 * @param fileSystem open FileSystem handle (owned and closed by the caller)
 * @param path       directory to walk
 */
public void listAllFiles(FileSystem fileSystem, Path path) throws Exception {
    FileStatus[] fileStatuses = fileSystem.listStatus(path);
    for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isDirectory()) {
            listAllFiles(fileSystem, fileStatus.getPath());
        } else {
            System.out.println("文件路径为" + fileStatus.getPath());
        }
    }
}

官方提供的API直接遍历

/** * 递归遍历官方提供的API版本 * @throws Exception */ @Test public void listMyFiles()throws Exception{ //获取fileSystem类 FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.52.100:8020"), new Configuration()); //获取RemoteIterator 得到所有的文件或者文件夹,第一个参数指定遍历的路径,第二个参数表示是否要递归遍历 RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fileSystem.listFiles(new Path("/"), true); while (locatedFileStatusRemoteIterator.hasNext()){ LocatedFileStatus next = locatedFileStatusRemoteIterator.next(); System.out.println(next.getPath().toString()); } fileSystem.close(); }

 

 

 

 

 

 

 

 

最新回复(0)