基本使用 · Hadoop2.x

Hadoop雖然安裝在Linux上，但是在Windows上寫代碼時也需要配置hadoop環境。 [TOC] # 1. Windows環境搭建 1. 官網下載與Linux中一致的hadoop安裝包 https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/ ![](https://img.kancloud.cn/4d/05/4d05267dafd00df04cc8e9a1502c7463_1040x256.png) Windows和Linux使用的是同一個.tar.gz文件。 2. 將安裝包解壓到D盤或其他盤符下 ![](https://img.kancloud.cn/89/75/897519331713ae9c760ff3ad33299f98_1145x38.png) 3. 添加 hadoop.dll 和 winutils.exe 到 D:\hadoop-2.6.0-cdh5.14.2\bin 目錄下（這兩個文件需自行從網上下載，版本要與所用的hadoop版本匹配） 4. 添加hadoop到Windows的環境變量中 ![](https://img.kancloud.cn/5b/1f/5b1f39fcad70c06ff8644b324ee5fa52_841x219.png) ![](https://img.kancloud.cn/ad/48/ad486f29d72d09fc30df5c365044ba1b_1219x347.png) <br/> # 2. Java API 1. 使用IDEA創建一個Maven工程 ![](https://img.kancloud.cn/ce/73/ce73b4a55efbe6cd9efbec22fd7e964d_1055x464.png) 2. 添加依賴 *`pom.xml`* ```xml  <repositories> <repository> <id>cloudera</id> <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url> </repository> </repositories> <dependencies> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>RELEASE</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-core</artifactId> <version>2.8.2</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.6.0</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.6.0</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.6.0</version> </dependency> </dependencies> ``` *`resources/log4j.properties`* ```properties log4j.rootLogger=INFO, stdout log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n log4j.appender.logfile=org.apache.log4j.FileAppender log4j.appender.logfile.File=target/spring.log log4j.appender.logfile.layout=org.apache.log4j.PatternLayout 
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n ``` 3. Java程序 *`com/exa/hdfs001/HdfsClient.java`* ```java package com.exa.hdfs001; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.junit.Test; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; public class HdfsClient { /** * 創建HDFS文件目錄（父目錄不存在時會一併創建），目錄已存在時不會覆蓋 */ @Test public void hdfsMkdir() throws IOException, URISyntaxException, InterruptedException { // 1. 獲取文件系統 Configuration configuration = new Configuration(); // 配置在集群上運行 // configuration.set("fs.defaultFS", "hdfs://hadoop101:9000"); // FileSystem fs = FileSystem.get(configuration); /* 客戶端去操作 hdfs 時，是有一個用戶身份的。默認情況下，hdfs 客戶端 api 會從 jvm 中獲取一個參數來作為自己的用戶身份：-DHADOOP_USER_NAME=root，root 為用戶名稱。 */ FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // 2. 創建目錄 fs.mkdirs(new Path("/user/hadoop/input002")); // 3. 關閉資源 fs.close(); } /** * 上傳文件到HDFS系統，原文件存在則覆蓋 */ @Test public void copyFromLocalFile() throws URISyntaxException, IOException, InterruptedException { // 1. 獲取文件系統 Configuration configuration = new Configuration(); // 副本數可以在三個地方設置，優先級從高到低為 Java代碼中的設置 -> Java項目根目錄下的hdfs-site.xml配置 // -> 服務器中的默認設置 // configuration.set("dfs.replication", "2"); FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // 2. 上傳文件 fs.copyFromLocalFile(new Path("d:/hello.txt"), new Path("/user/hadoop/input002/hello.txt")); // 3. 關閉資源 fs.close(); } /** * HDFS文件下載到本地，如果原文件存在則覆蓋 */ @Test public void copyToLocalFile() throws URISyntaxException, IOException, InterruptedException { // 1. 
獲取文件系統 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem) // delSrc false不將原文件刪除，true將原文件刪除 // src 被下載的文件 // dst 將文件下載到哪 // useRawLocalFileSystem true使用RawLocalFileSystem，不生成.crc校驗文件；false使用帶校驗的本地文件系統，會生成.crc校驗文件 fs.copyToLocalFile(false, new Path("/user/hadoop/input002/hello.txt"), new Path("d:/hello.txt"), true); fs.close(); } /** * 更改HDFS文件名 */ @Test public void hdfsRename() throws URISyntaxException, IOException, InterruptedException { // 獲取文件系統 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // 重命名 fs.rename(new Path("/user/hadoop/input002/hello.txt"), new Path("/user/hadoop/input002/hello002.txt")); fs.close(); } /** * HDFS文件詳細查詢 */ @Test public void hdfsListFiles() throws URISyntaxException, IOException, InterruptedException { // 獲取文件系統 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // 獲取文件詳情 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true); while (listFiles.hasNext()) { LocatedFileStatus status = listFiles.next(); System.out.println("文件名：" + status.getPath().getName()); System.out.println("長度：" + status.getLen()); System.out.println("權限：" + status.getPermission()); System.out.println("所屬組：" + status.getGroup()); // 獲取塊信息 BlockLocation[] blockLocations = status.getBlockLocations(); for (BlockLocation blockLocation : blockLocations) { // 獲取塊存儲的主機節點 String[] hosts = blockLocation.getHosts(); for (String host : hosts) { System.out.println("host：" + host); } } System.out.println("----------------------------------"); } fs.close(); } /** * HDFS文件和文件夾判斷 */ @Test public void hdfsListStatus() throws URISyntaxException, IOException, InterruptedException { // 獲取文件系統 Configuration configuration = new Configuration(); FileSystem 
fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // 獲取 / 目錄下所有直接子項（文件和目錄）的狀態，不遞歸到下層目錄 FileStatus[] listStatus = fs.listStatus(new Path("/")); for (FileStatus fileStatus : listStatus) { if (fileStatus.isFile()) { // 是文件 System.out.println("f:" + fileStatus.getPath().getName()); } else { System.out.println("d:" + fileStatus.getPath().getName()); } } fs.close(); } /** * 刪除HDFS文件或目錄 */ @Test public void hdfsDelete() throws URISyntaxException, IOException, InterruptedException { // 獲取文件系統 Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://hadoop101:9000"), configuration, "root"); // 刪除 fs.delete(new Path("/user/hadoop/input002"), true); fs.close(); } } ``` 上面提到的用於設置副本數的 hdfs-site.xml 配置內容如下： ```xml <?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> <property> <name>dfs.replication</name> <value>1</value> </property> </configuration> ```