HDFS的API操作 | 易学教程

大家好, 我是上白书妖!
知识源于积累,登峰造极源于自律
今天我根据以前所学的一些文献、笔记等资料整理出一些小知识点,有不当之处,欢迎各位斧正
package com.alibaba.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * JUnit examples that obtain an HDFS client connection and operate on a Hadoop cluster:
 * creating directories, uploading, downloading, deleting, listing, renaming and
 * stream-based / positioned reads.
 *
 * <p>Every test opens its own {@link FileSystem} and closes it (together with any streams)
 * through try-with-resources, so resources are released even when an operation throws.
 *
 * @author 上白书妖
 * @date 2020/2/26 21:48
 */
public class HdfsClient {

    /** NameNode address used by every example. */
    private static final String HDFS_URI = "hdfs://hadoop102:9000";

    /** User the examples act as on the cluster. */
    private static final String HDFS_USER = "shangbaishuyao";

    /**
     * Number of bytes the seek examples treat as "one block" (128 MB).
     * NOTE(review): presumably matches the cluster's dfs.blocksize - confirm before reuse.
     */
    private static final long BLOCK_SIZE = 128L * 1024 * 1024;

    /**
     * Opens a client for {@link #HDFS_URI} acting as {@link #HDFS_USER}.
     *
     * <p>Equivalent alternative when running on the cluster itself:
     * {@code configuration.set("fs.defaultFS", "hdfs://hadoop102:9000")} followed by
     * {@code FileSystem.get(configuration)}.
     *
     * @param configuration client-side configuration to use for the connection
     * @return the file system handle; the caller must close it
     */
    private static FileSystem openFileSystem(Configuration configuration)
            throws IOException, InterruptedException, URISyntaxException {
        return FileSystem.get(new URI(HDFS_URI), configuration, HDFS_USER);
    }

    /**
     * Creates the directory {@code /1108/xw/shangbaishuyao} and prints whether it succeeded.
     */
    @Test
    public void testMkdirs() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration)) {
            boolean result = fs.mkdirs(new Path("/1108/xw/shangbaishuyao"));
            System.out.println("result:" + result);
        }
    }

    /**
     * Uploads a local file to HDFS (used to test parameter priority).
     *
     * <p>{@code dfs.replication} is set to 2 in code before connecting.
     */
    @Test
    public void testCopyFromLocalFile() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();
        configuration.set("dfs.replication", "2");

        try (FileSystem fs = openFileSystem(configuration)) {
            fs.copyFromLocalFile(new Path("H:" + File.separator + "hello1.txt"), new Path("/bigdata0615"));
        }

        System.out.println("over");
    }

    /**
     * Downloads {@code /banzhang.txt} from HDFS to {@code H:/banhua.txt}.
     */
    @Test
    public void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration)) {
            // Arguments, in order:
            //   delSrc                - whether to delete the source file after copying
            //   src                   - HDFS path of the file to download
            //   dst                   - local path to download the file to
            //   useRawLocalFileSystem - whether to use the raw local file system as the target
            fs.copyToLocalFile(false, new Path("/banzhang.txt"), new Path("H:/banhua.txt"), true);
        }

        System.out.println("over");
    }

    /**
     * Recursively deletes the HDFS directory {@code /1108/}.
     */
    @Test
    public void testDelete() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration)) {
            // second argument true = delete recursively
            fs.delete(new Path("/1108/"), true);
        }
    }

    /**
     * Recursively lists every file under {@code /} and prints its name, length, permission,
     * group and the hosts that store each of its blocks.
     */
    @Test
    public void testListFiles() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration)) {
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);

            while (listFiles.hasNext()) {
                LocatedFileStatus status = listFiles.next();

                System.out.println("文件名:" + status.getPath().getName());
                System.out.println("文件长度:" + status.getLen());
                // Label fixed: this line prints the permission, not the length.
                System.out.println("文件权限:" + status.getPermission());
                System.out.println("文件分组:" + status.getGroup());

                // Block locations: print every host holding a replica of each block.
                for (BlockLocation blockLocation : status.getBlockLocations()) {
                    for (String host : blockLocation.getHosts()) {
                        System.out.println("文件块所在的主机节点:" + host);
                    }
                }

                System.out.println("-----------班长的分割线----------");
            }
        }

        System.out.println("over");
    }

    /**
     * Distinguishes files from directories: first recursively for the whole tree via
     * {@link #listDirectoryAndFile(String, FileSystem)}, then for the direct children of
     * {@code /} only.
     */
    @Test
    public void testListStatus() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration)) {
            System.out.println("========================递归判断其子子孙孙是文件还是文件夹========================");
            listDirectoryAndFile("/", fs);
            System.out.println("==================================递归判断完毕====================================");

            // Non-recursive variant: only the direct children of the root.
            for (FileStatus fileStatus : fs.listStatus(new Path("/"))) {
                if (fileStatus.isFile()) {
                    System.out.println("文件:" + fileStatus.getPath().getName());
                } else {
                    System.out.println("目录:" + fileStatus.getPath().getName());
                }
            }
        }

        System.out.println("运行结束");
    }

    /**
     * Recursively prints every sub-directory and file below the given directory.
     *
     * <p>Each entry is printed exactly once with its absolute path inside the file system.
     * The path is taken from the entry's URI, so it does not depend on the scheme/authority
     * prefix of the cluster address.
     *
     * @param path directory to start from, e.g. {@code "/"}
     * @param fs   open file system to query; it is not closed by this method
     * @throws IOException if listing a directory fails
     */
    public void listDirectoryAndFile(String path, FileSystem fs) throws IOException {
        for (FileStatus fileStatus : fs.listStatus(new Path(path))) {
            // Path component only, e.g. hdfs://hadoop102:9000/bigdata0615 -> /bigdata0615
            String currentPath = fileStatus.getPath().toUri().getPath();

            if (fileStatus.isFile()) {
                System.out.println("文件:" + currentPath);
            } else {
                System.out.println("目录:" + currentPath);
                // Descend into the sub-directory.
                listDirectoryAndFile(currentPath, fs);
            }
        }
    }

    /**
     * Renames {@code /shangbaishuyao.txt} to {@code /shangbaishuyao_renamed.txt} and prints
     * the result.
     *
     * <p>The source and destination must differ; the earlier version used the same path for
     * both, which made the test a no-op.
     */
    @Test
    public void testRename() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration)) {
            boolean result = fs.rename(new Path("/shangbaishuyao.txt"), new Path("/shangbaishuyao_renamed.txt"));
            System.out.println("result:" + result);
        }
    }

    /**
     * Stream-based upload: copies the local file {@code H:\shangbaishuyao.txt} to the HDFS
     * root as {@code /shangbaishuyao.txt}.
     */
    @Test
    public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration);
             FileInputStream fis = new FileInputStream(new File("H:" + File.separator + "shangbaishuyao.txt"));
             FSDataOutputStream fos = fs.create(new Path("/shangbaishuyao.txt"))) {
            // close=false: try-with-resources owns the streams.
            IOUtils.copyBytes(fis, fos, configuration, false);
        }
    }

    /**
     * Positioned read, part 1: downloads the first {@link #BLOCK_SIZE} bytes of
     * {@code /hadoop-2.7.2.tar.gz} to {@code H:/hadoop-2.7.2.tar.gz.part1}.
     *
     * <p>Only the bytes actually returned by each {@code read} are written, and copying stops
     * at end of file, so a short read or a file smaller than one block cannot corrupt the
     * output.
     */
    @Test
    public void readFileSeek1() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration);
             FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
             FileOutputStream fos = new FileOutputStream(new File("H:/hadoop-2.7.2.tar.gz.part1"))) {
            byte[] buf = new byte[1024];
            long remaining = BLOCK_SIZE;

            while (remaining > 0) {
                int read = fis.read(buf, 0, (int) Math.min(buf.length, remaining));
                if (read < 0) {
                    break; // end of file reached before a full block was copied
                }
                fos.write(buf, 0, read);
                remaining -= read;
            }
        }
    }

    /**
     * Positioned read, part 2: seeks past the first {@link #BLOCK_SIZE} bytes of
     * {@code /hadoop-2.7.2.tar.gz} and downloads the rest to
     * {@code H:/hadoop-2.7.2.tar.gz.part2}.
     *
     * <p>Part 3, merging: in a Windows command prompt, change to {@code H:\} and run
     * {@code type hadoop-2.7.2.tar.gz.part2 >> hadoop-2.7.2.tar.gz.part1}, then rename
     * {@code hadoop-2.7.2.tar.gz.part1} to {@code hadoop-2.7.2.tar.gz}. Extracting it shows
     * the archive is complete.
     */
    @Test
    public void readFileSeek2() throws IOException, InterruptedException, URISyntaxException {
        Configuration configuration = new Configuration();

        try (FileSystem fs = openFileSystem(configuration);
             FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
             FileOutputStream fos = new FileOutputStream(new File("H:/hadoop-2.7.2.tar.gz.part2"))) {
            // Skip what part 1 already downloaded.
            fis.seek(BLOCK_SIZE);

            // close=false: try-with-resources owns the streams.
            IOUtils.copyBytes(fis, fos, configuration, false);
        }
    }
}