java 操作 hdfs 实例

根据之前几篇文章搜集的资料,写了这么一个简单的实例,仅供参考。

package org.apache.hadoop.examples;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;


/**
 * Demo of common HDFS client operations: upload, create/write, mkdir,
 * rename, delete, existence check, metadata, directory listing, block
 * locations, and DataNode enumeration.
 *
 * <p>NOTE(review): the class is named WordCount but contains no MapReduce
 * logic; the name is kept so existing callers/launch scripts keep working.
 */
public class WordCount {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Point the client at the cluster. NOTE(review): "mapred.job.tracker"
		// and "fs.default.name" are the pre-YARN property names; on Hadoop 2+
		// prefer "fs.defaultFS" (the old key still works as a deprecated alias).
		conf.set("mapred.job.tracker", "master:9001");
		conf.set("fs.default.name", "hdfs://master:9000");
		System.out.println("fs.default.name:" + conf.get("fs.default.name"));
		System.out.println("mapred.job.tracker:" + conf.get("mapred.job.tracker"));

		FileSystem fs = FileSystem.get(conf);
		try {
			// Copy a file from the local disk into HDFS.
			Path src = new Path("D:/hello.txt");
			Path dst = new Path("hdfs://master:9000/abc");
			fs.copyFromLocalFile(src, dst);

			// Create a file and write to it. The stream MUST be closed:
			// the original code leaked it, and an unclosed HDFS output
			// stream may never flush/commit its data to the DataNodes.
			// (getBytes() uses the platform default charset here.)
			byte[] buff = "hello hadoop world!\n".getBytes();
			Path dfs = new Path("/test");
			FSDataOutputStream outputStream = fs.create(dfs);
			try {
				outputStream.write(buff, 0, buff.length);
			} finally {
				outputStream.close();
			}

			// Create a directory.
			Path dfs1 = new Path("/TestDir");
			fs.mkdirs(dfs1);

			// Rename a file.
			Path frpaht = new Path("/test");     // old name
			Path topath = new Path("/test1");    // new name
			boolean isRename = fs.rename(frpaht, topath);
			String result = isRename ? "成功" : "失败";
			System.out.println("文件重命名结果为:" + result);

			// Delete a file or directory; "true" means recursive delete.
			Path delef = new Path("/TestDir");
			fs.delete(delef, true);

			// Check whether a path exists.
			Path findf = new Path("/test1");
			boolean isExists = fs.exists(findf);
			System.out.println("Exist?" + isExists);

			// Read the last-modification time (milliseconds since epoch).
			Path fpath = new Path("/test1");
			FileStatus fileStatus = fs.getFileStatus(fpath);
			long modiTime = fileStatus.getModificationTime();
			System.out.println("test1的修改时间是" + modiTime);

			// List every entry directly under the HDFS root.
			Path fpath1 = new Path("/");
			FileStatus[] status = fs.listStatus(fpath1);
			for (FileStatus file : status) {
				System.out.println(file.getPath().getName());
			}

			// Locate the blocks of a file across the cluster. Guard
			// against a block with no reported hosts — the original
			// hosts[0] would throw ArrayIndexOutOfBoundsException.
			Path fpath2 = new Path("/qj");
			FileStatus filestatus = fs.getFileStatus(fpath2);
			BlockLocation[] blkLocations =
					fs.getFileBlockLocations(filestatus, 0, filestatus.getLen());
			for (int i = 0; i < blkLocations.length; i++) {
				String[] hosts = blkLocations[i].getHosts();
				if (hosts.length > 0) {
					System.out.println("block_" + i + "_location:" + hosts[0]);
				}
			}

			// Enumerate all DataNodes. getDataNodeStats() only exists on
			// DistributedFileSystem, so check the runtime type instead of
			// blindly casting (the original cast would throw an unhelpful
			// ClassCastException on a non-HDFS filesystem).
			if (fs instanceof DistributedFileSystem) {
				DistributedFileSystem hdfs = (DistributedFileSystem) fs;
				DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
				for (int i = 0; i < dataNodeStats.length; i++) {
					System.out.println("DataNode_" + i + "_Name:" + dataNodeStats[i].getHostName());
				}
			}
		} finally {
			// Release the client's connection resources.
			fs.close();
		}
	}
}

本文参考了以下两篇:

Hadoop教程之编写HelloWorld(2) – 程序园

Java创建hdfs文件实例-大数据(hadoop系列)学习-about云开发