-
导出 FSimage
# Fetch the NameNode's fsimage and save it into the local directory meta/.
# NOTE(review): presumably meta/ must already exist before running — confirm.
hdfs dfsadmin -fetchImage meta/
-
将 FSimage 文件转换为带分隔符的文本文件（Delimited 处理器默认以制表符分隔，此处输出文件命名为 fsimage.csv）
# Convert the binary fsimage into delimited text with the Offline Image Viewer.
# The input is read from meta/, the directory the fetchImage step saved it to;
# replace the transaction id in the file name with the one actually downloaded.
hdfs oiv -i meta/fsimage_0000000000616506859 -o fsimage.csv -p Delimited
-
将文件映射为 Hive 表
hdfs dfs -copyFromLocal fsimage.csv /tmp/fsimage.csv

-- Create the metadata table over the delimited fsimage dump.
-- Size and quota columns are BIGINT: file sizes above 2 GiB and large space
-- quotas do not fit in INT and would otherwise be read as NULL.
CREATE EXTERNAL TABLE test.hdfs_meta (
    path STRING,
    repl INT,
    ModificationTime STRING,
    AccessTime STRING,
    PreferredBlockSize BIGINT,
    BlocksCount INT,
    FileSize BIGINT,
    NSQUOTA BIGINT,
    DSQUOTA BIGINT,
    Permission STRING,
    UserName STRING,
    GroupName STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
TBLPROPERTIES ("skip.header.line.count" = "1");

-- Load the data (moves /tmp/fsimage.csv into the table, replacing its contents).
LOAD DATA INPATH '/tmp/fsimage.csv' OVERWRITE INTO TABLE test.hdfs_meta;

-- Statistics: block count, total size and entry count per base path, where the
-- base path is everything before the fifth '/' of each path.
-- strleft() and the four-argument instr() are Impala built-ins, so run this
-- statement in Impala.
-- NOTE(review): paths with fewer than five '/' have no fifth-slash position and
-- likely collapse into a single bucket; count(*) also counts every row in the
-- dump, which may include directories — confirm before relying on file_nums.
SELECT
    strleft(path, instr(path, '/', 1, 5) - 1) AS basepath,
    sum(blockscount) AS blockscount,
    sum(filesize) AS filesizes,
    count(*) AS file_nums
FROM test.hdfs_meta
GROUP BY basepath
ORDER BY blockscount DESC;
统计如下:
统计 HDFS 小文件
Posted on:2022年2月2日