admin 管理员组文章数量: 887016
- 统计指定目录子文件类型和占用大小分布
# @Time : 2021/12/16 15:11
# @Author : wyh
# @FileName: file_size.py
# @Software: PyCharm
import os
import datetime
"""
统计指定目录子文件类型和占用大小分布
"""
# Distinct file extensions met during the walk ("" for files without one).
type_set = set()
# File count per size bucket: 0 is "under 1 KB", 13 is "100 GB and above".
type_size = dict.fromkeys(range(14), 0)
KB = 1024
MB = 1024 * KB
GB = 1024 * MB
TB = 1024 * GB


def get_size_type(path):
    """Walk *path* recursively and tally every regular file found.

    Each file increments one counter in the module-level ``type_size``
    histogram (chosen by the file's size in bytes) and contributes its
    extension, as returned by ``os.path.splitext``, to ``type_set``.
    Entries that are neither a directory nor a regular file are ignored.

    Args:
        path: Directory to scan.

    Returns:
        None. Results accumulate in ``type_size`` and ``type_set``.
    """
    # Exclusive upper edge of buckets 0..12; a size at or above the last
    # edge belongs to bucket 13.
    bucket_edges = (
        KB, MB, 5 * MB, 15 * MB, 50 * MB, 100 * MB, 300 * MB, 700 * MB,
        GB, 2 * GB, 5 * GB, 10 * GB, 100 * GB,
    )
    for entry_name in os.listdir(path):
        entry_path = os.path.join(path, entry_name)
        if os.path.isdir(entry_path):
            get_size_type(entry_path)  # descend into the sub-directory
            continue
        if not os.path.isfile(entry_path):
            continue
        extension = os.path.splitext(entry_path)[1]
        byte_count = os.path.getsize(entry_path)
        # The bucket index equals the number of edges the size has reached.
        bucket = sum(1 for edge in bucket_edges if byte_count >= edge)
        type_size[bucket] += 1
        type_set.add(extension)
# Remember when the scan began so the elapsed time can be reported if wanted.
start_time = datetime.datetime.now()

# Root directory whose contents are analysed.
path = r"D:\PythonProject"
get_size_type(path)

# One value per line: the number of distinct file types, the set of file
# types, and the size-bucket histogram.
print(len(type_set), type_set, type_size, sep="\n")

end_time = datetime.datetime.now()
# print(end_time - start_time)
在本地测试 23GB文件 需要运行 2.6秒左右
把脚本交给运维同学使用后发现了一个问题:他们要处理的数据量达到 10TB 级别,而且目录层级很深,运行时会报如下错误:
FileNotFoundError: 系统找不到指定的路径
解决方案: 在绝对路径的前面加上 \\?\ 前缀(Windows 的扩展长度路径前缀,可以绕过约 260 个字符的 MAX_PATH 路径长度限制)
# @Time : 2021/12/16 15:11
# @Author : wyh
# @FileName: file_size.py
# @Software: PyCharm
import os
import datetime
"""
统计指定目录子文件类型和占用大小分布
"""
# Total bytes per file extension; files without an extension use the key "None".
size_dict = {}
# Number of files per file extension, keyed the same way as size_dict.
type_dict = {}
# Distinct raw extensions seen ("" for files without one).
type_set = set()
# File count per size bucket: 0 is "under 1 KB", 13 is "100 GB and above".
type_size = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0, 11: 0, 12: 0, 13: 0}
KB = 1024
MB = 1048576
GB = 1073741824
TB = 1099511627776

# Exclusive upper edge of buckets 0..12; anything at or above the last edge
# lands in bucket 13.
_BUCKET_UPPER_BOUNDS = (
    KB, MB, 5 * MB, 15 * MB, 50 * MB, 100 * MB, 300 * MB, 700 * MB,
    GB, 2 * GB, 5 * GB, 10 * GB, 100 * GB,
)


def _extended_path(path):
    """Return *path* in Windows extended-length form; unchanged elsewhere."""
    if os.name != "nt" or path.startswith("\\\\?\\"):
        return path
    # The \\?\ prefix switches off path normalisation, so the path has to be
    # absolute and normalised before the prefix is attached.
    full = os.path.abspath(path)
    if full.startswith(("\\\\?\\", "\\\\.\\")):
        return full
    if full.startswith("\\\\"):
        # UNC share: \\server\share becomes \\?\UNC\server\share
        return "\\\\?\\UNC\\" + full[2:]
    return "\\\\?\\" + full


def _size_bucket(file_size):
    """Return the index of the ``type_size`` bucket that holds *file_size*."""
    for bucket, upper in enumerate(_BUCKET_UPPER_BOUNDS):
        if file_size < upper:
            return bucket
    return len(_BUCKET_UPPER_BOUNDS)


def get_size_type(path):
    """Walk *path* and tally file counts and sizes by extension and by size.

    For every regular file below *path* this updates the module-level
    containers: ``type_dict`` (file count per extension), ``size_dict``
    (bytes per extension), ``type_size`` (file count per size bucket) and
    ``type_set`` (distinct extensions). Files without an extension are
    counted under the key ``"None"`` in the two dictionaries and as ``""``
    in ``type_set``.

    On Windows the root is converted to an extended-length path once, and
    every child path is built from it, so listing, type checks and size
    queries all keep working below directories deeper than MAX_PATH. On
    other platforms the path is used as given.

    Args:
        path: Directory to scan.

    Returns:
        None. Results accumulate in the module-level containers.

    Raises:
        OSError: If a directory cannot be listed or a file size cannot be
            read; the error from ``os`` is propagated unchanged.
    """
    # An explicit stack replaces recursion so that very deep trees cannot
    # exhaust the interpreter's recursion limit.
    pending = [_extended_path(path)]
    while pending:
        current_dir = pending.pop()
        for filename in os.listdir(current_dir):
            # Children inherit the prefix from current_dir, so the checks
            # below see the same long-path-safe name that listdir used.
            temp_path = os.path.join(current_dir, filename)
            if os.path.isdir(temp_path):
                pending.append(temp_path)
            elif os.path.isfile(temp_path):
                type_name = os.path.splitext(temp_path)[1]
                file_size = os.path.getsize(temp_path)
                type_key = type_name if type_name else "None"
                type_dict[type_key] = type_dict.get(type_key, 0) + 1
                size_dict[type_key] = size_dict.get(type_key, 0) + file_size
                type_size[_size_bucket(file_size)] += 1
                type_set.add(type_name)
# Remember when the scan began so the elapsed time can be reported if wanted.
start_time = datetime.datetime.now()
path = r"D:\PythonProject"  # root directory to analyse
get_size_type(path)
# Number of distinct file types
print(len(type_set))
# The file types themselves
print(type_set)
# Size-bucket histogram
print(type_size)
end_time = datetime.datetime.now()
# print(end_time - start_time)
file_path = r"D:\Statistic.txt"
# The report contains Chinese text. Without an explicit encoding, open() uses
# the locale default, which cannot encode these characters on many systems
# and would raise UnicodeEncodeError.
with open(file_path, 'w', encoding="utf-8") as f:
    f.write("文件类型数" + str(len(type_set)))
    f.write('\n')
    f.write("文件大小分布" + str(type_size))
    f.write('\n')
    for each_type, type_count in type_dict.items():
        size_mb = size_dict[each_type] / (1024 * 1024)
        # One record per file type: label, count and total size in MB.
        f.writelines(("类型" + str(each_type), "数量" + str(type_count),
                      "大小" + str(size_mb) + "MB"))
        f.write('\n')
        print("共有【 %s 】的文件【 %d 】个 ,占用硬盘【 %.2f 】MB" %
              (each_type, type_count, size_mb))
版权声明:本文标题:Windows下统计指定目录子文件类型和大小 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.freenas.com.cn/jishu/1726657321h1007056.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论