# -*- encoding:utf-8 -*-
# @Time : 2019/3/1 22:10
# @Author : gfjiang
# @Site :
# @File : utils.py
# @Software: PyCharm
import os
import os.path as osp
import shutil
from tqdm import tqdm
import cvtools
def splitpath(path):
    """Split a path into directory, bare file name and extension.

    Args:
        path (str): path to split.

    Returns:
        tuple: ``(directory, name, extension)`` where ``extension`` keeps its
        leading dot (or is empty when the last component has none).
    """
    name, extension = osp.splitext(osp.basename(path))
    return osp.dirname(path), name, extension
def find_in_path(name, path):
    """Find a file in a search path.

    Args:
        name (str): file name to look for.
        path (str): directories to search, joined with ``os.pathsep``.

    Returns:
        str or None: absolute path of the first existing match, in search
        order, or None when no directory contains ``name``.
    """
    # Adapted from
    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
    candidates = (osp.join(folder, name) for folder in path.split(os.pathsep))
    first_hit = next((c for c in candidates if osp.exists(c)), None)
    if first_hit is None:
        return None
    return osp.abspath(first_hit)
# 递归文件夹下所有文件夹,得到文件列表(含路径)
def _get_files_list(root_dir):
    """Recursively collect every non-directory entry below ``root_dir``.

    Args:
        root_dir (str): directory to scan; a leading ``~`` is expanded.

    Returns:
        list: paths of all entries under ``root_dir`` that are not
        directories. When ``root_dir`` itself is not a directory the result
        is the single-element list ``[root_dir]``.
    """
    # osp.isdir cannot recognise the "~" home shortcut, so expand it first.
    root_dir = osp.expanduser(root_dir)
    if not osp.isdir(root_dir):
        return [root_dir]
    files_list = []
    for entry in os.listdir(root_dir):  # recursive descent
        files_list.extend(_get_files_list(osp.join(root_dir, entry)))
    return files_list


# Recursively list the files of the given type(s) below a path.
def get_files_list(root, file_type=None, basename=False):
    """List files below ``root``, optionally filtered by extension.

    Args:
        root (str): directory (or single file) to scan; ``~`` is expanded.
        file_type (str or iterable of str, optional): extension(s) to keep,
            including the leading dot (e.g. ``'.jpg'``). The comparison is
            case-sensitive. Results are grouped in the order the extensions
            are given. None keeps every file.
        basename (bool): despite its name, when True the returned paths are
            made relative to ``root`` (sub-directories are kept), not
            reduced to bare file names.

    Returns:
        list: matching file paths.
    """
    # Expand "~" before abspath; otherwise abspath would glue the literal
    # "~" onto the current working directory.
    root = osp.abspath(osp.expanduser(root))
    files_list = _get_files_list(root)
    if file_type is not None:
        if isinstance(file_type, str):
            file_type = [file_type]
        # Drop repeated extensions so a file is never listed twice.
        wanted = []
        for ext in file_type:
            if ext not in wanted:
                wanted.append(ext)
        files_list = [path for ext in wanted
                      for path in files_list
                      if osp.splitext(path)[1] == ext]
    if basename:
        # Strip only the leading "root/" prefix. str.replace would also
        # remove later occurrences of the same text inside the path.
        prefix = root + os.sep
        files_list = [path[len(prefix):] if path.startswith(prefix) else path
                      for path in files_list]
    return files_list
# 递归路径输出图片列表
def get_images_list(root_dir):
    """Recursively list the image files below ``root_dir``.

    Both lower- and upper-case spellings of the ``.jpg``, ``.jpeg`` and
    ``.png`` extensions are accepted, matching ``is_image_file``; the
    extension comparison done by ``get_files_list`` is case-sensitive, so
    each spelling has to be listed explicitly.

    Args:
        root_dir (str): directory to scan.

    Returns:
        list: paths of the image files found.
    """
    return get_files_list(
        root_dir,
        file_type=['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG'])
# 将list随机按比例分成两部分
def split_list(data_list, test_size=0.1):
    """Randomly split a sequence into a train part and a test part.

    The input is copied before shuffling, so the caller's sequence is left
    untouched and immutable sequences such as tuples are accepted.

    Args:
        data_list (sequence): items to split.
        test_size (float): fraction of the items that goes to the test part;
            the test count is ``int(len(data_list) * test_size)``.

    Returns:
        tuple: ``(train_list, test_list)``, two new lists.
    """
    import random
    shuffled = list(data_list)
    random.shuffle(shuffled)
    n_test = int(len(shuffled) * test_size)
    return shuffled[n_test:], shuffled[:n_test]
# 将多个txt数据随机按比例分成两部分, dst无须后缀
def split_data(root, files, dst, test_size=0.1):
    """Split the content of several text files into train and test files.

    The data is loaded with ``cvtools.read_files_to_list(root, files)``,
    split with ``split_list`` and written to ``dst + '_train.txt'`` and
    ``dst + '_test.txt'``.

    Args:
        root: forwarded to ``cvtools.read_files_to_list`` (presumably the
            folder containing ``files`` -- confirm against cvtools).
        files: forwarded to ``cvtools.read_files_to_list``.
        dst (str): output path prefix, without extension.
        test_size (float): fraction of the data used for the test part.

    Returns:
        tuple: ``(train_list, test_list)``.
    """
    samples = cvtools.read_files_to_list(root, files)
    train_part, test_part = split_list(samples, test_size)
    outputs = ((train_part, '_train.txt'), (test_part, '_test.txt'))
    for part, suffix in outputs:
        cvtools.write_list_to_file(part, dst + suffix)
    return train_part, test_part
# 将dict随机按比例分成两部分
def split_dict(data_dict, test_size=0.1):
    """Randomly split a dict into a train dict and a test dict.

    Args:
        data_dict (dict): mapping to split; it is not modified.
        test_size (float): fraction of the keys that goes to the test dict;
            the test count is ``int(len(data_dict) * test_size)``.

    Returns:
        tuple: ``(train_dict, test_dict)``.
    """
    import random
    keys = list(data_dict.keys())
    random.shuffle(keys)
    cut = int(len(keys) * test_size)
    train_dict = {key: data_dict[key] for key in keys[cut:]}
    test_dict = {key: data_dict[key] for key in keys[0:cut]}
    return train_dict, test_dict
# 批量将文件名中空格替换为下划线
def replace_filename_space(src_root, dst_root):
    """Move every file below ``src_root`` into ``dst_root``, replacing
    spaces in the file names with underscores.

    All files end up directly inside ``dst_root``; the sub-directory
    structure of ``src_root`` is not reproduced.

    Args:
        src_root (str): directory that is scanned recursively.
        dst_root (str): destination directory; created (including missing
            parents) when it does not exist. A trailing separator is
            optional.
    """
    files = get_files_list(src_root)
    if not osp.exists(dst_root):
        os.makedirs(dst_root)
    for file_path in files:
        # basename handles the platform's separators; join inserts the
        # separator that plain string concatenation would miss.
        new_name = osp.basename(file_path).replace(' ', '_')
        os.rename(file_path, osp.join(dst_root, new_name))
# 检测文件数据是否有重复行,空行排除
def check_rept(file):
    """Check whether a text file contains repeated lines.

    Blank lines are ignored. Lines are compared without their trailing
    newline, so a final line that lacks a newline is still recognised as a
    duplicate of an earlier identical line.

    Args:
        file (str): path of the text file to inspect.

    Returns:
        bool: True if at least one non-blank line occurs more than once.
    """
    seen = set()
    with open(file, 'r') as f:
        for line in f:
            content = line.rstrip('\n')
            if content == '':  # blank lines are whitelisted
                continue
            if content in seen:
                return True
            seen.add(content)
    return False
def makedirs(path):
    """Extension of ``os.makedirs`` that accepts a path ending in a file name.

    Creates the directory part of ``path`` (``osp.dirname(path)``),
    including any missing parents.

    Args:
        path: path that may include a file name. A pure directory path must
            end with a separator, otherwise its last component is treated
            as a file name.

    Returns:
        bool: True when directories were created. False when ``path`` is
        None or empty, is an existing file, contains no ``/`` or ``\\``
        separator, its directory part already exists, or creation failed
        (the error is printed).
    """
    if path is None or path == '' or osp.isfile(path):
        return False
    # os.sep is not used here: Windows users sometimes pass '/'-separated
    # paths as well, so both separators are checked explicitly.
    has_separator = '/' in path or '\\' in path
    if not has_separator:
        return False
    folder = osp.dirname(path)
    if osp.exists(folder):
        return False
    try:
        os.makedirs(folder)
    except Exception as e:
        print(e, 'make dirs failed!')
        return False
    else:
        return True
def sample_label_from_images(images_src, labels_src, dst):
    """Copy the ``.json`` label of every ``.jpg`` image into ``dst``.

    For each file below ``images_src`` whose extension is exactly ``.jpg``,
    the file ``<name>.json`` is looked up directly in ``labels_src`` and
    copied to ``dst``. A warning is printed for every missing label.

    Args:
        images_src (str): existing path holding the images (scanned
            recursively).
        labels_src (str): existing directory holding the label files.
        dst (str): output directory; created when missing.
    """
    assert osp.exists(images_src)
    assert osp.exists(labels_src)
    images = _get_files_list(images_src)
    if not osp.exists(dst):
        os.makedirs(dst)
    for image_path in tqdm(images):
        stem, suffix = osp.splitext(osp.basename(image_path))
        if suffix != '.jpg':
            continue
        label_path = osp.join(labels_src, stem + '.json')
        if not osp.exists(label_path):
            print('!!!Warning: %s not exists' % label_path)
            continue
        shutil.copy(label_path, dst)
# 文件夹名批量替换子串
def folder_name_replace(path, list_replace):
    """Rename every folder below ``path`` by substring replacement.

    ``path`` itself is never renamed. All replacements are applied to a
    folder name, in the iteration order of ``list_replace``, before a single
    rename is performed. Rename failures are printed and skipped.

    Args:
        path (str): root directory whose sub-folders are renamed in place.
        list_replace (dict or None): mapping ``{old_substring: new_substring}``;
            None makes the call a no-op.
    """
    if list_replace is None:
        return
    # Walk bottom-up: the children of a folder are handled before the folder
    # itself is renamed, so no path produced by os.walk goes stale.
    for root, dirs, _ in os.walk(path, topdown=False):
        for name in dirs:
            new_name = name
            for key, value in list_replace.items():
                new_name = new_name.replace(key, value)
            if new_name == name:
                continue
            try:
                os.rename(osp.join(root, name),
                          osp.join(root, new_name))  # change inplace
            except (OSError, ValueError) as e:
                print(e)
def files_name_replace(path, file_type=None, folder=False, list_replace=None):
    """Rename files below ``path`` by substring replacement.

    Only the file name (last path component) is rewritten; directory names
    are left alone unless ``folder`` is True, in which case they are handled
    afterwards by ``folder_name_replace``. All replacements are applied to a
    name, in the iteration order of ``list_replace``, before a single rename
    is performed. Rename failures are printed and skipped.

    Args:
        path (str): root directory, scanned recursively.
        file_type (str or list, optional): extension filter forwarded to
            ``get_files_list``.
        folder (bool): also rename the folders below ``path``.
        list_replace (dict, optional): mapping
            ``{old_substring: new_substring}``; None renames nothing.
    """
    if list_replace is not None:
        for file_path in get_files_list(path, file_type):
            directory, old_name = osp.split(file_path)
            new_name = old_name
            for key, value in list_replace.items():
                new_name = new_name.replace(key, value)
            if new_name == old_name:
                continue
            try:
                os.rename(file_path,
                          osp.join(directory, new_name))  # change inplace
            except (OSError, ValueError) as e:
                print(e)
    if folder:
        folder_name_replace(path, list_replace)
def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise if ``filename`` is not an existing regular file.

    Args:
        filename (str): path to check.
        msg_tmpl (str): error message template; ``{}`` is replaced by
            ``filename``.

    Raises:
        FileNotFoundError: when ``osp.isfile(filename)`` is false.
    """
    if osp.isfile(filename):
        return
    raise FileNotFoundError(msg_tmpl.format(filename))
def isfile_casesensitive(path):
    """Check that ``path`` is an existing file whose name matches exactly.

    ``os.path.isfile`` alone accepts a differently-cased name on
    case-insensitive file systems; this additionally requires the file name
    to appear verbatim in the listing of its directory.

    Args:
        path (str): file path; a bare file name is looked up in the current
            working directory.

    Returns:
        bool: True only if the file exists with exactly this name.
    """
    if not os.path.isfile(path):
        return False  # exit early
    directory, filename = os.path.split(path)
    # A bare file name yields an empty directory, which os.listdir rejects.
    return filename in os.listdir(directory or os.curdir)
def is_image_file(filename):
    """Tell whether ``filename`` has a JPEG or PNG extension.

    The check is case-insensitive, so ``.jpg``, ``.JPG`` and mixed-case
    spellings such as ``.Jpg`` are all accepted. Only the name is inspected;
    the file is never opened.

    Args:
        filename (str): file name or path.

    Returns:
        bool: True if the name ends with ``.jpg``, ``.jpeg`` or ``.png``.
    """
    return filename.lower().endswith(('.jpg', '.png', '.jpeg'))
if __name__ == "__main__":
    # Manual smoke tests kept for reference. Everything except the final
    # files_name_replace call is commented out.
    # # test passed, 2019.6.28
    # txt_laber_list = get_files_list('F:/data/detection', file_type=('.txt', '.jpeg'))
    # # test passed, 2019.3.7
    # root = osp.abspath('..')+'/datasets/'
    # files = ['elevator_20180106.txt', 'elevator_20180115.txt', 'elevator_20181230.txt', 'elevator_20181231.txt']
    # data = read_files_to_list(root, files)
    # # test passed, 2019.3.7
    # root = osp.abspath('..')+'/datasets/'
    # files = ['elevator_20180106.txt', 'elevator_20181230.txt', 'elevator_20181231.txt']
    # files = ['elevator_20180601_convert.txt']
    # split_data(root, files)
    # # test passed, 2019.3.7
    # src_root = '/home/arc-fsy8515/data/elevator/20190106/'
    # dst_root = '/home/arc-fsy8515/data/elevator/20190106/'
    # replace_filename_space(src_root, dst_root)
    # # test passed, 2019.3.11
    # file = '../datasets/train/elevator_train.txt'
    # temp = check_rept(file)
    # print(file, temp)
    # root = '../datasets/'
    # files = ['elevator_20180601.txt', 'elevator_20181230.txt', 'elevator_20181231.txt', 'elevator_20190106.txt']
    # files = ['elevator_20190106_convert.txt']
    # split_data(root, files, '../datasets/elevator_20190106_convert', test_size=0.1)
    # w = 500
    # h = 400
    # labels = np.array([[580, -2, 600, 360], [-11, 565, 144, 1000]])
    # temp = [labels[:, 0:2] < 0]
    # labels[:, 0:2][labels[:, 0:2] < 0] = 0 # clamp the top-left coordinates
    # labels[:, 2][labels[:, 2] > w] = w # clamp the bottom-right coordinates
    # labels[:, 3][labels[:, 3] > h] = h
    # images_src = 'F:/bdd/bdd100k/images/10k/val'
    # labels_src = 'F:/bdd/bdd100k/labels/100k/val'
    # dst = 'F:/bdd/bdd100k/labels/10k/val'
    # sample_label_from_images(images_src, labels_src, dst)
    # # test passed, 2019.6.28
    # replace = {
    # ' ': '_',
    # '递交数据': '_submitted',
    # '人头标注': '_head_labeling'
    # }
    # folder_name_replace('F:/data/detection', replace)

    # Substring -> replacement table used to turn Chinese / space-separated
    # names into ASCII-friendly ones.
    # NOTE(review): '月' means "month", so '_mouth_' below looks like a typo
    # for '_month_' -- confirm before changing, since data that was already
    # renamed would use the current spelling.
    replace = {
        ' ': '_',
        ',': '_',
        '月': '_mouth_',
        '日': '_day_',
        '递交数据': '_submitted',
        '提交数据': '_submitted',
        '人头标注': '_head_labeling',
        '视频': 'video',
        '质检完成': 'quality_inspection'
    }
    # Renames files and folders in place below a hard-coded data directory
    # whenever this module is executed as a script.
    files_name_replace('/media/data/detection', folder=True, list_replace=replace)
    pass