# -*- encoding:utf-8 -*-
import os
import os.path as osp
import shutil
from tqdm import tqdm

import cvtools

def splitpath(path):
    """Split a path into directory, bare file name and extension.

    Args:
        path (str): the path to split.

    Returns:
        tuple: ``(directory, name, extension)``. The extension keeps its
            leading dot, exactly as produced by ``osp.splitext``.
    """
    directory, tail = osp.split(path)
    # splitext already yields the (name, extension) pair we need.
    return (directory,) + osp.splitext(tail)
def find_in_path(name, path):
    """Find a file in a search path.

    Args:
        name (str): file name to look for.
        path (str): search path made of directories joined by ``os.pathsep``.

    Returns:
        str or None: absolute path of the first existing match, or None
            when no directory of the search path contains ``name``.
    """
    # Adapted from an external recipe (its URL is missing from this copy).
    candidates = (osp.join(folder, name) for folder in path.split(os.pathsep))
    return next(
        (osp.abspath(candidate) for candidate in candidates
         if osp.exists(candidate)),
        None,
    )
# Recursively visit every sub-folder and collect the file list (with paths).
def _get_files_list(root_dir):
    """Collect every file found below the given path.

    Args:
        root_dir (str): folder to scan. The original note asks for an
            absolute path; a leading ``~`` is expanded here.

    Returns:
        list: paths of all entries below ``root_dir`` that are not
            directories. If ``root_dir`` itself is not a directory the
            result is ``[root_dir]`` (after ``~`` expansion).
    """
    # On Linux, isdir() does not understand the "~" home shortcut.
    root_dir = osp.expanduser(root_dir)
    if osp.isdir(root_dir):
        collected = []
        for entry in os.listdir(root_dir):
            # Recurse: a non-directory entry comes back as a one-item list.
            collected.extend(_get_files_list(osp.join(root_dir, entry)))
        return collected
    return [root_dir]


# Recursively list the files of the given type(s) under a path.
def get_files_list(root, file_type=None, basename=False):
    """List the files below ``root``, optionally filtered by extension.

    Args:
        root (str): folder to scan. ``~`` is expanded and the path is made
            absolute before scanning.
        file_type (str or list, optional): extension or list of extensions,
            each including the leading dot (e.g. ``'.jpg'``). Matching is
            an exact, case-sensitive comparison with ``osp.splitext``.
            ``None`` keeps every file.
        basename (bool): when True, strip the leading ``root`` from every
            result so paths are relative to ``root``. Despite its name this
            is not ``osp.basename``: sub-folder components are kept (the
            original author noted this is somewhat surprising).

    Returns:
        list: matching file paths. With ``file_type`` given, results are
            grouped by extension in the order the extensions were listed.
    """
    # expanduser must run before abspath, otherwise "~/x" becomes "<cwd>/~/x".
    root = osp.abspath(osp.expanduser(root))
    files_list = _get_files_list(root)
    if file_type is not None:
        if isinstance(file_type, str):
            file_type = [file_type]
        files_list = [
            path
            for ext in file_type
            for path in files_list
            if osp.splitext(path)[1] == ext
        ]
    if basename:
        # Remove only the leading root prefix; str.replace would also delete
        # any later occurrence of the same text inside the path.
        prefix = root + os.sep
        files_list = [
            path[len(prefix):] if path.startswith(prefix) else path
            for path in files_list
        ]
    return files_list
# Recursively list the image files under a path.
def get_images_list(root_dir):
    """Return the ``.jpg``, ``.jpeg`` and ``.png`` files below ``root_dir``.

    Args:
        root_dir (str): folder to scan.

    Returns:
        list: whatever ``get_files_list`` returns for those three
            extensions.
    """
    image_suffixes = ['.jpg', '.jpeg', '.png']
    return get_files_list(root_dir, file_type=image_suffixes)
# Randomly split a list into two parts by the given ratio.
def split_list(data_list, test_size=0.1):
    """Randomly split a sequence into a train part and a test part.

    Args:
        data_list (sequence): items to split. It is copied before
            shuffling, so the caller's object is left untouched and
            immutable sequences such as tuples are accepted.
        test_size (float): fraction of the items that goes to the test
            part; the count is ``int(len(data_list) * test_size)``.

    Returns:
        tuple: ``(train_list, test_list)`` as two new lists.
    """
    import random

    # Shuffle a copy: shuffling in place would reorder the caller's data.
    shuffled = list(data_list)
    random.shuffle(shuffled)
    n_test = int(len(shuffled) * test_size)
    return shuffled[n_test:], shuffled[:n_test]
# Randomly split the data of several txt files into two parts by ratio; dst needs no extension.
def split_data(root, files, dst, test_size=0.1):
    """Split the content of several text files into train and test files.

    Args:
        root: passed through to ``cvtools.read_files_to_list``
            (presumably the folder holding ``files`` -- TODO confirm).
        files: passed through to ``cvtools.read_files_to_list``.
        dst (str): output path prefix without extension; ``'_train.txt'``
            and ``'_test.txt'`` are appended to it.
        test_size (float): fraction of the data written to the test file.

    Returns:
        tuple: ``(train_list, test_list)`` as returned by ``split_list``.
    """
    samples = cvtools.read_files_to_list(root, files)
    train_list, test_list = split_list(samples, test_size)
    # Write the train part first, then the test part.
    outputs = ((train_list, '_train.txt'), (test_list, '_test.txt'))
    for subset, suffix in outputs:
        cvtools.write_list_to_file(subset, dst + suffix)
    return train_list, test_list
# Randomly split a dict into two parts by the given ratio.
def split_dict(data_dict, test_size=0.1):
    """Randomly split a dict into a train dict and a test dict.

    Args:
        data_dict (dict): mapping to split; it is not modified.
        test_size (float): fraction of the keys that goes to the test
            dict; the count is ``int(len(data_dict) * test_size)``.

    Returns:
        tuple: ``(train_dict, test_dict)``, two new dicts that together
            hold every key/value pair of ``data_dict``.
    """
    import random

    keys = list(data_dict.keys())
    random.shuffle(keys)
    cut = int(len(keys) * test_size)
    # The first `cut` shuffled keys form the test part, the rest the train part.
    train_dict = {key: data_dict[key] for key in keys[cut:]}
    test_dict = {key: data_dict[key] for key in keys[:cut]}
    return train_dict, test_dict
# Batch-replace spaces in file names with underscores.
def replace_filename_space(src_root, dst_root):
    """Move every file below ``src_root`` into ``dst_root``, replacing
    spaces in the file name with underscores.

    All files are moved (not only those whose name contains a space) and
    the folder structure is flattened: only the base name is kept.

    Args:
        src_root (str): folder that is scanned recursively.
        dst_root (str): destination folder; created, including missing
            parents, when it does not exist. A trailing separator is
            optional.
    """
    files = get_files_list(src_root)
    os.makedirs(dst_root, exist_ok=True)
    for path in files:
        # basename/join are separator-aware; plain string concatenation
        # silently dropped the separator when dst_root had no trailing "/".
        new_name = osp.basename(path).replace(' ', '_')
        os.rename(path, osp.join(dst_root, new_name))
# Check whether a file contains duplicate lines; blank lines are ignored.
def check_rept(file):
    """Tell whether a text file contains duplicate lines.

    Blank lines are ignored. Lines are compared without their trailing
    newline, so a final line that lacks one still matches an earlier
    identical line.

    Args:
        file (str): path of the text file to inspect.

    Returns:
        bool: True if at least one non-blank line occurs more than once.
    """
    seen = set()
    with open(file, 'r') as f:
        for raw_line in f:
            line = raw_line.rstrip('\n')
            if not line:  # blank lines are whitelisted
                continue
            if line in seen:
                return True
            seen.add(line)
    return False
def makedirs(path):
    """Extension of ``os.makedirs``.

    Create the folder part of ``path``; several levels may be created at
    once. If ``path`` is only a file name there is nothing to create and
    False is returned; if it is an existing file, or its folder already
    exists, nothing is created and False is returned as well.

    Args:
        path: a path that may include a file name. A pure folder path
            must end with a separator, because only ``osp.dirname(path)``
            is created.

    Returns:
        bool: True when the folder was created, False otherwise
            (including when creation failed; the error is printed).
    """
    # Empty value, or a file that already exists: nothing to do.
    if path is None or path == '' or osp.isfile(path):
        return False
    # os.sep is deliberately not used: Windows users sometimes pass
    # '/'-separated paths too.
    has_folder_part = '/' in path or '\\' in path
    if not has_folder_part:
        return False
    folder = osp.dirname(path)
    if osp.exists(folder):
        return False
    try:
        os.makedirs(folder)
    except Exception as e:
        print(e, 'make dirs failed!')
        return False
    else:
        return True
def sample_label_from_images(images_src, labels_src, dst):
    """Copy the ``.json`` label of every ``.jpg`` image into ``dst``.

    For each file below ``images_src`` whose extension is exactly
    ``.jpg``, the file ``<labels_src>/<image name>.json`` is copied to
    ``dst``. A warning is printed for every label that is missing.

    Args:
        images_src (str): existing path scanned recursively for images.
        labels_src (str): existing folder holding the ``.json`` labels.
        dst (str): destination folder; created when it does not exist.
    """
    assert osp.exists(images_src)
    assert osp.exists(labels_src)
    image_paths = _get_files_list(images_src)
    if not osp.exists(dst):
        os.makedirs(dst)
    for image_path in tqdm(image_paths):
        stem, suffix = osp.splitext(osp.basename(image_path))
        if suffix != '.jpg':
            continue
        label_path = osp.join(labels_src, stem + '.json')
        if not osp.exists(label_path):
            print('!!!Warning: %s not exists' % label_path)
            continue
        shutil.copy(label_path, dst)
# Batch-replace substrings in folder names.
def folder_name_replace(path, list_replace):
    """Rename every folder below ``path`` by substituting substrings.

    Args:
        path (str): root folder; ``path`` itself is not renamed.
        list_replace (dict or None): mapping ``{old_substring: new_substring}``.
            All entries are applied, in iteration order, to each folder
            name and the folder is renamed once. ``None`` makes the call
            a no-op.

    Rename failures are printed and skipped (best effort).
    """
    if list_replace is None:
        return
    for root, dirs, _ in os.walk(path, topdown=True):
        for index, old_name in enumerate(dirs):
            new_name = old_name
            for key, value in list_replace.items():
                new_name = new_name.replace(key, value)
            if new_name == old_name:
                continue
            try:
                os.rename(osp.join(root, old_name), osp.join(root, new_name))
            except OSError as e:
                print(e)
                continue
            # Tell os.walk about the new name; otherwise it would try to
            # descend into the old (now missing) folder and skip its children.
            dirs[index] = new_name
def files_name_replace(path, file_type=None, folder=False, list_replace=None):
    """Rename files below ``path`` by substituting substrings in their names.

    Args:
        path (str): root folder scanned recursively.
        file_type (str or list, optional): extension filter forwarded to
            ``get_files_list``; ``None`` selects every file.
        folder (bool): when True, folder names are processed afterwards
            with ``folder_name_replace``.
        list_replace (dict, optional): mapping
            ``{old_substring: new_substring}``. All entries are applied,
            in iteration order, to the file's base name and the file is
            renamed once. ``None`` leaves files untouched.

    Rename failures are printed and skipped (best effort).
    """
    if list_replace is not None:
        for file_path in get_files_list(path, file_type):
            # Only the base name is rewritten: substituting in the whole
            # path would also alter folder components and make the rename
            # point into folders that do not exist.
            parent, old_name = osp.split(file_path)
            new_name = old_name
            for key, value in list_replace.items():
                new_name = new_name.replace(key, value)
            if new_name == old_name:
                continue
            try:
                os.rename(file_path, osp.join(parent, new_name))
            except OSError as e:
                print(e)
    if folder:
        folder_name_replace(path, list_replace)
def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    """Raise if ``filename`` is not an existing regular file.

    Args:
        filename (str): path to check.
        msg_tmpl (str): ``str.format`` template for the error message;
            it receives ``filename`` as its only positional argument.

    Raises:
        FileNotFoundError: when ``osp.isfile(filename)`` is False.
    """
    if osp.isfile(filename):
        return
    raise FileNotFoundError(msg_tmpl.format(filename))
def isfile_casesensitive(path):
    """Check that ``path`` is a file whose name matches with exact case.

    ``os.path.isfile`` alone is not enough for this, so the name is also
    looked up in the listing of its parent directory.

    Args:
        path (str): path to check; a bare file name is looked up in the
            current directory.

    Returns:
        bool: True if the file exists and its base name appears verbatim
            in the directory listing.
    """
    if not os.path.isfile(path):
        return False  # exit early
    directory, filename = os.path.split(path)
    # A bare file name yields directory == '', which os.listdir rejects.
    return filename in os.listdir(directory or os.curdir)
def is_image_file(filename):
    """Tell whether a file name ends with a known image extension.

    The check is case-insensitive, so ``.jpg``, ``.JPG`` and mixed-case
    spellings such as ``.Jpg`` are all accepted.

    Args:
        filename (str): file name or path.

    Returns:
        bool: True if the name ends with ``.jpg``, ``.png`` or ``.jpeg``.
    """
    return filename.lower().endswith(('.jpg', '.png', '.jpeg'))
