cvtools.utils.file 源代码

# -*- encoding:utf-8 -*-
# @Time    : 2019/3/1 22:10
# @Author  : gfjiang
# @Site    : 
# @File    : utils.py
# @Software: PyCharm
import os
import os.path as osp
import shutil
from tqdm import tqdm

import cvtools


[文档]def splitpath(path):
    filepath, tempfilename = osp.split(path)
    filename, extension = osp.splitext(tempfilename)
    return filepath, filename, extension


[文档]def find_in_path(name, path):
    """Find a file in a search path"""
    # Adapted fom
    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
    for dir in path.split(os.pathsep):
        binpath = osp.join(dir, name)
        if osp.exists(binpath):
            return osp.abspath(binpath)
    return None


# 递归文件夹下所有文件夹，得到文件列表(含路径)
def _get_files_list(root_dir):
    """get all files under the given path.

    Args:
        root_dir(str): must use absolute path to get files.

    Returns:
        list: all files under the given path.
    """
    # for Linux, isdir cannot recognize ~ home path
    root_dir = osp.expanduser(root_dir)
    if not osp.isdir(root_dir):
        return [root_dir]
    files_list = []
    for lists in os.listdir(root_dir):  # recursive
        files_list += _get_files_list(osp.join(root_dir, lists))
    return files_list


# 递归路径输出特定类型文件列表
[文档]def get_files_list(root, file_type=None, basename=False):
    """file_type is a str or list."""
    root = osp.abspath(root)
    files_list = _get_files_list(root)
    if file_type is not None:
        if isinstance(file_type, str):
            file_type = [file_type]
        files_list = [file for type in file_type
                      for file in files_list
                      if type == osp.splitext(file)[1]]
    if basename:
        # 似乎不太符合最小惊讶原则
        files_list = [file.replace(root+os.sep, '')
                      for file in files_list]
        # files_list = [osp.basename(file) for file in files_list]
    return files_list


# 递归路径输出图片列表
[文档]def get_images_list(root_dir):
    return get_files_list(root_dir, file_type=['.jpg', '.jpeg', '.png'])


# 将list随机按比例分成两部分
[文档]def split_list(data_list, test_size=0.1):
    import random
    random.shuffle(data_list)
    train_list = data_list[int(len(data_list)*test_size):]
    test_list = data_list[0:int(len(data_list)*test_size)]
    return train_list, test_list


# 将多个txt数据随机按比例分成两部分, dst无须后缀
[文档]def split_data(root, files, dst, test_size=0.1):
    data_list = cvtools.read_files_to_list(root, files)
    train_list, test_list = split_list(data_list, test_size)
    # import time
    # now = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    cvtools.write_list_to_file(train_list, dst+'_train.txt')
    cvtools.write_list_to_file(test_list, dst+'_test.txt')
    return train_list, test_list


# 将dict随机按比例分成两部分
[文档]def split_dict(data_dict, test_size=0.1):
    import random
    dict_key = list(data_dict.keys())
    random.shuffle(dict_key)
    train_list = dict_key[int(len(dict_key)*test_size):]
    test_list = dict_key[0:int(len(dict_key)*test_size)]
    train_dict = {}
    for key in train_list:
        train_dict[key] = data_dict[key]
    test_dict = {}
    for key in test_list:
        test_dict[key] = data_dict[key]
    return train_dict, test_dict


# 批量将文件名中空格替换为下划线
[文档]def replace_filename_space(src_root, dst_root):
    files = get_files_list(src_root)
    if not osp.exists(dst_root):
        os.mkdir(dst_root)
    for file in files:
        temp = file.split('/')[-1].replace(' ', '_')
        os.rename(file, dst_root+temp)


# 检测文件数据是否有重复行，空行排除
[文档]def check_rept(file):
    with open(file, 'r') as f:
        str_list = f.readlines()
    count_dict = {}
    blank_line = 0
    # 如果字典里有该单词则加1，否则添加入字典
    for str in str_list:
        if str == '\n' or str == '':    # 白名单
            blank_line += 1
            continue
        if str in count_dict:
            count_dict[str] += 1
        else:
            count_dict[str] = 1
    return len(count_dict) != (len(str_list)-blank_line)


[文档]def makedirs(path):
    """对os.makedirs进行扩展

    从路径中创建文件夹，可创建多层。如果仅是文件名，则无须创建，返回False；
    如果是已存在文件或路径，则无须创建，返回False

    Args:
        path: 路径，可包含文件名。纯路径最后一个字符需要是os.sep
    """
    if path is None or path == '':  # 空
        return False
    if osp.isfile(path):    # 是文件并且已存在
        return False
    # 不能使用os.sep，因为有时在windows平台下用户也会传入使用'/'分割的路径
    if '/' not in path and '\\' not in path:  # 不含路径
        return False
    path = osp.dirname(path)
    if osp.exists(path):
        return False
    try:
        os.makedirs(path)
    except Exception as e:
        print(e, 'make dirs failed!')
        return False
    return True


[文档]def sample_label_from_images(images_src, labels_src, dst):
    assert osp.exists(images_src)
    assert osp.exists(labels_src)
    images = _get_files_list(images_src)
    if not osp.exists(dst):
        os.makedirs(dst)
    for image in tqdm(images):
        image = osp.basename(image)
        filename, extension = osp.splitext(image)
        if extension == '.jpg':
            filename = osp.join(labels_src, filename + '.json')
            if osp.exists(filename):
                shutil.copy(filename, dst)
            else:
                print('!!!Warning: %s not exists' % filename)


# 文件夹名批量替换子串
[文档]def folder_name_replace(path, list_replace):
    if list_replace is None:
        return
    # 三重循环可能效率较低
    for root, dirs, _ in os.walk(path, topdown=True):
        for key, value in list_replace.items():
            for dir in dirs:
                if key not in dir:
                    continue
                try:
                    fold = osp.join(root, dir)
                    new_fold = osp.join(root, dir.replace(key, value))
                    os.rename(fold, new_fold)  # change inplace
                except Exception as e:
                    print(e)


[文档]def files_name_replace(path, file_type=None, folder=False, list_replace=None):
    file_list = get_files_list(path, file_type)
    for file in file_list:
        if list_replace is not None:
            for key, value in list_replace.items():
                if key in file:
                    new_file = file.replace(key, value)
                    try:
                        os.rename(file, new_file)   # change inplace
                    except Exception as e:
                        print(e)
    if folder:
        folder_name_replace(path, list_replace)


[文档]def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
    if not osp.isfile(filename):
        raise FileNotFoundError(msg_tmpl.format(filename))


[文档]def isfile_casesensitive(path):
    if not os.path.isfile(path):
        return False   # exit early
    directory, filename = os.path.split(path)
    return filename in os.listdir(directory)


[文档]def is_image_file(filename):
    extensions = ['.jpg', '.png', '.jpeg', '.JPG', '.PNG', '.JPEG']
    return any(filename.endswith(extension) for extension in extensions)


if __name__ == "__main__":
    # # 测试通过，2019.6.28
    # txt_laber_list = get_files_list('F:/data/detection', file_type=('.txt', '.jpeg'))

    # # 测试通过，2019.3.7
    # root = osp.abspath('..')+'/datasets/'
    # files = ['elevator_20180106.txt', 'elevator_20180115.txt', 'elevator_20181230.txt', 'elevator_20181231.txt']
    # data = read_files_to_list(root, files)

    # # 测试通过，2019.3.7
    # root = osp.abspath('..')+'/datasets/'
    # files = ['elevator_20180106.txt', 'elevator_20181230.txt', 'elevator_20181231.txt']
    # files = ['elevator_20180601_convert.txt']
    # split_data(root, files)

    # # 测试通过，2019.3.7
    # src_root = '/home/arc-fsy8515/data/elevator/20190106/'
    # dst_root = '/home/arc-fsy8515/data/elevator/20190106/'
    # replace_filename_space(src_root, dst_root)

    # # 测试通过，2019.3.11
    # file = '../datasets/train/elevator_train.txt'
    # temp = check_rept(file)
    # print(file, temp)

    # root = '../datasets/'
    # files = ['elevator_20180601.txt', 'elevator_20181230.txt', 'elevator_20181231.txt', 'elevator_20190106.txt']
    # files = ['elevator_20190106_convert.txt']
    # split_data(root, files, '../datasets/elevator_20190106_convert', test_size=0.1)

    # w = 500
    # h = 400
    # labels = np.array([[580, -2, 600, 360], [-11, 565, 144, 1000]])
    # temp = [labels[:, 0:2] < 0]
    # labels[:, 0:2][labels[:, 0:2] < 0] = 0  # 左上角坐标限幅
    # labels[:, 2][labels[:, 2] > w] = w  # 右下角坐标限幅
    # labels[:, 3][labels[:, 3] > h] = h

    # images_src = 'F:/bdd/bdd100k/images/10k/val'
    # labels_src = 'F:/bdd/bdd100k/labels/100k/val'
    # dst = 'F:/bdd/bdd100k/labels/10k/val'
    # sample_label_from_images(images_src, labels_src, dst)

    # # 测试通过，2019.6.28
    # replace = {
    #     ' ': '_',
    #     '递交数据': '_submitted',
    #     '人头标注': '_head_labeling'
    # }
    # folder_name_replace('F:/data/detection', replace)

    replace = {
        ' ': '_',
        ',': '_',
        '月': '_mouth_',
        '日': '_day_',
        '递交数据': '_submitted',
        '提交数据': '_submitted',
        '人头标注': '_head_labeling',
        '视频': 'video',
        '质检完成': 'quality_inspection'
    }
    files_name_replace('/media/data/detection', folder=True, list_replace=replace)

    pass