# Source code for cvtools.label_convert.arcsoft_to_coco

# -*- coding:utf-8 -*-
# author   : gfjiangly
# time     : 2019/6/28 13:41
# e-mail   : jgf0719@foxmail.com
# software : PyCharm
import os
import os.path as osp
import json
from tqdm import tqdm
from PIL import Image

import cvtools


class Arcsoft2COCO(object):
    """Convert Arcsoft-format txt labels into a COCO-style dataset dict.

    The label files are discovered with
    ``cvtools.get_files_list(path, file_type='.txt', basename=True)``; for
    each of them the image with the same stem and ``img_suffix`` is looked
    up under ``path``.  The result is accumulated in ``self.coco_dataset``
    (keys ``info``, ``categories``, ``images``, ``annotations``) and can be
    written to disk with :meth:`save_json`.

    Args:
        path: directory that holds both the txt labels and the images.
        cls_map: file passed to ``cvtools.read_key_value``; the resulting
            mapping is used as ``category name -> category id``.
        path_replace: stored on the instance as ``self.path_replace``.
            NOTE(review): the emitted ``file_name`` is the relative image
            name, so this mapping currently has no effect on the output.
        img_suffix: suffix of the image files, including the dot.
    """

    def __init__(self,
                 path,
                 cls_map='arcsoft/cat_id_map.txt',
                 path_replace=None,
                 img_suffix='.jpg'):
        self.path = path
        self.path_replace = path_replace
        self.img_suffix = img_suffix
        self.txt_list = cvtools.get_files_list(
            self.path, file_type='.txt', basename=True)
        # An integrity check (number of images == number of labels) could be
        # enabled here if desired:
        # self.img_list = cvtools.get_files_list(
        #     self.path, file_type=img_suffix)
        # assert len(self.img_list) == len(self.txt_list)
        self.cls_map = cvtools.read_key_value(cls_map)
        self.coco_dataset = {
            "info": {
                "description": "Open Dataset.",
                "url": "http://www.arcsoft.com",
                "version": "1.0",
                "year": 2019,
                "contributor": "arcsoft",
                "date_created": cvtools.get_time_str()
            },
            "categories": [],  # filled in by convert()
            "images": [],
            "annotations": []
        }
        # Running ids; both start at 1 and grow as images/annotations are added.
        self.imageID = 1
        self.annID = 1
        # Created here; not used by the methods of this class.
        self.run_timer = cvtools.Timer()

    @staticmethod
    def _read_image_size(img_file):
        """Return ``(width, height)`` of ``img_file`` or ``None`` if unreadable."""
        try:
            # "PIL: Open an image file, without loading the raster data".
            # The context manager closes the underlying file handle again.
            with Image.open(img_file) as im:
                return im.size
        except (OSError, Image.DecompressionBombError) as e:
            # OSError covers missing and unidentifiable files;
            # DecompressionBombError is raised for very large images.
            print(e)
            return None

    def convert(self, label_processor=cvtools.rect_reserved):
        """Fill ``self.coco_dataset`` from the label files found in ``path``.

        Args:
            label_processor: callable applied to every label dict returned by
                ``cvtools.read_arcsoft_txt_format``; it must return a dict
                with at least the keys ``bbox`` and ``category``.

        Images that cannot be opened are reported and skipped.  Labels whose
        processed ``bbox`` is empty are skipped.  When a label's category is
        missing from the class map, the remaining labels of that file are
        skipped (annotations already added for the file are kept).
        """
        # Invert name -> id into id -> name; if an id is repeated, the later
        # entry overrides the earlier one.
        id_cats = {value: key for key, value in self.cls_map.items()}
        for cat_id, cat_name in id_cats.items():
            self.coco_dataset['categories'].append({
                'id': int(cat_id),  # 0 for background
                'name': cat_name,
                'supercategory': cat_name
            })

        for txt_name in tqdm(self.txt_list):
            # Swap only the extension; str.replace would also rewrite a
            # '.txt' that occurs in the middle of the name.
            img_name = osp.splitext(txt_name)[0] + self.img_suffix
            txt_file = osp.join(self.path, txt_name)
            img_file = osp.join(self.path, img_name)

            # read the image header to get width and height
            size = self._read_image_size(img_file)
            if size is None:
                continue
            width, height = size

            # add image information to dataset
            image_id = self.imageID
            self.coco_dataset["images"].append({
                'file_name': img_name,  # use relative path
                'id': image_id,
                'width': width,
                'height': height
            })
            # Advance immediately so that every recorded image owns a unique
            # id, even when its label file turns out to be empty.
            self.imageID += 1

            # read txt label
            labels = cvtools.read_arcsoft_txt_format(txt_file)
            for label in labels:
                # change here for specific labels
                label = label_processor(label)
                if len(label['bbox']) == 0:
                    continue  # not expected to happen
                label['id'] = self.annID
                label['image_id'] = image_id
                label['segmentation'] = []
                label['iscrowd'] = 0
                try:
                    label['category_id'] = int(self.cls_map[label['category']])
                except KeyError:
                    print('skip file: {}'.format(txt_file))
                    break
                self.coco_dataset['annotations'].append(label)
                self.annID += 1

    def save_json(self, to_file='cocolike.json'):
        """Write ``self.coco_dataset`` as JSON to ``to_file``.

        The parent directory of ``to_file`` is created when it is missing.
        """
        dirname = osp.dirname(to_file)
        if dirname:
            # Create the target directory itself (not its parent).
            os.makedirs(dirname, exist_ok=True)
        with open(to_file, 'w') as f:
            json.dump(self.coco_dataset, f)  # indent=4 gives friendlier output
        print('!save {} finished'.format(to_file))
if __name__ == '__main__':
    # Example run: build the COCO-style json for the head labels stored
    # under E:/data/person and write it to arcsoft/person_head.json.
    converter = Arcsoft2COCO(
        'E:/data/person',
        cls_map='arcsoft/head_id_map.txt',
        path_replace={'\\': '/'},
        img_suffix='.jpg',
    )
    converter.convert(label_processor=cvtools.head_reserved)
    converter.save_json('arcsoft/person_head.json')