# -*- coding:utf-8 -*-
# author : gfjiangly
# time : 2019/6/28 13:41
# e-mail : jgf0719@foxmail.com
# software : PyCharm
import os
import os.path as osp
import json
from tqdm import tqdm
from PIL import Image
import cvtools
class Arcsoft2COCO(object):
    """Convert Arcsoft-format text labels into a COCO-style dataset dict.

    Every ``.txt`` label file found under ``path`` is paired with the image
    of the same base name (``img_suffix`` extension) in the same directory.
    The result is accumulated in ``self.coco_dataset`` and can be written to
    disk with :meth:`save_json`.

    Args:
        path: Directory containing the label ``.txt`` files and their images.
        cls_map: Path of the key/value file read by
            ``cvtools.read_key_value``. It is used as a mapping from category
            name to category id.
        path_replace: Optional dict of ``{old: new}`` substrings applied to
            the image path. NOTE(review): the replaced path is currently not
            stored anywhere, because ``file_name`` uses the bare image name.
        img_suffix: Extension of the image files, including the dot.
    """

    def __init__(self,
                 path,
                 cls_map='arcsoft/cat_id_map.txt',
                 path_replace=None,
                 img_suffix='.jpg'):
        self.path = path
        self.path_replace = path_replace
        self.img_suffix = img_suffix
        self.txt_list = cvtools.get_files_list(
            self.path, file_type='.txt', basename=True)
        # Optional integrity check of images against labels; enable it if
        # every label file is required to have a matching image:
        # self.img_list = cvtools.get_files_list(
        #     self.path, file_type=img_suffix)
        # assert len(self.img_list) == len(self.txt_list)
        self.cls_map = cvtools.read_key_value(cls_map)
        self.coco_dataset = {
            "info": {
                "description": "Open Dataset.",
                "url": "http://www.arcsoft.com",
                "version": "1.0", "year": 2019,
                "contributor": "arcsoft",
                "date_created": cvtools.get_time_str()
            },
            "categories": [],  # filled in by convert()
            "images": [], "annotations": []
        }
        # Next ids to hand out; COCO ids start at 1 here.
        self.imageID = 1
        self.annID = 1
        self.run_timer = cvtools.Timer()

    def convert(self, label_processor=cvtools.rect_reserved):
        """Populate ``self.coco_dataset`` from the label files.

        Args:
            label_processor: Callable applied to every parsed label; it must
                return a dict with at least ``'bbox'`` and ``'category'``
                keys. Labels whose ``'bbox'`` is empty are dropped.

        Images that cannot be opened are reported and skipped. When a label
        has a category missing from ``cls_map``, the remaining labels of
        that file are skipped (annotations already added for it are kept).
        """
        # Invert name->id into id->name; if an id is repeated, the later
        # name overrides the earlier one.
        id_cats = {value: key for key, value in self.cls_map.items()}
        for cat_id, cat_name in id_cats.items():
            self.coco_dataset['categories'].append({
                'id': int(cat_id),  # 0 for background
                'name': cat_name,
                'supercategory': cat_name
            })

        for txt_name in tqdm(self.txt_list):
            img_name = txt_name.replace('.txt', self.img_suffix)
            txt_file = osp.join(self.path, txt_name)
            img_file = osp.join(self.path, img_name)

            # Only the size is needed: PIL opens the file lazily without
            # decoding the raster data. The context manager closes the file
            # handle, which the plain Image.open() call used to leak.
            try:
                with Image.open(img_file) as im:
                    width, height = im.size
            except (OSError, Image.DecompressionBombError) as e:
                # OSError covers missing and unreadable/corrupt files;
                # DecompressionBombError is raised for oversized images.
                print(e)
                continue

            if self.path_replace is not None:
                for old, new in self.path_replace.items():
                    img_file = img_file.replace(old, new)

            # Reserve the id for this image right away so that every image
            # written to the dataset gets a unique id, including images
            # without labels (previously such an image shared its id with
            # the next one).
            image_id = self.imageID
            self.imageID += 1
            self.coco_dataset["images"].append({
                'file_name': img_name,  # use relative path
                'id': image_id,
                'width': width,
                'height': height
            })

            labels = cvtools.read_arcsoft_txt_format(txt_file)
            if len(labels) == 0:
                continue
            for label in labels:
                # Change label_processor for dataset-specific labels.
                label = label_processor(label)
                if len(label['bbox']) == 0:
                    continue  # not expected to happen
                label['id'] = self.annID
                label['image_id'] = image_id
                label['segmentation'] = []
                label['iscrowd'] = 0
                try:
                    label['category_id'] = int(
                        self.cls_map[label['category']])
                except KeyError:
                    print('skip file: {}'.format(txt_file))
                    break
                self.coco_dataset['annotations'].append(label)
                self.annID += 1

    def save_json(self, to_file='cocolike.json'):
        """Write ``self.coco_dataset`` to ``to_file`` as JSON.

        Args:
            to_file: Destination path. Its parent directory is created when
                it does not exist yet.
        """
        dirname = osp.dirname(to_file)
        if dirname:
            # Create the target directory itself; the previous code created
            # its parent instead and failed for single-level paths.
            os.makedirs(dirname, exist_ok=True)
        with open(to_file, 'w') as f:
            json.dump(self.coco_dataset, f)  # pass indent=4 for readability
        print('!save {} finished'.format(to_file))
if __name__ == '__main__':
    # Example run: convert the head labels under E:/data/person and write
    # the COCO-style result to arcsoft/person_head.json.
    converter = Arcsoft2COCO(
        'E:/data/person',
        cls_map='arcsoft/head_id_map.txt',
        path_replace={'\\': '/'},
        img_suffix='.jpg',
    )
    converter.convert(label_processor=cvtools.head_reserved)
    converter.save_json('arcsoft/person_head.json')