# -*- coding:utf-8 -*-
# author: gfjiangly
# time: 2019/5/9 15:21
# e-mail: jgf0719@foxmail.com
# software: PyCharm
import json
import os
from tqdm import tqdm
from PIL import Image
from cvtools.file_io.read import read_files_to_list
from cvtools.utils.timer import get_time_str
from cvtools.file_io.read import read_key_value
class Jiang2COCO(object):
    """Convert "jiang"-style text annotations into a COCO-like dataset dict.

    The produced dataset differs slightly from the official COCO format:
    officially, an image's ``file_name`` is only the bare image name (the
    path is assembled at lookup time and the names follow a regular
    pattern). Private datasets tend to have irregular names and paths, so
    here ``file_name`` holds the full image path (after ``path_replace``
    substitutions). Keep this in mind when reading the resulting label file
    through the COCO API.

    Each annotation line is whitespace-separated: the first token is the
    image path and every following token is one comma-separated box,
    ``x1,y1,x2,y2,cls`` by default, or ``cls,x1,y1,x2,y2`` when
    ``self.cls_first`` is set to True.

    Args:
        root: Passed to ``read_files_to_list`` as ``root`` (presumably the
            directory containing ``files`` -- confirm against that helper).
        files: Annotation file(s) handed to ``read_files_to_list``.
        cls_map: Handed to ``read_key_value``; the result must provide
            ``.items()`` yielding ``(class_id, class_name)`` pairs where
            ``class_id`` is convertible with ``int()``.
        path_replace: Optional mapping ``{old_substring: new_substring}``
            applied to every image path. ``None`` means no replacement.
    """

    def __init__(self, root, files, cls_map='', path_replace=None):
        self.root = root
        self.files = files
        self.path_replace = path_replace
        self.lines = read_files_to_list(self.files, root=self.root)
        # When True the class id is the first value of each box token
        # instead of the fifth.
        self.cls_first = False
        self.cls_map = read_key_value(cls_map)
        # COCO ids are 1-based running counters.
        self.imageID = 1
        self.annID = 1
        self.coco_dataset = {
            "info": {
                "description": "This is stable 0.0.0 version "
                               "of the 2019 jiang's dataset format.",
                "url": "https://github.com/gfjiangly/cvtools",
                "version": "0.1", "year": 2019,
                "contributor": "jiang",
                "date_created": get_time_str()
            },
            "categories": [],
            "images": [], "annotations": []
        }

    def convert(self):
        """Fill ``self.coco_dataset`` from the loaded annotation lines.

        Appends one category per ``cls_map`` entry, one image record per
        readable image and one annotation per box. Images that cannot be
        found (or that trip PIL's decompression-bomb guard) are reported
        with ``print`` and skipped together with their boxes. Blank lines
        are ignored.
        """
        for key, value in self.cls_map.items():
            self.coco_dataset['categories'].append({
                'id': int(key) + 1,
                'name': value,
                'supercategory': value
            })

        # path_replace defaults to None; treat that as "no replacements"
        # instead of failing on None.items().
        replacements = self.path_replace or {}

        for line in tqdm(self.lines):
            fields = line.strip().split()
            if not fields:
                # Blank line: nothing to convert.
                continue
            image_name = fields[0]
            for old, new in replacements.items():
                image_name = image_name.replace(old, new)

            # Read the image only to obtain its width and height.
            try:
                # PIL opens the file lazily (no raster data is loaded); the
                # context manager closes the file handle once the size is
                # known.
                with Image.open(image_name) as im:
                    img_width, img_height = im.size
            except (FileNotFoundError, Image.DecompressionBombError) as e:
                # Image.DecompressionBombError is raised for oversized images.
                print(e)
                continue

            # Add the image record to the dataset.
            self.coco_dataset["images"].append({
                'file_name': image_name,
                'id': self.imageID,
                'width': img_width,
                'height': img_height
            })

            for token in fields[1:]:
                values = list(map(int, token.strip().split(',')))
                # Locate the class id and the four corner coordinates.
                if self.cls_first:
                    cls_id = values[0]
                    coords = values[1:5]
                else:
                    cls_id = values[4]
                    coords = values[0:4]
                x1, y1, x2, y2 = map(float, coords)
                # Clamp so inverted corners give an empty box rather than a
                # negative extent.
                box_width = max(0., x2 - x1)
                box_height = max(0., y2 - y1)
                self.coco_dataset['annotations'].append({
                    'area': box_width * box_height,
                    'bbox': [x1, y1, box_width, box_height],
                    'category_id': int(cls_id) + 1,  # 0 is for background
                    'id': self.annID,
                    'image_id': self.imageID,
                    'iscrowd': 0,
                    # Mask: the rectangle's four vertices, clockwise from
                    # the top-left corner.
                    'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
                })
                self.annID += 1
            self.imageID += 1

    def save_json(self, to_file='cocolike.json'):
        """Write ``self.coco_dataset`` to ``to_file`` as JSON.

        The directory that will contain ``to_file`` is created first when it
        does not exist yet.

        Args:
            to_file: Destination path of the JSON file.
        """
        dirname = os.path.dirname(to_file)
        if dirname != '':
            # Create the target directory itself (not its parent), including
            # any missing intermediate directories.
            os.makedirs(dirname, exist_ok=True)
        with open(to_file, 'w') as f:
            json.dump(self.coco_dataset, f)  # indent=4 would be friendlier
        print('!save {} finished'.format(to_file))
if __name__ == '__main__':
    # Example run: convert the "our_train" label list into a COCO-like file.
    # Image paths recorded under /root/data/ are rewritten to the local
    # F:/data/detection/ location before the images are opened.
    converter = Jiang2COCO(
        'jiang/label/train/',
        ['our_train.txt'],
        path_replace={'/root/data/': 'F:/data/detection/'},
        cls_map='jiang/our_cat_id_map.txt',
    )
    converter.convert()
    converter.save_json(to_file='jiang/our.json')