shunk031 · shunk031 · Jun 20, 2024
diff --git a/MSCOCO.py b/MSCOCO.py
diff --git a/annotation.py b/annotation.py
@@ -0,0 +1,9 @@
+from dataclasses import dataclass
+
+from .typehint import AnnotationId, ImageId
+
+
+@dataclass
+class AnnotationData(object):
+    annotation_id: AnnotationId
+    image_id: ImageId
diff --git a/base_example.py b/base_example.py
@@ -0,0 +1,22 @@
+from typing import TypedDict
+
+from .typehint import ImageId, LicenseId, PilImage
+
+
+# Dictionary form of a license record; used as the type of the ``license``
+# entry of ``BaseExample``.
+class LicenseDict(TypedDict):
+    license_id: LicenseId  # identifier of the license
+    name: str  # presumably the license's display name -- confirm against the data
+    url: str  # presumably a link to the license text -- confirm against the data
+
+
+# Keys shared by every generated example: the image metadata fields, the
+# image object itself and the license record of the image.
+class BaseExample(TypedDict):
+    image_id: ImageId
+    image: PilImage  # the image object (``PilImage`` alias from ``.typehint``)
+    file_name: str
+    coco_url: str
+    height: int
+    width: int
+    date_captured: str
+    flickr_url: str
+    license_id: LicenseId  # id of the license; the full record is under ``license``
+    license: LicenseDict
diff --git a/caption.py b/caption.py
@@ -0,0 +1,89 @@
+import os
+from collections import defaultdict
+from dataclasses import asdict, dataclass
+from typing import Dict, Iterator, List, Tuple, TypedDict
+
+import datasets as ds
+from tqdm.auto import tqdm
+
+from .annotation import AnnotationData
+from .base_example import BaseExample
+from .image import ImageData
+from .license import LicenseData
+from .processor import MsCocoProcessor
+from .typehint import AnnotationId, ImageId, JsonDict, LicenseId
+
+
+@dataclass
+class CaptionsAnnotationData(AnnotationData):
+    caption: str
+
+    @classmethod
+    def from_dict(cls, json_dict: JsonDict) -> "CaptionsAnnotationData":
+        return cls(
+            annotation_id=json_dict["id"],
+            image_id=json_dict["image_id"],
+            caption=json_dict["caption"],
+        )
+
+
+class CaptionAnnotationDict(TypedDict):
+    annotation_id: AnnotationId
+    caption: str
+
+
+# Example type for the captions data: the ``BaseExample`` keys plus the list
+# of caption annotations that belong to the image.
+class CaptionExample(BaseExample):
+    annotations: List[CaptionAnnotationDict]
+
+
+class CaptionsProcessor(MsCocoProcessor):
+    def get_features(self, *args, **kwargs) -> ds.Features:
+        features_dict = self.get_features_base_dict()
+        annotations = ds.Sequence(
+            {
+                "annotation_id": ds.Value("int64"),
+                "image_id": ds.Value("int64"),
+                "caption": ds.Value("string"),
+            }
+        )
+        features_dict.update({"annotations": annotations})
+        return ds.Features(features_dict)
+
+    def load_data(
+        self,
+        ann_dicts: List[JsonDict],
+        tqdm_desc: str = "Load captions data",
+        **kwargs,
+    ) -> Dict[ImageId, List[CaptionsAnnotationData]]:
+        annotations = defaultdict(list)
+        for ann_dict in tqdm(ann_dicts, desc=tqdm_desc):
+            ann_data = CaptionsAnnotationData.from_dict(ann_dict)
+            annotations[ann_data.image_id].append(ann_data)
+        return annotations
+
+    def generate_examples(
+        self,
+        image_dir: str,
+        images: Dict[ImageId, ImageData],
+        annotations: Dict[ImageId, List[CaptionsAnnotationData]],
+        licenses: Dict[LicenseId, LicenseData],
+        **kwargs,
+    ) -> Iterator[Tuple[int, CaptionExample]]:
+        for idx, image_id in enumerate(images.keys()):
+            image_data = images[image_id]
+            image_anns = annotations[image_id]
+
+            assert len(image_anns) > 0
+
+            image = self.load_image(
+                image_path=os.path.join(image_dir, image_data.file_name),
+            )
+            example = asdict(image_data)
+            example["image"] = image
+            example["license"] = asdict(licenses[image_data.license_id])
+
+            example["annotations"] = []
+            for ann in image_anns:
+                example["annotations"].append(asdict(ann))
+
+            yield idx, example  # type: ignore
diff --git a/category.py b/category.py
@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+
+from .typehint import JsonDict
+
+
+@dataclass
+class CategoryData(object):
+    category_id: int
+    name: str
+    supercategory: str
+
+    @classmethod
+    def from_dict(cls, json_dict: JsonDict) -> "CategoryData":
+        return cls(
+            category_id=json_dict["id"],
+            name=json_dict["name"],
+            supercategory=json_dict["supercategory"],
+        )
diff --git a/const.py b/const.py
@@ -0,0 +1,99 @@
+from typing import Final, List
+
+# Object category names (80 entries). Declared ``Final``: the name is not
+# meant to be rebound.
+# NOTE(review): consumers presumably rely on this order to map names to
+# indices -- confirm before reordering or inserting entries.
+CATEGORIES: Final[List[str]] = [
+    "person",
+    "bicycle",
+    "car",
+    "motorcycle",
+    "airplane",
+    "bus",
+    "train",
+    "truck",
+    "boat",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "backpack",
+    "umbrella",
+    "handbag",
+    "tie",
+    "suitcase",
+    "frisbee",
+    "skis",
+    "snowboard",
+    "sports ball",
+    "kite",
+    "baseball bat",
+    "baseball glove",
+    "skateboard",
+    "surfboard",
+    "tennis racket",
+    "bottle",
+    "wine glass",
+    "cup",
+    "fork",
+    "knife",
+    "spoon",
+    "bowl",
+    "banana",
+    "apple",
+    "sandwich",
+    "orange",
+    "broccoli",
+    "carrot",
+    "hot dog",
+    "pizza",
+    "donut",
+    "cake",
+    "chair",
+    "couch",
+    "potted plant",
+    "bed",
+    "dining table",
+    "toilet",
+    "tv",
+    "laptop",
+    "mouse",
+    "remote",
+    "keyboard",
+    "cell phone",
+    "microwave",
+    "oven",
+    "toaster",
+    "sink",
+    "refrigerator",
+    "book",
+    "clock",
+    "vase",
+    "scissors",
+    "teddy bear",
+    "hair drier",
+    "toothbrush",
+]
+
+# Supercategory names (12 entries). Declared ``Final``: the name is not meant
+# to be rebound.
+# NOTE(review): presumably the values a category's ``supercategory`` field can
+# take -- confirm against the annotation files.
+SUPER_CATEGORIES: Final[List[str]] = [
+    "person",
+    "vehicle",
+    "outdoor",
+    "animal",
+    "accessory",
+    "sports",
+    "kitchen",
+    "food",
+    "furniture",
+    "electronic",
+    "appliance",
+    "indoor",
+]
diff --git a/image.py b/image.py
@@ -0,0 +1,28 @@
+from dataclasses import dataclass
+
+from .typehint import ImageId, JsonDict, LicenseId
+
+
+@dataclass
+class ImageData(object):
+    image_id: ImageId
+    license_id: LicenseId
+    file_name: str
+    coco_url: str
+    height: int
+    width: int
+    date_captured: str
+    flickr_url: str
+
+    @classmethod
+    def from_dict(cls, json_dict: JsonDict) -> "ImageData":
+        return cls(
+            image_id=json_dict["id"],
+            license_id=json_dict["license"],
+            file_name=json_dict["file_name"],
+            coco_url=json_dict["coco_url"],
+            height=json_dict["height"],
+            width=json_dict["width"],
+            date_captured=json_dict["date_captured"],
+            flickr_url=json_dict["flickr_url"],
+        )
diff --git a/info.py b/info.py
@@ -0,0 +1,17 @@
+from dataclasses import dataclass
+
+from .typehint import JsonDict
+
+
+@dataclass
+class AnnotationInfo(object):
+    description: str
+    url: str
+    version: str
+    year: str
+    contributor: str
+    date_created: str
+
+    @classmethod
+    def from_dict(cls, json_dict: JsonDict) -> "AnnotationInfo":
+        return cls(**json_dict)