# m2cai16-tool-locations

Expected directory layout:

    m2cai16-tool-locations/
        annotations/
            video01.json   # or .xml / .txt
            video02.json
        frames/
            video01/
                frame_000001.jpg
                ...

Here's a robust parser using PyTorch and torchvision:
import json import os from PIL import Image import torch from torch.utils.data import Dataset from torchvision.ops import box_convert class M2CAI16ToolLocations(Dataset): """Dataset for m2cai16-tool-locations bounding box annotations.""" m2cai16-tool-locations
# Draw boxes img_with_boxes = draw_bounding_boxes(img, boxes, labels=[class_names[l] for l in labels], colors='red', width=2) plt.figure(figsize=(10, 8)) plt.imshow(img_with_boxes.permute(1,2,0)) plt.axis('off') plt.title(f"Frame {idx} — {len(boxes)} tools detected") plt.show() dataset = M2CAI16ToolLocations('./m2cai16-tool-locations') show_annotations(dataset, idx=0) 4. Useful Preprocessing for Training Convert to COCO format (for Detectron2, MMDetection, etc.): m2cai16-tool-locations/ annotations/ video01
import matplotlib.pyplot as plt from torchvision.utils import draw_bounding_boxes from torchvision.transforms import ToTensor def show_annotations(dataset, idx=0): img, target = dataset[idx] if isinstance(img, torch.Tensor): img = (img * 255).byte() if img.max() <= 1 else img else: img = ToTensor()(img).byte() labels=[class_names[l] for l in labels]