Notebook for Arm UNICEF Disaster Vulnerability Challenge.
This starter notebook is designed for participants in the competition. It uses the Hugging Face Transformers library and PyTorch Lightning to streamline the development of deep learning models. With this setup, I achieved a preliminary score of approximately 0.4 using the same model I use here. The provided dataset is annotated in the COCO format, which supports a wide range of object detection and segmentation tasks. Submission code is also provided.
You're encouraged to experiment and iterate on this foundation to improve your results.
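For orientation, COCO-style boxes are stored as [x_min, y_min, width, height] in pixel coordinates. A single annotation entry in the JSON built later in this notebook looks roughly like the sketch below (illustrative values taken from the first training row shown in train.head()):

# Illustrative COCO-style annotation entry (values from the first row of the training CSV)
example_annotation = {
    "id": 0,
    "image_id": 0,                     # integer index assigned to id_w55q2qr62fsk.tif
    "category_id": 2,                  # 2 = "Tin"
    "bbox": [122.0, 1.0, 42.0, 30.0],  # [x_min, y_min, width, height]
    "area": 42.0 * 30.0,               # width * height
    "iscrowd": 0,
    "segmentation": [],                # empty: boxes only, no masks
}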
LR = 1e-4
LR_backbone = 1e-5
WEIGHT_DECAY = 1e-4
BATCH_TRAIN = 8
BATCH_VAL = 4
n_folds = 5
seed = 42
%%capture
!pip install pycocotools
import re
from PIL import Image, ImageDraw
import pandas as pd
import json
import os
import ast
from sklearn.model_selection import train_test_split
import wandb
from pytorch_lightning.loggers import WandbLogger
import torchvision
from transformers import DetrImageProcessor, DetrForObjectDetection
from torch.utils.data import DataLoader
import torch
import pytorch_lightning as pl
from pytorch_lightning import Trainer
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
import random
from sklearn.metrics import mean_absolute_error
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn
COCO_DATASET = "/kaggle/input/zindi-od1/Images_unicef/Images_unicef"
DATA_FOLDER ='/kaggle/working/'
TEST_CSV = '/kaggle/input/zindi-od1/test_ufinal.csv'
IMAGE_FOLDER_TRAIN = '/kaggle/input/zindi-od1/Images_unicef/Images_unicef/Train'
IMAGE_FOLDER_TEST = '/kaggle/input/zindi-od1/Images_unicef/Images_unicef/Test'
TRAIN_CSV = "/kaggle/input/zindi-od1/train_ufinal.csv"
labels_dict = {1: "Other", 2: "Tin", 3: "Thatch"}
def seed_everything(seed):
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # keep cuDNN deterministic; benchmark mode trades reproducibility for speed
seed_everything(seed)
train = pd.read_csv(TRAIN_CSV, header=0)
train.dropna(inplace=True)
train['bbox'] = train['bbox'].apply(ast.literal_eval)
# Initialize the COCO format dictionary
coco_format = {
"images": [],
"annotations": [],
"categories": [
{"id": 1, "name": "Other"},
{"id": 2, "name": "Tin"},
{"id": 3, "name": "Thatch"}
]
}
index_dict = {}
i = 0
unique_image_ids = train['image_id'].unique()
for image_id in unique_image_ids:
index_dict[image_id] = i
i += 1
# Helper function to get image dimensions
def get_image_dimensions(image_path):
with Image.open(image_path) as img:
return img.width, img.height
if __name__ == '__main__':
# Populate images section
unique_image_ids = train['image_id'].unique()
for image_id in unique_image_ids:
file_name = f"{image_id}" # Adjust the extension if needed
for j in [IMAGE_FOLDER_TRAIN , IMAGE_FOLDER_TEST] :
try :
image_path = os.path.join(j, file_name)
width, height = get_image_dimensions(image_path)
coco_format["images"].append({
"id": index_dict[image_id],
"file_name": file_name,
"width": width,
"height": height
})
            except (FileNotFoundError, OSError):  # skip the folder that does not contain this image
                continue
# Annotations
for index, row in train.iterrows():
annotation = {
"id": index,
"image_id": index_dict[row["image_id"]],
"category_id": int(row["category_id"]),
"bbox": row["bbox"],
"area": row["bbox"][2] * row["bbox"][3], # width * height
"iscrowd": 0,
"segmentation": [] # Empty if not using segmentation
}
coco_format["annotations"].append(annotation)
# Save to JSON
with open('/kaggle/working/dataset_coco_format.json', 'w') as f:
json.dump(coco_format, f, indent=4)
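As an optional sanity check (a sketch, assuming the JSON was written to the path above), the file can be loaded back with pycocotools to confirm the image and annotation counts match the dataframe:

# Optional: verify the generated COCO file with pycocotools
from pycocotools.coco import COCO

coco_check = COCO('/kaggle/working/dataset_coco_format.json')
print("images:", len(coco_check.getImgIds()))        # should equal train['image_id'].nunique()
print("annotations:", len(coco_check.getAnnIds()))   # should equal len(train)
print("categories:", [c['name'] for c in coco_check.loadCats(coco_check.getCatIds())])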
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api")
PROJECT_NAME = 'detr_hous_count'
os.environ['WANDB_NOTEBOOK_NAME'] = 'DETR_Unicef.ipynb'
wandb.login(key=secret_value_0)
wandb.init(project=PROJECT_NAME , name = "detr_resnet-50")
wandb_logger = WandbLogger(name="MyModelRun", project=PROJECT_NAME)
wandb: WARNING WANDB_NOTEBOOK_NAME should be a path to a notebook file, couldn't find Getting_started_detr_hf.ipynb.
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
wandb: WARNING If you're specifying your api key in code, ensure this code is not shared publicly.
wandb: WARNING Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
wandb: Currently logged in as: lassouedaymenla (comp1). Use `wandb login --relogin` to force relogin
/kaggle/working/wandb/run-20240622_185314-aniw3g3z
train.head()
|   | image_id | bbox | category_id | id | height | width | x_min | y_min | x_max | y_max |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | id_w55q2qr62fsk.tif | [122.0, 1.0, 42.0, 30.0] | 2.0 | 6356.0 | 500 | 500 | 122.0 | 1.0 | 164.0 | 31.0 |
| 1 | id_fvldv7o1kn9d.tif | [500.0, 141.0, 74.0, 70.0] | 2.0 | 2305.0 | 1000 | 1000 | 500.0 | 141.0 | 574.0 | 211.0 |
| 2 | id_5d1r9l1jp7b5.tif | [304.0, 525.0, 54.0, 58.0] | 2.0 | 238.0 | 1000 | 1000 | 304.0 | 525.0 | 358.0 | 583.0 |
| 3 | id_45qru79t6s4n.tif | [187.0, 298.0, 44.0, 56.0] | 2.0 | 921.0 | 500 | 500 | 187.0 | 298.0 | 231.0 | 354.0 |
| 4 | id_84cjf2pbqmtv.tif | [349.0, 721.0, 58.0, 59.0] | 2.0 | 1362.0 | 1000 | 1000 | 349.0 | 721.0 | 407.0 | 780.0 |
# Load the COCO dataset JSON
file_path = os.path.join("/kaggle/working/dataset_coco_format.json")
with open(file_path, 'r') as f:
coco = json.load(f)
# Extract annotations and images
annotations = coco['annotations']
images = coco['images']
# Split images directly
images_train, images_val = train_test_split(images, test_size=0.2)
# Get the IDs of the split images
images_train_ids = {img['id'] for img in images_train}
images_val_ids = {img['id'] for img in images_val}
# Filter annotations to only include those present in train/val images
annotations_train = [ann for ann in annotations if ann['image_id'] in images_train_ids]
annotations_val = [ann for ann in annotations if ann['image_id'] in images_val_ids]
# Create new JSON objects for train and validation sets
coco_train = {'images': images_train, 'annotations': annotations_train, 'categories': coco['categories']}
coco_val = {'images': images_val, 'annotations': annotations_val, 'categories': coco['categories']}
# Save new JSON files for train and validation datasets
with open(os.path.join(DATA_FOLDER, 'train.json'), 'w') as f:
json.dump(coco_train, f)
with open(os.path.join(DATA_FOLDER, 'val.json'), 'w') as f:
json.dump(coco_val, f)
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder,ann_file , processor):
super(CocoDetection, self).__init__(img_folder, ann_file)
self.processor = processor
def __getitem__(self, idx):
# read in PIL image and target in COCO format
# feel free to add data augmentation here before passing them to the next step
img, target = super(CocoDetection, self).__getitem__(idx)
# preprocess image and target (converting target to DETR format, resizing + normalization of both image and target)
image_id = self.ids[idx]
target = {'image_id': image_id, 'annotations': target}
encoding = self.processor(images=img, annotations=target, return_tensors="pt")
pixel_values = encoding["pixel_values"].squeeze() # remove batch dimension
target = encoding["labels"][0] # remove batch dimension
return pixel_values, target
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
train_dataset = CocoDetection(img_folder=IMAGE_FOLDER_TRAIN,ann_file='/kaggle/working/train.json', processor=processor )
val_dataset = CocoDetection(img_folder=IMAGE_FOLDER_TRAIN,ann_file='/kaggle/working/val.json', processor=processor)
print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))
loading annotations into memory... Done (t=0.08s)
creating index... index created!
loading annotations into memory... Done (t=0.02s)
creating index... index created!
Number of training examples: 1972
Number of validation examples: 493
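To see what the processor returns for a single sample (a quick inspection sketch, not required for training): pixel values are resized and normalized, and the target is converted to the format DETR expects.

# Inspect a single preprocessed example
pixel_values, target = train_dataset[0]
print(pixel_values.shape)          # e.g. torch.Size([3, H, W]) after resizing
print(target.keys())               # includes 'class_labels' and 'boxes'
print(target['class_labels'][:5])  # category ids of the first few objects
print(target['boxes'][:2])         # normalized (cx, cy, w, h) boxes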
# based on https://github.com/woctezuma/finetune-detr/blob/master/finetune_detr.ipynb
image_ids = train_dataset.coco.getImgIds()
# let's pick a random image
image_id = image_ids[np.random.randint(0, len(image_ids))]
print('Image n°{}'.format(image_id))
image = train_dataset.coco.loadImgs(image_id)[0]
print(image['file_name'])
image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, image['file_name']))
annotations = train_dataset.coco.imgToAnns[image_id]
draw = ImageDraw.Draw(image, "RGBA")
cats = train_dataset.coco.cats
id2label = {k: v['name'] for k,v in cats.items()}
for annotation in annotations:
box = annotation['bbox']
class_idx = annotation['category_id']
x,y,w,h = tuple(box)
draw.rectangle((x,y,x+w,y+h), outline='red', width=1)
draw.text((x, y), id2label[class_idx], fill='white')
image
Image n°1678
id_w6up52p9thdw.tif
def collate_fn(batch):
pixel_values = [item[0] for item in batch]
encoding = processor.pad(pixel_values, return_tensors="pt")
labels = [item[1] for item in batch]
batch = {}
batch['pixel_values'] = encoding['pixel_values']
batch['pixel_mask'] = encoding['pixel_mask']
batch['labels'] = labels
return batch
train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=BATCH_TRAIN , shuffle=True)
val_dataloader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=BATCH_VAL)
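A quick look at the batch structure produced by collate_fn (a sketch; the padded spatial size depends on the largest image in the batch):

# Peek at one padded batch: pixel_mask marks real pixels (1) versus padding (0)
batch = next(iter(train_dataloader))
print(batch['pixel_values'].shape)  # (BATCH_TRAIN, 3, H_max, W_max)
print(batch['pixel_mask'].shape)    # (BATCH_TRAIN, H_max, W_max)
print(len(batch['labels']))         # BATCH_TRAIN target dicts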
class Detr(pl.LightningModule):
def __init__(self, lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4):
super().__init__()
        self.model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-50",
            revision="no_timm",
            num_labels=3,
            ignore_mismatched_sizes=True,  # classification head is re-initialized for 3 classes instead of COCO's 91
        )
self.lr = lr
self.lr_backbone = lr_backbone
self.weight_decay = weight_decay
def forward(self, pixel_values, pixel_mask):
outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)
return outputs
def common_step(self, batch, batch_idx):
pixel_values = batch["pixel_values"]
pixel_mask = batch["pixel_mask"]
labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]
outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)
loss = outputs.loss
loss_dict = outputs.loss_dict
return loss, loss_dict
def training_step(self, batch, batch_idx):
loss, loss_dict = self.common_step(batch, batch_idx)
self.log("training_loss", loss)
for k, v in loss_dict.items():
self.log("train_" + k, v.item())
return loss
def validation_step(self, batch, batch_idx):
loss, loss_dict = self.common_step(batch, batch_idx)
self.log("validation_loss", loss)
for k, v in loss_dict.items():
self.log("validation_" + k, v.item())
return loss
def configure_optimizers(self):
param_dicts = [
{"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
{
"params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
"lr": self.lr_backbone,
},
]
optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
weight_decay=self.weight_decay)
return optimizer
def train_dataloader(self):
return train_dataloader
def val_dataloader(self):
return val_dataloader
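If you want the best weights kept automatically, a ModelCheckpoint callback can be passed to the Trainer constructed below (a sketch, assuming you monitor the validation_loss logged above); the wrapped Hugging Face model can also be exported with save_pretrained after training.

# Optional: keep the best checkpoint by validation loss (pass callbacks=[checkpoint_cb] to the Trainer)
from pytorch_lightning.callbacks import ModelCheckpoint

checkpoint_cb = ModelCheckpoint(
    monitor="validation_loss",
    mode="min",
    save_top_k=1,
    dirpath="/kaggle/working/checkpoints",
    filename="detr-best",
)
# After training, export the fine-tuned model and processor for reuse:
# model.model.save_pretrained("/kaggle/working/detr-finetuned")
# processor.save_pretrained("/kaggle/working/detr-finetuned")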
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
def correct_bbox_type(bbox):
return [np.float32(point) for point in re.findall(r"\d+\.\d+", bbox)]
# import cv2
# import matplotlib.pyplot as plt
# import albumentations as A
# from albumentations.pytorch import ToTensorV2
# import pandas as pd
# import numpy as np
# import re
# def correct_bbox_type(bbox):
# return [np.float32(point) for point in re.findall(r"\d+\.\d+", bbox)]
# def visualize_augmentation(image, bboxes, labels, transform):
# # Apply augmentations
# augmented = transform(image=image, bboxes=bboxes, labels=labels)
# augmented_image = augmented['image']
# augmented_bboxes = augmented['bboxes']
# # Convert image to a format suitable for display
# augmented_image = augmented_image.permute(1, 2, 0).cpu().numpy()
# # Plot image with bounding boxes
# fig, ax = plt.subplots(1, 1, figsize=(12, 8))
# ax.imshow(augmented_image)
# for bbox in augmented_bboxes:
# x_min, y_min, width, height = bbox
# rect = plt.Rectangle((x_min, y_min), width, height, edgecolor='r', facecolor='none', linewidth=2)
# ax.add_patch(rect)
# plt.show()
# # Example image and bounding boxes
# image_path = '/kaggle/input/zindi-od1/Images_unicef/Images_unicef/Train/id_w55q2qr62fsk.tif'
# image = cv2.imread(image_path)
# # Check if the image is loaded correctly
# if image is None:
# raise ValueError(f"Image not found or unable to open: {image_path}")
# # Convert image to RGB
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# train = pd.read_csv('/kaggle/input/zindi-od1/train_ufinal.csv')
# # Parse bounding boxes
# bbox_strings = train[train['image_id'] == "id_w55q2qr62fsk.tif"]['bbox']
# bboxes = [correct_bbox_type(bbox_string) for bbox_string in bbox_strings]
# # print(len(bboxes))
# # Parse labels
# labels = train[train['image_id'] == "id_w55q2qr62fsk.tif"]['category_id'].tolist()
# # print(labels)
# # Define your augmentations
# train_transforms = A.Compose([
# A.OneOf([
# A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.9),
# A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9)
# ], p=0.9),
# A.ToGray(p=0.01),
# A.HorizontalFlip(p=0.5),
# A.VerticalFlip(p=0.5),
# A.Rotate(limit=45, p=1),
# A.Resize(height=512, width=512, p=1),
# A.RandomCrop(p=0.5, height=400, width=400) ,
# ToTensorV2(p=0.5)
# ], p=1.0, bbox_params=A.BboxParams(format='coco', min_area=0, min_visibility=0, label_fields=['labels']))
# # Visualize the augmentation
# visualize_augmentation(image, bboxes, labels, train_transforms)
model = Detr()
trainer = Trainer(max_epochs=10, accelerator='gpu',logger=wandb_logger)
trainer.fit(model)
Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([4, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.
val_dataloader = model.val_dataloader()
trainer.validate(model, val_dataloader)
| Validate metric               | DataLoader 0        |
|-------------------------------|---------------------|
| validation_cardinality_error  | 8.33874225616455    |
| validation_loss               | 1.205262303352356   |
| validation_loss_bbox          | 0.03503163903951645 |
| validation_loss_ce            | 0.20140598714351654 |
| validation_loss_giou          | 0.4143490493297577  |
[{'validation_loss': 1.205262303352356,
'validation_loss_ce': 0.20140598714351654,
'validation_loss_bbox': 0.03503163903951645,
'validation_loss_giou': 0.4143490493297577,
'validation_cardinality_error': 8.33874225616455}]
def count_labels(counts):
labels = {1:0,2:0,3:0}
counts_cpu = counts.cpu().numpy()
for i in counts_cpu:
labels[i] +=1
return labels
submission_val_df = pd.DataFrame(columns=["image_id", "Target"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()
for image_name in tqdm([images_val[i]['file_name'] for i in range(len(images_val))] , desc = "Inference"):
image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, f'{image_name}'))
inputs = processor(images=image, return_tensors="pt").to(device)
with torch.no_grad():
outputs = model(**inputs)
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
result_dict = count_labels(results['labels'])
# print(result_dict)
df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
f"{image_name.split('.')[0]}_2",
f"{image_name.split('.')[0]}_3"],
"Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
submission_val_df = pd.concat([submission_val_df, df] )
Inference: 100%|██████████| 493/493 [00:41<00:00, 12.01it/s]
def count_labels_temp(counts):
labels = {1: 0, 2: 0, 3: 0}
for i in counts:
labels[i] += 1
return labels
sub_temp_df = pd.DataFrame(columns=["image_id", "Target"])
correction_df = train[train['image_id'].isin([images_val[i]['file_name'] for i in range(len(images_val))])]
for image_name in tqdm(correction_df['image_id'].unique() , desc = "Inference"):
temp_img_df = correction_df[correction_df['image_id']==image_name]
result_dict = count_labels_temp(temp_img_df.category_id)
df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
f"{image_name.split('.')[0]}_2",
f"{image_name.split('.')[0]}_3"],
"Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
sub_temp_df = pd.concat([sub_temp_df, df] )
Inference: 100%|██████████| 493/493 [00:01<00:00, 353.01it/s]
mean_absolute_error(sub_temp_df['Target'] ,submission_val_df['Target'] )
5.658553076402975
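Because submission_val_df follows the order of images_val while sub_temp_df follows the order of the training dataframe, the row-wise MAE above assumes both are aligned; joining on image_id is a safer way to compute it (a sketch):

# Safer MAE: align predictions and ground truth on image_id before comparing
merged = sub_temp_df.merge(submission_val_df, on="image_id", suffixes=("_true", "_pred"))
print(mean_absolute_error(merged["Target_true"].astype(float), merged["Target_pred"].astype(float)))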
sub_temp_df['Target']
0 0
1 19
2 0
0 0
1 26
..
1 1
2 0
0 0
1 0
2 1
Name: Target, Length: 1479, dtype: object
submission_val_df['Target']
0 0
1 73
2 0
0 0
1 4
..
1 6
2 0
0 0
1 4
2 0
Name: Target, Length: 1479, dtype: object
def custom_mae(train, images_val, model, processor):
def count_labels(counts):
labels = {1:0,2:0,3:0}
counts_cpu = counts.cpu().numpy()
for i in counts_cpu:
labels[i] +=1
return labels
submission_val_df = pd.DataFrame(columns=["image_id", "Target"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()
for image_name in tqdm([images_val[i]['file_name'] for i in range(len(images_val))] , desc = "Inference"):
image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, f'{image_name}'))
inputs = processor(images=image, return_tensors="pt").to(device)
with torch.no_grad():
outputs = model(**inputs)
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
result_dict = count_labels(results['labels'])
# print(result_dict)
df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
f"{image_name.split('.')[0]}_2",
f"{image_name.split('.')[0]}_3"],
"Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
submission_val_df = pd.concat([submission_val_df, df] )
def count_labels_temp(counts):
labels = {1: 0, 2: 0, 3: 0}
for i in counts:
labels[i] += 1
return labels
sub_temp_df = pd.DataFrame(columns=["image_id", "Target"])
correction_df = train[train['image_id'].isin([images_val[i]['file_name'] for i in range(len(images_val))])]
for image_name in tqdm(correction_df['image_id'].unique() , desc = "Inference"):
temp_img_df = correction_df[correction_df['image_id']==image_name]
result_dict = count_labels_temp(temp_img_df.category_id)
df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
f"{image_name.split('.')[0]}_2",
f"{image_name.split('.')[0]}_3"],
"Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
sub_temp_df = pd.concat([sub_temp_df, df] )
return mean_absolute_error(sub_temp_df['Target'] ,submission_val_df['Target'] )
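The helper above is not called in this run (the stop cell below halts "Run All" before the test-time inference); if you want to use it, pass in the objects defined earlier, for example:

# Example usage of the validation helper (assumes model and processor from the cells above)
# val_mae = custom_mae(train, images_val, model, processor)
# print(val_mae)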
stop
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[25], line 1
----> 1 stop

NameError: name 'stop' is not defined
#4.972278566599053
# train[train['image_id'] == "id_ikmbin2sr5e1.tif"].category_id.value_counts()
train.head()
test_df = pd.read_csv(TEST_CSV)
test_list = test_df['image_id'].to_list()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def plot_results(img_name):
image = Image.open(os.path.join(IMAGE_FOLDER_TEST, f'{img_name}'))
width, height = image.size
inputs = processor(images=image, return_tensors="pt").to(device)
with torch.no_grad():
outputs = model(**inputs)
print("Outputs:", outputs.keys())
postprocessed_outputs = processor.post_process_object_detection(outputs,
target_sizes=[(height, width)],
threshold=0.8)
results = postprocessed_outputs[0]
plt.figure(figsize=(16,10))
plt.imshow(image)
ax = plt.gca()
print(f"The image should contain only {len(results['boxes'])} boxes")
for score, label, (xmin, ymin, xmax, ymax) in zip(results['scores'].tolist(), results['labels'].tolist(), results['boxes'].tolist()):
ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
fill=False, linewidth=3))
text = f'{labels_dict[label]}: {score:0.2f}'
ax.text(xmin, ymin, text, fontsize=15,
bbox=dict(facecolor='yellow', alpha=0.5))
plt.axis('off')
plt.show()
import random
model = model.to(device)
random_img = random.choice(test_list)
print(random_img)
plot_results(random_img)
# val_df  # leftover debugging cell: val_df is not defined in this notebook
def count_labels(counts):
labels = {1:0,2:0,3:0}
counts_cpu = counts.cpu().numpy()
for i in counts_cpu:
labels[i] +=1
return labels
submission_df = pd.DataFrame(columns=["image_id", "Target"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()
for image_name in tqdm(test_df['image_id'] , desc = "Inference"):
image = Image.open(os.path.join(IMAGE_FOLDER_TEST, f'{image_name}'))
inputs = processor(images=image, return_tensors="pt").to(device)
with torch.no_grad():
outputs = model(**inputs)
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
result_dict = count_labels(results['labels'])
df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
f"{image_name.split('.')[0]}_2",
f"{image_name.split('.')[0]}_3"],
"Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
submission_df = pd.concat([submission_df, df])
submission_df.head()
submission_df.to_csv("submission.csv", index=False)
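As a final check (a sketch, assuming three rows per test image as built above), confirm the submission has the expected shape before uploading:

# Sanity check: 3 rows (one per roof category) for every test image, no missing targets
assert len(submission_df) == 3 * len(test_df), "unexpected number of submission rows"
assert submission_df['Target'].isna().sum() == 0, "missing predictions"
print("total predicted objects:", submission_df['Target'].astype(int).sum())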