Notebook for the Arm UNICEF Disaster Vulnerability Challenge.

This starter notebook is designed for participants in the competition. It uses the Hugging Face Transformers library and PyTorch Lightning to streamline the development of deep learning models. With this setup, I achieved a preliminary score of approximately 0.4 using the same model shown here. The dataset is annotated in the COCO format, which supports a wide range of object detection and segmentation tasks. Submission code is also provided.

You're encouraged to experiment and iterate on this foundation to improve your results.
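
For reference, the COCO format stores each bounding box as [x_min, y_min, width, height]; the corner coordinates shown later in train.head() follow directly from it. A small illustration using the first row of the training CSV:

In [ ]:
# COCO bbox convention: [x_min, y_min, width, height]
x_min, y_min, w, h = [122.0, 1.0, 42.0, 30.0]
x_max, y_max = x_min + w, y_min + h  # -> (164.0, 31.0), matching the x_max / y_max columns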

In [1]:
LR = 1e-4
LR_backbone = 1e-5
WEIGHT_DECAY = 1e-4
BATCH_TRAIN = 8
BATCH_VAL = 4
n_folds = 5
seed = 42
In [2]:
%%capture
!pip install pycocotools
In [ ]:
import re
from PIL import Image, ImageDraw
import pandas as pd
import json
import os
import ast
from sklearn.model_selection import train_test_split
import wandb
from pytorch_lightning.loggers import WandbLogger
import torchvision
from transformers import DetrImageProcessor, DetrForObjectDetection
from torch.utils.data import DataLoader
import torch
import pytorch_lightning as pl
from pytorch_lightning import Trainer
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
import random
from sklearn.metrics import mean_absolute_error
from torchvision.models.detection import fasterrcnn_mobilenet_v3_large_fpn

Adjust the folder names.¶

In [4]:
COCO_DATASET = "/kaggle/input/zindi-od1/Images_unicef/Images_unicef"
DATA_FOLDER ='/kaggle/working/'
TEST_CSV = '/kaggle/input/zindi-od1/test_ufinal.csv'
IMAGE_FOLDER_TRAIN = '/kaggle/input/zindi-od1/Images_unicef/Images_unicef/Train'
IMAGE_FOLDER_TEST = '/kaggle/input/zindi-od1/Images_unicef/Images_unicef/Test'
TRAIN_CSV = "/kaggle/input/zindi-od1/train_ufinal.csv"
labels_dict = {1 : "Other", 2: "Tin" , 3 : "Thatch"}
In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # must be False for deterministic behaviour
In [6]:
seed_everything(seed)
In [7]:
train = pd.read_csv(TRAIN_CSV, header=0)
train.dropna(inplace=True)

train['bbox'] = train['bbox'].apply(ast.literal_eval)

# Initialize the COCO format dictionary
coco_format = {
    "images": [],
    "annotations": [],
    "categories": [
        {"id": 1, "name": "Other"},
        {"id": 2, "name": "Tin"},
        {"id": 3, "name": "Thatch"}
    ]
}
index_dict = {}
i = 0
unique_image_ids = train['image_id'].unique()
for image_id in unique_image_ids:
    index_dict[image_id] = i
    i += 1


# Helper function to get image dimensions
def get_image_dimensions(image_path):
    with Image.open(image_path) as img:
        return img.width, img.height


if __name__ == '__main__':
    # Populate images section
    unique_image_ids = train['image_id'].unique()
    for image_id in unique_image_ids:
        
        file_name = f"{image_id}"  # Adjust the extension if needed
        for j in [IMAGE_FOLDER_TRAIN , IMAGE_FOLDER_TEST] : 
            try :
                image_path = os.path.join(j, file_name)
                width, height = get_image_dimensions(image_path)
                coco_format["images"].append({
                    "id": index_dict[image_id],
                    "file_name": file_name,
                    "width": width,
                    "height": height
                })
            except :
                continue

    # Annotations
    for index, row in train.iterrows():
        annotation = {
            "id": index,
            "image_id": index_dict[row["image_id"]],
            "category_id": int(row["category_id"]),
            "bbox": row["bbox"],
            "area": row["bbox"][2] * row["bbox"][3],  # width * height
            "iscrowd": 0,
            "segmentation": []  # Empty if not using segmentation
        }
        coco_format["annotations"].append(annotation)

    # Save to JSON
    with open('/kaggle/working/dataset_coco_format.json', 'w') as f:
        json.dump(coco_format, f, indent=4)
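
A quick way to sanity-check the generated file is to load it back with pycocotools (an optional check, not part of the training pipeline):

In [ ]:
from pycocotools.coco import COCO

coco_check = COCO('/kaggle/working/dataset_coco_format.json')
print(len(coco_check.imgs), "images,", len(coco_check.anns), "annotations")
print(coco_check.cats)  # {1: {'id': 1, 'name': 'Other'}, 2: {... 'Tin'}, 3: {... 'Thatch'}}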
In [8]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api")
PROJECT_NAME = 'detr_hous_count'
os.environ['WANDB_NOTEBOOK_NAME'] = 'DETR_Unicef.ipynb'

wandb.login(key=secret_value_0)
wandb.init(project=PROJECT_NAME , name = "detr_resnet-50")

wandb_logger = WandbLogger(name="MyModelRun", project=PROJECT_NAME)
wandb: WARNING WANDB_NOTEBOOK_NAME should be a path to a notebook file, couldn't find Getting_started_detr_hf.ipynb.
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
wandb: WARNING If you're specifying your api key in code, ensure this code is not shared publicly.
wandb: WARNING Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
wandb: Currently logged in as: lassouedaymenla (comp1). Use `wandb login --relogin` to force relogin
wandb version 0.17.2 is available! To upgrade, please run: $ pip install wandb --upgrade
Tracking run with wandb version 0.16.5
Run data is saved locally in /kaggle/working/wandb/run-20240622_185314-aniw3g3z
Syncing run detr_resnet-50 to Weights & Biases (docs)
View project at https://wandb.ai/comp1/detr_hous_count
View run at https://wandb.ai/comp1/detr_hous_count/runs/aniw3g3z/workspace
In [9]:
train.head()
Out[9]:
image_id bbox category_id id height width x_min y_min x_max y_max
0 id_w55q2qr62fsk.tif [122.0, 1.0, 42.0, 30.0] 2.0 6356.0 500 500 122.0 1.0 164.0 31.0
1 id_fvldv7o1kn9d.tif [500.0, 141.0, 74.0, 70.0] 2.0 2305.0 1000 1000 500.0 141.0 574.0 211.0
2 id_5d1r9l1jp7b5.tif [304.0, 525.0, 54.0, 58.0] 2.0 238.0 1000 1000 304.0 525.0 358.0 583.0
3 id_45qru79t6s4n.tif [187.0, 298.0, 44.0, 56.0] 2.0 921.0 500 500 187.0 298.0 231.0 354.0
4 id_84cjf2pbqmtv.tif [349.0, 721.0, 58.0, 59.0] 2.0 1362.0 1000 1000 349.0 721.0 407.0 780.0
In [10]:
# Load the COCO dataset JSON
file_path = os.path.join("/kaggle/working/dataset_coco_format.json")
with open(file_path, 'r') as f:
    coco = json.load(f)

# Extract annotations and images
annotations = coco['annotations']
images = coco['images']

# Split images directly
images_train, images_val = train_test_split(images, test_size=0.2, random_state=seed)

# Get the IDs of the split images
images_train_ids = {img['id'] for img in images_train}
images_val_ids = {img['id'] for img in images_val}

# Filter annotations to only include those present in train/val images
annotations_train = [ann for ann in annotations if ann['image_id'] in images_train_ids]
annotations_val = [ann for ann in annotations if ann['image_id'] in images_val_ids]

# Create new JSON objects for train and validation sets
coco_train = {'images': images_train, 'annotations': annotations_train, 'categories': coco['categories']}
coco_val = {'images': images_val, 'annotations': annotations_val, 'categories': coco['categories']}

# Save new JSON files for train and validation datasets
with open(os.path.join(DATA_FOLDER, 'train.json'), 'w') as f:
    json.dump(coco_train, f)

with open(os.path.join(DATA_FOLDER, 'val.json'), 'w') as f:
    json.dump(coco_val, f)
In [11]:
class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder,ann_file , processor):
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self.processor = processor

    def __getitem__(self, idx):
        # read in PIL image and target in COCO format
        # feel free to add data augmentation here before passing them to the next step
        img, target = super(CocoDetection, self).__getitem__(idx)

        # preprocess image and target (converting target to DETR format, resizing + normalization of both image and target)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.processor(images=img, annotations=target, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze()  # remove batch dimension
        target = encoding["labels"][0]  # remove batch dimension

        return pixel_values, target
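
The comment inside __getitem__ invites adding data augmentation before the processor is applied. A minimal sketch of how that could look with albumentations (assumptions: albumentations is available in the environment, and the helper name apply_augmentation is illustrative):

In [ ]:
import numpy as np
import albumentations as A

# Box-aware augmentations; format='coco' keeps boxes as [x, y, width, height]
aug = A.Compose(
    [A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.3)],
    bbox_params=A.BboxParams(format='coco', label_fields=['category_ids']),
)

def apply_augmentation(img, target):
    """img: PIL image; target: list of COCO annotation dicts from CocoDetection."""
    out = aug(
        image=np.array(img),
        bboxes=[ann['bbox'] for ann in target],
        category_ids=[ann['category_id'] for ann in target],
    )
    new_target = [
        {"bbox": list(box), "category_id": cat, "area": box[2] * box[3], "iscrowd": 0}
        for box, cat in zip(out['bboxes'], out['category_ids'])
    ]
    return Image.fromarray(out['image']), new_target

# It would be called in __getitem__ right after super().__getitem__(idx) and before self.processor.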
In [12]:
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

train_dataset = CocoDetection(img_folder=IMAGE_FOLDER_TRAIN,ann_file='/kaggle/working/train.json', processor=processor )
val_dataset = CocoDetection(img_folder=IMAGE_FOLDER_TRAIN,ann_file='/kaggle/working/val.json', processor=processor)

print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))
preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]
loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
Number of training examples: 1972
Number of validation examples: 493
In [ ]:
# based on https://github.com/woctezuma/finetune-detr/blob/master/finetune_detr.ipynb
image_ids = train_dataset.coco.getImgIds()
# let's pick a random image
image_id = image_ids[np.random.randint(0, len(image_ids))]
print('Image n°{}'.format(image_id))
image = train_dataset.coco.loadImgs(image_id)[0]
print(image['file_name'])
image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, image['file_name']))
annotations = train_dataset.coco.imgToAnns[image_id]
draw = ImageDraw.Draw(image, "RGBA")

cats = train_dataset.coco.cats
id2label = {k: v['name'] for k,v in cats.items()}

for annotation in annotations:
  box = annotation['bbox']
  class_idx = annotation['category_id']
  x,y,w,h = tuple(box)
  draw.rectangle((x,y,x+w,y+h), outline='red', width=1)
  draw.text((x, y), id2label[class_idx], fill='white')

image
Image n°1678
id_w6up52p9thdw.tif
In [ ]:
def collate_fn(batch):
    pixel_values = [item[0] for item in batch]
    encoding = processor.pad(pixel_values, return_tensors="pt")
    labels = [item[1] for item in batch]
    batch = {}
    batch['pixel_values'] = encoding['pixel_values']
    batch['pixel_mask'] = encoding['pixel_mask']
    batch['labels'] = labels
    return batch


train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=BATCH_TRAIN , shuffle=True)
val_dataloader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=BATCH_VAL)
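
A quick shape check on one batch (optional; the processor pads every image in the batch to the largest size):

In [ ]:
batch = next(iter(train_dataloader))
print(batch['pixel_values'].shape)  # (BATCH_TRAIN, 3, H, W) after padding
print(batch['pixel_mask'].shape)    # (BATCH_TRAIN, H, W); 1 = real pixel, 0 = padding
print(len(batch['labels']))         # BATCH_TRAIN target dicts in DETR format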
In [ ]:
class Detr(pl.LightningModule):
    def __init__(self, lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4):
        super().__init__()
        self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50",
                                                            revision="no_timm",
                                                            num_labels=3,
                                                            ignore_mismatched_sizes=True)  # the classification head is re-initialized for our labels, so the size mismatch with the 91-class checkpoint is expected
        # Note: annotation category ids run 1-3, while num_labels=3 makes index 3 the "no-object" class;
        # remapping ids to 0-2 (or using num_labels=4) would avoid a potential overlap.
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay

    def forward(self, pixel_values, pixel_mask):
        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

        return outputs

    def common_step(self, batch, batch_idx):
        pixel_values = batch["pixel_values"]
        pixel_mask = batch["pixel_mask"]
        labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]

        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

        loss = outputs.loss
        loss_dict = outputs.loss_dict

        return loss, loss_dict

    def training_step(self, batch, batch_idx):
        loss, loss_dict = self.common_step(batch, batch_idx)
        self.log("training_loss", loss)
        for k, v in loss_dict.items():
            self.log("train_" + k, v.item())

        return loss

    def validation_step(self, batch, batch_idx):
        loss, loss_dict = self.common_step(batch, batch_idx)
        self.log("validation_loss", loss)
        for k, v in loss_dict.items():
            self.log("validation_" + k, v.item())

        return loss

    def configure_optimizers(self):
        param_dicts = [
            {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
            {
                "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                "lr": self.lr_backbone,
            },
        ]
        optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
                                      weight_decay=self.weight_decay)

        return optimizer

    def train_dataloader(self):
        return train_dataloader

    def val_dataloader(self):
        return val_dataloader
In [ ]:
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
In [ ]:
def correct_bbox_type(bbox):
    # Parses a bbox string such as "[122.0, 1.0, 42.0, 30.0]" (used by the augmentation demo below);
    # the regex assumes every coordinate contains a decimal point.
    return [np.float32(point) for point in re.findall(r"\d+\.\d+", bbox)]
In [18]:
# import cv2
# import matplotlib.pyplot as plt
# import albumentations as A
# from albumentations.pytorch import ToTensorV2
# import pandas as pd
# import numpy as np
# import re

# def correct_bbox_type(bbox):
#     return [np.float32(point) for point in re.findall(r"\d+\.\d+", bbox)]

# def visualize_augmentation(image, bboxes, labels, transform):
#     # Apply augmentations
#     augmented = transform(image=image, bboxes=bboxes, labels=labels)
#     augmented_image = augmented['image']
#     augmented_bboxes = augmented['bboxes']
    
#     # Convert image to a format suitable for display
#     augmented_image = augmented_image.permute(1, 2, 0).cpu().numpy()
    
#     # Plot image with bounding boxes
#     fig, ax = plt.subplots(1, 1, figsize=(12, 8))
#     ax.imshow(augmented_image)
    
#     for bbox in augmented_bboxes:
#         x_min, y_min, width, height = bbox
#         rect = plt.Rectangle((x_min, y_min), width, height, edgecolor='r', facecolor='none', linewidth=2)
#         ax.add_patch(rect)
    
#     plt.show()

# # Example image and bounding boxes
# image_path = '/kaggle/input/zindi-od1/Images_unicef/Images_unicef/Train/id_w55q2qr62fsk.tif'
# image = cv2.imread(image_path)

# # Check if the image is loaded correctly
# if image is None:
#     raise ValueError(f"Image not found or unable to open: {image_path}")

# # Convert image to RGB
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# train = pd.read_csv('/kaggle/input/zindi-od1/train_ufinal.csv')

# # Parse bounding boxes
# bbox_strings = train[train['image_id'] == "id_w55q2qr62fsk.tif"]['bbox']
# bboxes = [correct_bbox_type(bbox_string) for bbox_string in bbox_strings]
# # print(len(bboxes))
# # Parse labels
# labels = train[train['image_id'] == "id_w55q2qr62fsk.tif"]['category_id'].tolist()
# # print(labels)

# # Define your augmentations
# train_transforms = A.Compose([
#     A.OneOf([
#         A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.9),
#         A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9)
        
#     ], p=0.9),
#     A.ToGray(p=0.01),
#     A.HorizontalFlip(p=0.5),
#     A.VerticalFlip(p=0.5),
#     A.Rotate(limit=45, p=1),

#     A.Resize(height=512, width=512, p=1),
#     A.RandomCrop(p=0.5, height=400, width=400) , 
#     ToTensorV2(p=0.5)
# ], p=1.0, bbox_params=A.BboxParams(format='coco', min_area=0, min_visibility=0, label_fields=['labels']))

# # Visualize the augmentation
# visualize_augmentation(image, bboxes, labels, train_transforms)
In [19]:
model = Detr()

trainer = Trainer(max_epochs=10, accelerator='gpu',logger=wandb_logger)
trainer.fit(model)
config.json:   0%|          | 0.00/6.60k [00:00<?, ?B/s]
pytorch_model.bin:   0%|          | 0.00/167M [00:00<?, ?B/s]
Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([4, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Sanity Checking: |          | 0/? [00:00<?, ?it/s]
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.
Training: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
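
The Trainer above runs for 10 epochs without saving checkpoints. A hedged sketch of adding a checkpoint callback and the gradient clipping commonly used when fine-tuning DETR (the values here are assumptions, not tuned for this dataset):

In [ ]:
from pytorch_lightning.callbacks import ModelCheckpoint

checkpoint_cb = ModelCheckpoint(
    monitor="validation_loss", mode="min", save_top_k=1,
    dirpath="/kaggle/working/checkpoints",
    filename="detr-{epoch:02d}-{validation_loss:.3f}",
)
trainer = Trainer(max_epochs=10, accelerator="gpu", logger=wandb_logger,
                  gradient_clip_val=0.1, callbacks=[checkpoint_cb])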
In [20]:
val_dataloader = model.val_dataloader()
trainer.validate(model, val_dataloader)
Validation: |          | 0/? [00:00<?, ?it/s]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃       Validate metric        ┃         DataLoader 0         ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ validation_cardinality_error │       8.33874225616455       │
│       validation_loss        │      1.205262303352356       │
│     validation_loss_bbox     │     0.03503163903951645      │
│      validation_loss_ce      │     0.20140598714351654      │
│     validation_loss_giou     │      0.4143490493297577      │
└──────────────────────────────┴──────────────────────────────┘
Out[20]:
[{'validation_loss': 1.205262303352356,
  'validation_loss_ce': 0.20140598714351654,
  'validation_loss_bbox': 0.03503163903951645,
  'validation_loss_giou': 0.4143490493297577,
  'validation_cardinality_error': 8.33874225616455}]

Get the validation score¶

In [21]:
def count_labels(counts):
    labels = {1: 0, 2: 0, 3: 0}
    counts_cpu = counts.cpu().numpy()
    for i in counts_cpu:
        labels[i] += 1
    return labels

submission_val_df = pd.DataFrame(columns=["image_id", "Target"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

for image_name in tqdm([images_val[i]['file_name'] for i in range(len(images_val))] , desc = "Inference"):
    image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, f'{image_name}'))

    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
    result_dict = count_labels(results['labels']) 
#     print(result_dict)
    df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
                                     f"{image_name.split('.')[0]}_2",
                                     f"{image_name.split('.')[0]}_3"],
                      "Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
    submission_val_df = pd.concat([submission_val_df, df] ) 
Inference: 100%|██████████| 493/493 [00:41<00:00, 12.01it/s]
In [22]:
def count_labels_temp(counts):
    labels = {1: 0, 2: 0, 3: 0}
    for i in counts:
        labels[i] += 1
    return labels

sub_temp_df = pd.DataFrame(columns=["image_id", "Target"])

correction_df = train[train['image_id'].isin([images_val[i]['file_name'] for i in range(len(images_val))])]
for image_name in tqdm(correction_df['image_id'].unique() , desc = "Inference"):
    temp_img_df = correction_df[correction_df['image_id']==image_name]
    result_dict = count_labels_temp(temp_img_df.category_id)
    
    df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
                                     f"{image_name.split('.')[0]}_2",
                                     f"{image_name.split('.')[0]}_3"],
                      "Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
    sub_temp_df = pd.concat([sub_temp_df, df] ) 
Inference: 100%|██████████| 493/493 [00:01<00:00, 353.01it/s]
In [23]:
mean_absolute_error(sub_temp_df['Target'] ,submission_val_df['Target'] )
Out[23]:
5.658553076402975
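
Note that sub_temp_df and submission_val_df are built by iterating the validation images in different orders, so the positional comparison above may pair up mismatched rows. A safer check aligns the two frames on image_id first (a small sketch):

In [ ]:
aligned = sub_temp_df.merge(submission_val_df, on="image_id", suffixes=("_true", "_pred"))
mean_absolute_error(aligned["Target_true"].astype(float), aligned["Target_pred"].astype(float))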
In [26]:
sub_temp_df['Target']
Out[26]:
0     0
1    19
2     0
0     0
1    26
     ..
1     1
2     0
0     0
1     0
2     1
Name: Target, Length: 1479, dtype: object
In [27]:
submission_val_df['Target']
Out[27]:
0     0
1    73
2     0
0     0
1     4
     ..
1     6
2     0
0     0
1     4
2     0
Name: Target, Length: 1479, dtype: object
In [24]:
def custom_mae(train, images_val, model, processor):
    """Validation MAE for a given model; mirrors the two cells above, but aligned on image_id."""

    def count_labels(counts):
        labels = {1: 0, 2: 0, 3: 0}
        counts_cpu = counts.cpu().numpy()
        for i in counts_cpu:
            labels[i] += 1
        return labels

    submission_val_df = pd.DataFrame(columns=["image_id", "Target"])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    for image_name in tqdm([images_val[i]['file_name'] for i in range(len(images_val))] , desc = "Inference"):
        image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, f'{image_name}'))

        inputs = processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)

        target_sizes = torch.tensor([image.size[::-1]])
        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
        result_dict = count_labels(results['labels']) 
    #     print(result_dict)
        df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
                                         f"{image_name.split('.')[0]}_2",
                                         f"{image_name.split('.')[0]}_3"],
                          "Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
        submission_val_df = pd.concat([submission_val_df, df] ) 
        
        
    def count_labels_temp(counts):
        labels = {1: 0, 2: 0, 3: 0}
        for i in counts:
            labels[i] += 1
        return labels

    sub_temp_df = pd.DataFrame(columns=["image_id", "Target"])

    correction_df = train[train['image_id'].isin([images_val[i]['file_name'] for i in range(len(images_val))])]
    for image_name in tqdm(correction_df['image_id'].unique() , desc = "Inference"):
        temp_img_df = correction_df[correction_df['image_id']==image_name]
        result_dict = count_labels_temp(temp_img_df.category_id)

        df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
                                         f"{image_name.split('.')[0]}_2",
                                         f"{image_name.split('.')[0]}_3"],
                          "Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
        sub_temp_df = pd.concat([sub_temp_df, df] ) 
        
    # Align on image_id before comparing, since the two frames are built in different orders
    aligned = sub_temp_df.merge(submission_val_df, on="image_id", suffixes=("_true", "_pred"))
    return mean_absolute_error(aligned["Target_true"].astype(float), aligned["Target_pred"].astype(float))
In [25]:
stop
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[25], line 1
----> 1 stop

NameError: name 'stop' is not defined
In [ ]:
#4.972278566599053
In [ ]:
# train[train['image_id'] == "id_ikmbin2sr5e1.tif"].category_id.value_counts()
In [ ]:
train.head()

Predict on the test set¶

In [ ]:
test_df = pd.read_csv(TEST_CSV)
test_list = test_df['image_id'].to_list()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
In [ ]:
def plot_results(img_name):
    image = Image.open(os.path.join(IMAGE_FOLDER_TEST, f'{img_name}'))
    width, height = image.size
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    print("Outputs:", outputs.keys())    
    postprocessed_outputs  =  processor.post_process_object_detection(outputs,
                                                        target_sizes=[(height, width)],
                                                        threshold=0.8)
    results = postprocessed_outputs[0]

    plt.figure(figsize=(16,10))
    plt.imshow(image)
    ax = plt.gca()
    print(f"The image should contain only {len(results['boxes'])} boxes")
    for score, label, (xmin, ymin, xmax, ymax)  in zip(results['scores'].tolist(), results['labels'].tolist(), results['boxes'].tolist()):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, linewidth=3))
        text = f'{labels_dict[label]}: {score:0.2f}'
        ax.text(xmin, ymin, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.show()
In [ ]:
import random
model = model.to(device)
random_img = random.choice(test_list)
print(random_img)
plot_results(random_img)
In [ ]:
# val_df  # note: val_df is never defined in this notebook, so running this line would raise a NameError
In [ ]:
def count_labels(counts):
    labels = {1: 0, 2: 0, 3: 0}
    counts_cpu = counts.cpu().numpy()
    for i in counts_cpu:
        labels[i] += 1
    return labels

submission_df = pd.DataFrame(columns=["image_id", "Target"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

for image_name in tqdm(test_df['image_id'] , desc = "Inference"):
    image = Image.open(os.path.join(IMAGE_FOLDER_TEST, f'{image_name}'))

    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    result_dict = count_labels(results['labels']) 
    df = pd.DataFrame({"image_id" : [f"{image_name.split('.')[0]}_1",
                                     f"{image_name.split('.')[0]}_2",
                                     f"{image_name.split('.')[0]}_3"],
                      "Target" : [result_dict[1], result_dict[2], result_dict[3]]},columns=["image_id", "Target"])
    submission_df = pd.concat([submission_df, df]) 
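
The test loop above uses a 0.9 confidence threshold while the validation loop used 0.7. Before committing to one value, the threshold could be swept on the validation split; a hedged sketch reusing the helpers defined earlier (val_mae_at_threshold is an illustrative name):

In [ ]:
def val_mae_at_threshold(th):
    preds, truths = [], []
    for img_info in images_val:
        name = img_info['file_name']
        image = Image.open(os.path.join(IMAGE_FOLDER_TRAIN, name))
        inputs = processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        sizes = torch.tensor([image.size[::-1]])
        res = processor.post_process_object_detection(outputs, target_sizes=sizes, threshold=th)[0]
        pred_counts = count_labels(res['labels'])                                      # predicted counts per roof type
        true_counts = count_labels_temp(train[train['image_id'] == name].category_id)  # ground-truth counts
        for c in (1, 2, 3):
            preds.append(pred_counts[c])
            truths.append(true_counts[c])
    return mean_absolute_error(truths, preds)

# for th in (0.5, 0.6, 0.7, 0.8, 0.9):
#     print(th, val_mae_at_threshold(th))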
In [ ]:
submission_df.head()
In [ ]:
submission_df.to_csv("submission.csv", index=False)

"id_uawi0k5spci5_1" in submission_df.image_id.unique()¶