In [1]:
import numpy as np 
import pandas as pd
from glob import glob
import torch
from torch import optim
import torchvision
import timm
from tqdm import tqdm
import seaborn as sns
from PIL import Image
import random
import os
from torchvision.transforms import v2
from torch.utils.data import Dataset , DataLoader
import cv2
import matplotlib.pyplot as plt
import albumentations as A
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
      RandomBrightnessContrast, Rotate, ShiftScaleRotate,  Transpose
    )
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import KFold , StratifiedKFold , StratifiedGroupKFold
import torch.nn as nn
from contextlib import contextmanager
from torch.optim import Adam, SGD , AdamW
from functools import partial
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
import time
from sklearn.metrics import roc_auc_score
import math
# from catalyst.data import BalanceClassSampler
import urllib.request as urlreq
from torchvision.transforms import ToPILImage
from accelerate.tracking import GeneralTracker
from accelerate import Accelerator, notebook_launcher
from accelerate.utils import set_seed
from sklearn.metrics import log_loss
import psutil
import warnings
# Show every column when a wide DataFrame is displayed.
pd.set_option('display.max_columns'  , None)
# NOTE: this silences all Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')
In [2]:
def wandb_log(**kwargs):
    """Forward each keyword argument to ``wandb.log`` as its own single-key dict.

    One ``wandb.log`` call is issued per keyword, in the order the keywords
    were passed. Calling with no keywords does nothing.

    NOTE(review): ``wandb`` is not imported in the visible part of the
    notebook -- confirm it is imported before this helper is called.
    """
    for metric_name in kwargs:
        wandb.log({metric_name: kwargs[metric_name]})
In [3]:
# Snapshot of system RAM usage taken via psutil at the time this cell runs.
memory_info = psutil.virtual_memory()
# Report total / available / used memory in GB (bytes divided by 1024**3)
# plus the percentage in use.
print(f"Total Memory: {memory_info.total / (1024 ** 3):.2f} GB")
print(f"Available Memory: {memory_info.available / (1024 ** 3):.2f} GB")
print(f"Used Memory: {memory_info.used / (1024 ** 3):.2f} GB")
print(f"Percentage Used: {memory_info.percent}%")
Total Memory: 31.36 GB
Available Memory: 30.00 GB
Used Memory: 0.91 GB
Percentage Used: 4.3%
In [4]:
# Load the competition metadata tables.
train = pd.read_csv('/kaggle/input/isic-2024/isic-2024/train-metadata.csv')
# NOTE: despite its name, `val` holds the *test* metadata file.
val = pd.read_csv('/kaggle/input/isic-2024/isic-2024/test-metadata.csv')
In [5]:
# Preview the first rows of the raw training metadata.
train.head()
Out[5]:
isic_id target patient_id age_approx sex anatom_site_general clin_size_long_diam_mm image_type tbp_tile_type tbp_lv_A tbp_lv_Aext tbp_lv_B tbp_lv_Bext tbp_lv_C tbp_lv_Cext tbp_lv_H tbp_lv_Hext tbp_lv_L tbp_lv_Lext tbp_lv_areaMM2 tbp_lv_area_perim_ratio tbp_lv_color_std_mean tbp_lv_deltaA tbp_lv_deltaB tbp_lv_deltaL tbp_lv_deltaLB tbp_lv_deltaLBnorm tbp_lv_eccentricity tbp_lv_location tbp_lv_location_simple tbp_lv_minorAxisMM tbp_lv_nevi_confidence tbp_lv_norm_border tbp_lv_norm_color tbp_lv_perimeterMM tbp_lv_radial_color_std_max tbp_lv_stdL tbp_lv_stdLExt tbp_lv_symm_2axis tbp_lv_symm_2axis_angle tbp_lv_x tbp_lv_y tbp_lv_z attribution copyright_license lesion_id iddx_full iddx_1 iddx_2 iddx_3 iddx_4 iddx_5 mel_mitotic_index mel_thick_mm tbp_lv_dnn_lesion_confidence
0 ISIC_0015670 0 IP_1235828 60.0 male lower extremity 3.04 TBP tile: close-up 3D: white 20.244422 16.261975 26.922447 23.954773 33.684638 28.953117 53.058545 55.828924 54.367448 62.025701 3.152561 27.476170 0.00000 3.982447 2.967674 -7.658253 8.360566 5.784302 0.901302 Right Leg - Upper Right Leg 1.543016 2.628592e-03 7.091360 0.000000 9.307003 0.00000 2.036195 2.637780 0.590476 85 -182.703552 613.493652 -42.427948 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 97.517282
1 ISIC_0015845 0 IP_8170065 60.0 male head/neck 1.10 TBP tile: close-up 3D: white 31.712570 25.364740 26.331000 24.549290 41.219030 35.299260 39.702910 44.064040 48.861520 55.362360 0.919497 12.235290 0.00000 6.347830 1.781713 -6.500838 6.839008 4.987244 0.639885 Head & Neck Head & Neck 0.821918 1.334303e-07 2.116402 0.000000 3.354148 0.00000 0.853227 3.912844 0.285714 55 -0.078308 1575.687000 57.174500 Memorial Sloan Kettering Cancer Center CC-BY IL_6727506 Benign Benign NaN NaN NaN NaN NaN NaN 3.141455
2 ISIC_0015864 0 IP_6724798 60.0 male posterior torso 3.40 TBP tile: close-up 3D: XP 22.575830 17.128170 37.970460 33.485410 44.174920 37.611800 59.265850 62.909730 53.961180 61.670520 3.265153 24.184620 0.00000 5.447655 4.485044 -7.709336 9.092376 6.290359 0.932147 Torso Back Top Third Torso Back 1.194905 2.959177e-04 4.798335 0.000000 8.886309 0.00000 1.743651 1.950777 0.361905 105 123.649700 1472.010000 232.908900 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.804040
3 ISIC_0015902 0 IP_4111386 65.0 male anterior torso 3.22 TBP tile: close-up 3D: XP 14.242329 12.164757 21.448144 21.121356 25.746200 24.374023 56.414429 60.060388 18.649518 23.314841 6.079940 14.889242 0.51452 2.077572 0.326788 -4.665323 4.783413 6.400196 0.654458 Torso Front Top Half Torso Front 2.481328 2.198945e+01 1.975874 1.771705 9.514499 0.66469 1.258541 1.573733 0.209581 130 -141.024780 1442.185791 58.359802 ACEMID MIA CC-0 NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.989998
4 ISIC_0024200 0 IP_8313778 55.0 male anterior torso 2.73 TBP tile: close-up 3D: white 24.725520 20.057470 26.464900 25.710460 36.217980 32.608740 46.946070 52.041180 46.276310 54.855740 2.101708 19.902560 0.00000 4.668053 0.754434 -8.579431 9.148495 6.531302 0.946448 Torso Front Top Half Torso Front 0.929916 1.378832e-03 3.658854 0.000000 6.467562 0.00000 2.085409 2.480509 0.313433 20 -72.315640 1488.720000 21.428960 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 70.442510
In [6]:
# Notebook-wide paths and the compute device.
# `txt_to_csv` and `DIR_PATH` are not referenced in the visible cells -- TODO confirm they are still needed.
txt_to_csv = False
DIR_PATH = "/kaggle/input/isic-2024/isic-2024"
# Directory for written artefacts; init_logger uses it as the default log location.
OUTPUT_DIR = "/kaggle/working/"
# Prefix joined with `<isic_id>.jpg` to build per-image file paths.
TRAIN_DIR = "/kaggle/input/isic-2024/isic-2024/train-image/image/"


# os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# !export PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.6,max_split_size_mb:128

        
class CFG:
    """Experiment configuration, read as plain class attributes (``CFG.<name>``)."""

    # --- run identity and mode switches ---
    competition = "ISIC_2024"
    seed = 42
    n_fold = 5
    target_col = 'target'
    train = True
    inference = False
    pseudo_labeling = False
    num_classes = 2  # binary classification
    trn_fold = [2]
    debug = False
    apex = False
    print_freq = 18  # number of batches between score print-outs
    num_workers = 4

    # --- model and input ---
    # model_name = "eva02_large_patch14_448.mim_m38m_ft_in22k_in1k"
    model_name = "tf_efficientnetv2_m.in21k"
    size = 384

    # --- optimisation ---
    scheduler = 'CosineAnnealingWarmRestarts'
    epochs = 10
    lr = 1e-4
    min_lr = 1e-6
    T_0 = 10  # CosineAnnealingWarmRestarts
    batch_size = 20
    weight_decay = 1e-6
    max_grad_norm = 1000
    gradient_accumulation_steps = 1

    # --- data / loss / tracking options ---
    add_only_negatives = True
    weighted_loss = True
    WandB = True
In [7]:
if CFG.pseudo_labeling:
    # Optionally extend the training set with confident predictions taken
    # from an earlier submission file.
    ps = pd.read_csv('/kaggle/input/best-result/b4_nTTA_2epochs_aug (2).csv')
    ps = ps.rename(columns={"label": "target"})
    # Confident pseudo labeling: keep only predictions above 0.9 or below 0.1.
    # `.copy()` makes the selection an independent frame so the assignment
    # below does not write into a slice of `ps` (chained assignment).
    to_add = ps[(ps['target'] > 0.9) | (ps['target'] < 0.1)].copy()
    # Binarise: > 0.9 becomes 1, everything else kept here (< 0.1) becomes 0.
    to_add["target"] = (to_add["target"] > 0.9).astype(int)
    print(to_add["target"].value_counts())
    shape_before = train.shape
    if CFG.add_only_negatives:
        # Keep only the pseudo-negative rows.
        to_add = to_add[to_add['target'] == 0]
    train = pd.concat([train, to_add], axis=0)
    shape_after = train.shape
    print(f"The shape of the train set have moved from {shape_before} => {shape_after}")
    # Concatenation leaves duplicate index labels; rebuild a clean RangeIndex.
    train = train.reset_index(drop=True)
In [8]:
def get_score(y_true, y_pred):
    """Sum of per-class binary log losses for a two-class problem.

    Args:
        y_true: iterable of class labels; an entry equal to 1 is treated as
            the positive class, anything else as class 0.
        y_pred: array-like of shape (n_samples, 2) whose column ``k`` holds the
            predicted probability of class ``k``.

    Returns:
        The log loss of column 0 plus the log loss of column 1 (a sum, not a
        mean).
    """
    num_classes = 2
    # One-hot encode the labels: column 0 marks class 0, column 1 marks class 1.
    y_true = np.array([[0, 1] if i == 1 else [1, 0] for i in y_true])
    y_pred = np.asarray(y_pred)

    total_log_loss = 0.0
    for class_idx in range(num_classes):
        class_true = y_true[:, class_idx]
        class_pred = y_pred[:, class_idx]
        # `labels` is passed explicitly because log_loss raises when y_true
        # contains a single label, which happens whenever a fold/batch holds
        # only one class (likely with this heavily imbalanced target).
        total_log_loss += log_loss(class_true, class_pred, labels=[0, 1])
    return total_log_loss
#     mean_log_loss = total_log_loss / num_classes
#     return mean_log_loss

# def get_score(y_true, y_pred):
#     # Ensure y_true and y_pred are 1D arrays
#     y_true = y_true.flatten()
#     y_pred = y_pred.flatten()

#     # Calculate the log loss directly
#     total_log_loss = log_loss(y_true, y_pred)



@contextmanager
def timer(name):
    """Context manager that logs when the wrapped block starts and how long it took.

    Both messages go to the module-level ``LOGGER`` at INFO level; the elapsed
    time is reported in whole seconds. If the wrapped block raises, the
    "done" message is not logged.
    """
    started_at = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    elapsed = time.time() - started_at
    LOGGER.info(f'[{name}] done in {elapsed:.0f} s.')


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Create the notebook logger writing to both the console and ``log_file``.

    Args:
        log_file: path of the log file; defaults to ``train.log`` inside
            ``OUTPUT_DIR`` (evaluated once, when the function is defined).

    Returns:
        The ``logging.Logger`` named after this module, set to INFO level with
        exactly one stream handler and one file handler, both using a bare
        ``%(message)s`` format.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so re-running this cell
    # would otherwise stack another pair of handlers and duplicate every
    # message. Drop (and close) whatever was attached before.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

# Notebook-wide logger used by `timer` and available to later cells.
LOGGER = init_logger()



def seed_torch(seed=42):
    """Seed the Python, NumPy, PyTorch (CPU and CUDA) and accelerate RNGs.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        set_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Apply the seed configured in CFG.
seed_torch(seed=CFG.seed)
In [9]:
# Debug mode: train for a single epoch on a seeded random sample of 3000 rows.
# NOTE: this overwrites `train` in place, so the full frame is only
# recoverable by re-running the loading cell.
if CFG.debug:
    CFG.epochs = 1
    train = train.sample(n=3000, random_state=CFG.seed).reset_index(drop=True)
#     test = test.sample(n=1000, random_state=CFG.seed).reset_index(drop=True)
In [10]:
# Row counts of the two metadata tables (`val` holds the test metadata).
print(f"The train file contains {train.shape[0]} elements")
print(f"The test file contains {val.shape[0]} elements")
The train file contains 401059 elements
The test file contains 3 elements
In [11]:
# Bar chart of how many rows fall in each target class.
sns.countplot(data = train , x = train["target"])
Out[11]:
<Axes: xlabel='target', ylabel='count'>
In [12]:
# Exact per-class row counts behind the plot.
train["target"].value_counts()
Out[12]:
target
0    400666
1       393
Name: count, dtype: int64
In [13]:
# image_files1 = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_Train_Image_IDs.csv')['Image_File'].unique()

# # Split each unique value and get the third part
# third_parts = [file.split('/')[3] for file in image_files1]

# print(set(third_parts))

# image_files2 = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_balanced_valb_Image_IDs.csv')['Image_File'].unique()

# # Split each unique value and get the third part
# third_parts = [file.split('/')[3] for file in image_files2]

# print(set(third_parts))

# train_ext = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_Train_Image_IDs.csv')
# val_ext = pd.read_csv('/kaggle/input/isic-2024-balanced-image-dataset/ISIC_2024_balanced_valb_Image_IDs.csv')
# train_ext = pd.concat([train_ext , val_ext] , axis = 0)
# stock_pos = train_ext[train_ext['Label'] == 1]

# train['Image_File'] = TRAIN_DIR + train["isic_id"] + '.jpg'
# val['Image_File'] = TRAIN_DIR + val["isic_id"] + '.jpg'

# stock_pos.rename(columns = {"Label"  : CFG.target_col} , inplace =True)

# merged = pd.concat([train[train[CFG.target_col] == 0 ][stock_pos.columns] , stock_pos] , axis=0).reset_index(drop = True)
# train = merged.copy(deep = True)

# train = merged.copy(deep = True)
In [14]:
# Build the image path of every row as TRAIN_DIR + <isic_id> + '.jpg'.
train['Image_File'] = TRAIN_DIR + train["isic_id"] + '.jpg'
# NOTE(review): test rows are also pointed at TRAIN_DIR -- confirm these files
# exist there, or that test images are only ever read through the hdf5 handle.
val['Image_File'] = TRAIN_DIR + val["isic_id"] + '.jpg'
In [15]:
if CFG.weighted_loss:
    # Look counts up by label value. `value_counts()` orders its result by
    # frequency, so positional access (`.values[0]`, `.values[1]`) silently
    # swaps the classes whenever positives outnumber negatives, and fails
    # outright if only one class is present.
    label_counts = train[CFG.target_col].value_counts()
    neg = int(label_counts.get(0, 0))  # rows with target == 0
    pos = int(label_counts.get(1, 0))  # rows with target == 1

    # Each class is weighted by the share of the *other* class, so the rarer
    # class receives the larger weight: index 0 -> class 0, index 1 -> class 1.
    class_counts = [pos, neg]

    total_count = sum(class_counts)
    class_weights = [count / total_count for count in class_counts]
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float)
    print(class_weights_tensor)
tensor([9.7991e-04, 9.9902e-01])
In [16]:
# Preview the training frame again, now including the `Image_File` column.
train.head()
Out[16]:
isic_id target patient_id age_approx sex anatom_site_general clin_size_long_diam_mm image_type tbp_tile_type tbp_lv_A tbp_lv_Aext tbp_lv_B tbp_lv_Bext tbp_lv_C tbp_lv_Cext tbp_lv_H tbp_lv_Hext tbp_lv_L tbp_lv_Lext tbp_lv_areaMM2 tbp_lv_area_perim_ratio tbp_lv_color_std_mean tbp_lv_deltaA tbp_lv_deltaB tbp_lv_deltaL tbp_lv_deltaLB tbp_lv_deltaLBnorm tbp_lv_eccentricity tbp_lv_location tbp_lv_location_simple tbp_lv_minorAxisMM tbp_lv_nevi_confidence tbp_lv_norm_border tbp_lv_norm_color tbp_lv_perimeterMM tbp_lv_radial_color_std_max tbp_lv_stdL tbp_lv_stdLExt tbp_lv_symm_2axis tbp_lv_symm_2axis_angle tbp_lv_x tbp_lv_y tbp_lv_z attribution copyright_license lesion_id iddx_full iddx_1 iddx_2 iddx_3 iddx_4 iddx_5 mel_mitotic_index mel_thick_mm tbp_lv_dnn_lesion_confidence Image_File
0 ISIC_0015670 0 IP_1235828 60.0 male lower extremity 3.04 TBP tile: close-up 3D: white 20.244422 16.261975 26.922447 23.954773 33.684638 28.953117 53.058545 55.828924 54.367448 62.025701 3.152561 27.476170 0.00000 3.982447 2.967674 -7.658253 8.360566 5.784302 0.901302 Right Leg - Upper Right Leg 1.543016 2.628592e-03 7.091360 0.000000 9.307003 0.00000 2.036195 2.637780 0.590476 85 -182.703552 613.493652 -42.427948 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 97.517282 /kaggle/input/isic-2024/isic-2024/train-image/...
1 ISIC_0015845 0 IP_8170065 60.0 male head/neck 1.10 TBP tile: close-up 3D: white 31.712570 25.364740 26.331000 24.549290 41.219030 35.299260 39.702910 44.064040 48.861520 55.362360 0.919497 12.235290 0.00000 6.347830 1.781713 -6.500838 6.839008 4.987244 0.639885 Head & Neck Head & Neck 0.821918 1.334303e-07 2.116402 0.000000 3.354148 0.00000 0.853227 3.912844 0.285714 55 -0.078308 1575.687000 57.174500 Memorial Sloan Kettering Cancer Center CC-BY IL_6727506 Benign Benign NaN NaN NaN NaN NaN NaN 3.141455 /kaggle/input/isic-2024/isic-2024/train-image/...
2 ISIC_0015864 0 IP_6724798 60.0 male posterior torso 3.40 TBP tile: close-up 3D: XP 22.575830 17.128170 37.970460 33.485410 44.174920 37.611800 59.265850 62.909730 53.961180 61.670520 3.265153 24.184620 0.00000 5.447655 4.485044 -7.709336 9.092376 6.290359 0.932147 Torso Back Top Third Torso Back 1.194905 2.959177e-04 4.798335 0.000000 8.886309 0.00000 1.743651 1.950777 0.361905 105 123.649700 1472.010000 232.908900 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.804040 /kaggle/input/isic-2024/isic-2024/train-image/...
3 ISIC_0015902 0 IP_4111386 65.0 male anterior torso 3.22 TBP tile: close-up 3D: XP 14.242329 12.164757 21.448144 21.121356 25.746200 24.374023 56.414429 60.060388 18.649518 23.314841 6.079940 14.889242 0.51452 2.077572 0.326788 -4.665323 4.783413 6.400196 0.654458 Torso Front Top Half Torso Front 2.481328 2.198945e+01 1.975874 1.771705 9.514499 0.66469 1.258541 1.573733 0.209581 130 -141.024780 1442.185791 58.359802 ACEMID MIA CC-0 NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.989998 /kaggle/input/isic-2024/isic-2024/train-image/...
4 ISIC_0024200 0 IP_8313778 55.0 male anterior torso 2.73 TBP tile: close-up 3D: white 24.725520 20.057470 26.464900 25.710460 36.217980 32.608740 46.946070 52.041180 46.276310 54.855740 2.101708 19.902560 0.00000 4.668053 0.754434 -8.579431 9.148495 6.531302 0.946448 Torso Front Top Half Torso Front 0.929916 1.378832e-03 3.658854 0.000000 6.467562 0.00000 2.085409 2.480509 0.313433 20 -72.315640 1488.720000 21.428960 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 70.442510 /kaggle/input/isic-2024/isic-2024/train-image/...
In [17]:
def feature_engineering(df):
    """Add hand-crafted lesion features to ``df`` and list their column names.

    The new columns are written into ``df`` itself (callers that need the
    input untouched should pass a copy). Every feature is computed from the
    rows of ``df`` only. Ratios and logarithms are not guarded here, so zero
    or missing inputs can produce ``inf`` / ``NaN`` values.

    Args:
        df: DataFrame holding the ``tbp_lv_*`` measurement columns together
            with ``clin_size_long_diam_mm`` and ``age_approx``.

    Returns:
        Tuple ``(df, new_num_cols)``: the same DataFrame with the added
        columns, and the list of names of the added numerical columns.
    """
    # New features to try...
    df["lesion_size_ratio"] = df["tbp_lv_minorAxisMM"] / df["clin_size_long_diam_mm"]
    df["lesion_shape_index"] = df["tbp_lv_areaMM2"] / (df["tbp_lv_perimeterMM"] ** 2)
    df["hue_contrast"] = (df["tbp_lv_H"] - df["tbp_lv_Hext"]).abs()
    df["luminance_contrast"] = (df["tbp_lv_L"] - df["tbp_lv_Lext"]).abs()
    df["lesion_color_difference"] = np.sqrt(df["tbp_lv_deltaA"] ** 2 + df["tbp_lv_deltaB"] ** 2 + df["tbp_lv_deltaL"] ** 2)
    df["border_complexity"] = df["tbp_lv_norm_border"] + df["tbp_lv_symm_2axis"]
    df["color_uniformity"] = df["tbp_lv_color_std_mean"] / df["tbp_lv_radial_color_std_max"]
    df["3d_position_distance"] = np.sqrt(df["tbp_lv_x"] ** 2 + df["tbp_lv_y"] ** 2 + df["tbp_lv_z"] ** 2)
    df["perimeter_to_area_ratio"] = df["tbp_lv_perimeterMM"] / df["tbp_lv_areaMM2"]
    df["lesion_visibility_score"] = df["tbp_lv_deltaLBnorm"] + df["tbp_lv_norm_color"]
#     df["combined_anatomical_site"] = df["anatom_site_general"] + "_" + df["tbp_lv_location"]

    df["symmetry_border_consistency"] = df["tbp_lv_symm_2axis"] * df["tbp_lv_norm_border"]
    df["color_consistency"] = df["tbp_lv_stdL"] / df["tbp_lv_Lext"]

    df["size_age_interaction"] = df["clin_size_long_diam_mm"] * df["age_approx"]
    df["hue_color_std_interaction"] = df["tbp_lv_H"] * df["tbp_lv_color_std_mean"]
    df["lesion_severity_index"] = (df["tbp_lv_norm_border"] + df["tbp_lv_norm_color"] + df["tbp_lv_eccentricity"]) / 3
    df["shape_complexity_index"] = df["border_complexity"] + df["lesion_shape_index"]
    df["color_contrast_index"] = df["tbp_lv_deltaA"] + df["tbp_lv_deltaB"] + df["tbp_lv_deltaL"] + df["tbp_lv_deltaLBnorm"]
    df["log_lesion_area"] = np.log(df["tbp_lv_areaMM2"] + 1)
    df["normalized_lesion_size"] = df["clin_size_long_diam_mm"] / df["age_approx"]
    df["mean_hue_difference"] = (df["tbp_lv_H"] + df["tbp_lv_Hext"]) / 2
    df["std_dev_contrast"] = np.sqrt((df["tbp_lv_deltaA"] ** 2 + df["tbp_lv_deltaB"] ** 2 + df["tbp_lv_deltaL"] ** 2) / 3)
    df["color_shape_composite_index"] = (df["tbp_lv_color_std_mean"] + df["tbp_lv_area_perim_ratio"] + df["tbp_lv_symm_2axis"]) / 3
    # Must be computed from `df`, not from the global `train` frame: reading
    # `train` here filled other frames (e.g. the test set) with training-row
    # values aligned by index.
    df["3d_lesion_orientation"] = np.arctan2(df["tbp_lv_y"], df["tbp_lv_x"])
    df["overall_color_difference"] = (df["tbp_lv_deltaA"] + df["tbp_lv_deltaB"] + df["tbp_lv_deltaL"]) / 3
    df["symmetry_perimeter_interaction"] = df["tbp_lv_symm_2axis"] * df["tbp_lv_perimeterMM"]
    df["comprehensive_lesion_index"] = (df["tbp_lv_area_perim_ratio"] + df["tbp_lv_eccentricity"] + df["tbp_lv_norm_color"] + df["tbp_lv_symm_2axis"]) / 4

    # Additional features

    # 1. Composite shape index
    df['shape_complexity_ratio'] = df['tbp_lv_norm_border'] / df['lesion_shape_index']

    # 2. Color variability
    df['color_variability'] = df['tbp_lv_color_std_mean'] / df['tbp_lv_stdL']

    # 3. Border asymmetry
    df['border_asymmetry'] = df['tbp_lv_norm_border'] * (1 - df['tbp_lv_symm_2axis'])

    # 4. Relationship between 3D position and size
    df['3d_size_ratio'] = df['3d_position_distance'] / df['clin_size_long_diam_mm']

    # 5. Interaction between age and lesion characteristics
    df['age_lesion_interaction'] = df['age_approx'] * df['lesion_severity_index']

    # 6. Composite color-contrast index
    df['color_contrast_complexity'] = df['color_contrast_index'] * df['tbp_lv_radial_color_std_max']

    # 7. Composite shape-and-color index
    df['shape_color_composite'] = df['shape_complexity_index'] * df['color_uniformity']

    # 8. Relative lesion size
    df['relative_lesion_size'] = df['clin_size_long_diam_mm'] / df['tbp_lv_minorAxisMM']

    # 9. Interaction between border complexity and color variability
    df['border_color_interaction'] = df['border_complexity'] * df['color_variability']

    # 10. Spherical-coordinate representation of the 3D position.
    # The radial distance and azimuth repeat `3d_position_distance` and
    # `3d_lesion_orientation`; they are kept as separate columns because
    # both names are returned in `new_num_cols`.
    df['3d_radial_distance'] = df['3d_position_distance']
    df['3d_polar_angle'] = np.arccos(df['tbp_lv_z'] / df['3d_radial_distance'])
    df['3d_azimuthal_angle'] = df['3d_lesion_orientation']

    # 11. Ratio of shape complexity to lesion size
    df['shape_size_ratio'] = df['shape_complexity_index'] / df['clin_size_long_diam_mm']

    # 12. Composite of color non-uniformity and border complexity
    df['color_border_complexity'] = df['color_uniformity'] * df['border_complexity']

    # 13. Interaction between lesion visibility and size
    df['visibility_size_interaction'] = df['lesion_visibility_score'] * np.log(df['clin_size_long_diam_mm'])

    # 14. Age-adjusted lesion characteristics
    df['age_adjusted_lesion_index'] = df['comprehensive_lesion_index'] / np.log(df['age_approx'])

    # 15. Nonlinear transform of the color contrast
    df['nonlinear_color_contrast'] = np.tanh(df['color_contrast_index'])

    # 16. Composite of lesion shape and position
    df['shape_location_index'] = df['lesion_shape_index'] * df['3d_position_distance']

    # 17. Ratio of border complexity to symmetry (epsilon avoids division by zero)
    df['border_complexity_asymmetry_ratio'] = df['border_complexity'] / (df['tbp_lv_symm_2axis'] + 1e-5)

    # 18. Interaction between color variability and lesion size
    df['color_variability_size_interaction'] = df['color_variability'] * np.log(df['tbp_lv_areaMM2'])

    # 19. Composite of 3D position and lesion characteristics
    df['3d_lesion_composite'] = df['3d_position_distance'] * df['comprehensive_lesion_index']

    # 20. Nonlinear composite of shape and color
    df['nonlinear_shape_color_composite'] = np.tanh(df['shape_color_composite'])

    new_num_cols = [
        "lesion_size_ratio", "lesion_shape_index", "hue_contrast",
        "luminance_contrast", "lesion_color_difference", "border_complexity",
        "color_uniformity", "3d_position_distance", "perimeter_to_area_ratio",
        "lesion_visibility_score", "symmetry_border_consistency", "color_consistency",

        "size_age_interaction", "hue_color_std_interaction", "lesion_severity_index",
        "shape_complexity_index", "color_contrast_index", "log_lesion_area",
        "normalized_lesion_size", "mean_hue_difference", "std_dev_contrast",
        "color_shape_composite_index", "3d_lesion_orientation", "overall_color_difference",
        "symmetry_perimeter_interaction", "comprehensive_lesion_index","shape_complexity_ratio",
        "color_variability", "border_asymmetry", "3d_size_ratio", "age_lesion_interaction",
        "color_contrast_complexity", "shape_color_composite", "relative_lesion_size",
        "border_color_interaction", "3d_radial_distance", "3d_polar_angle", "3d_azimuthal_angle",
        "shape_size_ratio", "color_border_complexity", "visibility_size_interaction", "age_adjusted_lesion_index",
        "nonlinear_color_contrast", "shape_location_index", "border_complexity_asymmetry_ratio", "color_variability_size_interaction",
        "3d_lesion_composite", "nonlinear_shape_color_composite",
    ]
#     new_cat_cols = ["combined_anatomical_site"]
    return df, new_num_cols
In [18]:
# Numerical columns treated as model metadata: raw measurements followed by
# engineered features.
# NOTE(review): not every column produced by feature_engineering is listed
# (e.g. `luminance_contrast`, `border_complexity`) -- confirm the omissions
# are intentional.
numerical_columns = ['age_approx','clin_size_long_diam_mm','tbp_lv_A','tbp_lv_Aext','tbp_lv_B','tbp_lv_Bext','tbp_lv_C','tbp_lv_Cext','tbp_lv_H','tbp_lv_Hext',
                     'tbp_lv_L','tbp_lv_Lext','tbp_lv_areaMM2','tbp_lv_area_perim_ratio','tbp_lv_color_std_mean','tbp_lv_deltaA','tbp_lv_deltaB','tbp_lv_deltaL','tbp_lv_deltaLBnorm','tbp_lv_eccentricity','tbp_lv_minorAxisMM','tbp_lv_nevi_confidence','tbp_lv_norm_border','tbp_lv_norm_color',
                     'tbp_lv_perimeterMM','tbp_lv_radial_color_std_max','tbp_lv_stdL','tbp_lv_stdLExt','tbp_lv_symm_2axis','tbp_lv_symm_2axis_angle','tbp_lv_x','tbp_lv_y','tbp_lv_z','lesion_size_ratio','lesion_shape_index','hue_contrast','lesion_color_difference','color_uniformity',
                     'perimeter_to_area_ratio','lesion_visibility_score','symmetry_border_consistency','color_consistency','size_age_interaction','lesion_severity_index','color_contrast_index','log_lesion_area','normalized_lesion_size','mean_hue_difference','3d_lesion_orientation','overall_color_difference',
                     'symmetry_perimeter_interaction','comprehensive_lesion_index','shape_complexity_ratio','color_variability','border_asymmetry','3d_size_ratio','age_lesion_interaction','color_contrast_complexity','shape_color_composite','relative_lesion_size',
                     'border_color_interaction','3d_polar_angle','shape_size_ratio','visibility_size_interaction','age_adjusted_lesion_index','nonlinear_color_contrast','shape_location_index','border_complexity_asymmetry_ratio',
                     'color_variability_size_interaction','3d_lesion_composite','nonlinear_shape_color_composite'
                    ]
In [19]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
In [20]:
# Work on an independent copy of the test metadata so `val` stays unmodified.
test = val.copy(deep = True)
In [21]:
# Tabular preprocessing: engineered features, imputation, scaling and
# categorical encoding. Every statistic (medians, min/max, most frequent
# category, one-hot vocabulary) is fitted on `train` and re-used for `test`.
train, _ = feature_engineering(train.copy())
test, _ = feature_engineering(test.copy())

cat_features = ["anatom_site_general","tbp_lv_location_simple","tbp_lv_location"]
binary_column = "sex"

# train_num_df,to_drop = remove_collinear_features(train[numerical_columns].copy(),threshold=0.98)
# train = train.drop(columns=to_drop)

# numerical_columns = [col for col in numerical_columns if col not in to_drop]


# Ratios and logs in feature_engineering can yield +/-inf; treat them as missing.
train = train.replace([np.inf, -np.inf], np.nan)
test = test.replace([np.inf, -np.inf], np.nan)

# Impute both frames with the TRAIN medians. Using the test frame's own
# medians is inconsistent with the fitted scaler below and, on a tiny test
# frame, a column that is entirely missing has a NaN median and stays unfilled.
train_medians = train[numerical_columns].median()
train[numerical_columns] = train[numerical_columns].fillna(train_medians)
test[numerical_columns] = test[numerical_columns].fillna(train_medians)

# Log-scaled number of images per patient.
train['n_images'] = train.patient_id.map(train.groupby(['patient_id']).isic_id.count())
test['n_images'] = test.patient_id.map(test.groupby(['patient_id']).isic_id.count())
train.loc[train['patient_id'] == -1, 'n_images'] = 1
train['n_images'] = np.log1p(train['n_images'].values)
test['n_images'] = np.log1p(test['n_images'].values)
# `numerical_columns` is a notebook-level list mutated in place; guard the
# append so that running this cell again does not add a duplicate entry.
if "n_images" not in numerical_columns:
    numerical_columns += ["n_images"]


# Scale every numerical column to [0, 1] using the train min/max.
scaler = MinMaxScaler()
train[numerical_columns] = scaler.fit_transform(train[numerical_columns])
test[numerical_columns] = scaler.transform(test[numerical_columns])


# Fill missing categorical / sex values with the most frequent train value.
simple_imputer = SimpleImputer(strategy='most_frequent')
train[cat_features+[binary_column]] = simple_imputer.fit_transform(train[cat_features+[binary_column]])
test[cat_features+[binary_column]] = simple_imputer.transform(test[cat_features+[binary_column]])

train[binary_column] = train[binary_column].map({'male': 0, 'female': 1})
test[binary_column] = test[binary_column].map({'male': 0, 'female': 1})

# One-hot encode; categories unseen in train become all-zero rows.
onehot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
train_encoded_df = pd.DataFrame(onehot_encoder.fit_transform(train[cat_features]))
test_encoded_df = pd.DataFrame(onehot_encoder.transform(test[cat_features]))

train_encoded_df.columns = onehot_encoder.get_feature_names_out(cat_features)
test_encoded_df.columns = onehot_encoder.get_feature_names_out(cat_features)

# Reset the index before the column-wise concat so rows align positionally
# with the freshly built (0..n-1 indexed) encoded frames.
train = train.drop(columns=cat_features).reset_index(drop=True)
train = pd.concat([train, train_encoded_df], axis=1)

test = test.drop(columns=cat_features).reset_index(drop=True)
test = pd.concat([test, test_encoded_df], axis=1)

# From here on `cat_features` names the one-hot columns plus the binary sex flag.
cat_features = list(onehot_encoder.get_feature_names_out(cat_features))
cat_features += ["sex"]

# Full list of tabular inputs handed to the datasets.
meta_features = numerical_columns + cat_features
In [22]:
# Sanity check: the image-path column survived the preprocessing above.
train['Image_File']
Out[22]:
0         /kaggle/input/isic-2024/isic-2024/train-image/...
1         /kaggle/input/isic-2024/isic-2024/train-image/...
2         /kaggle/input/isic-2024/isic-2024/train-image/...
3         /kaggle/input/isic-2024/isic-2024/train-image/...
4         /kaggle/input/isic-2024/isic-2024/train-image/...
                                ...                        
401054    /kaggle/input/isic-2024/isic-2024/train-image/...
401055    /kaggle/input/isic-2024/isic-2024/train-image/...
401056    /kaggle/input/isic-2024/isic-2024/train-image/...
401057    /kaggle/input/isic-2024/isic-2024/train-image/...
401058    /kaggle/input/isic-2024/isic-2024/train-image/...
Name: Image_File, Length: 401059, dtype: object
In [23]:
class TrainDataset(Dataset):
    """Training dataset that reads images from disk and optionally adds tabular metadata.

    Args:
        df: DataFrame with an ``Image_File`` column (image paths), a ``target``
            column (labels) and, when metadata is used, the ``meta_features``
            columns.
        hdf5: accepted for signature parity with ``TestDataset``; not used here.
        meta_features: list of column names to return as metadata, or ``None``
            to disable metadata.
        transform: optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self , df  , hdf5 , meta_features ,  transform = None) :
        if df is not None :
            # Per-class views of the frame; not used elsewhere in this class.
            self.patient_0 = df.query("target == 0").reset_index(drop=True)
            self.patient_1 = df.query("target == 1").reset_index(drop=True)

        self.df = df
        self.transform = transform
        self.file_names = df["Image_File"].values
        self.labels = df["target"].values
        self.to_img = ToPILImage()
        self.use_meta = meta_features is not None
        self.meta_features = meta_features

    def __len__(self) :
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label, file_path, data)`` for row ``idx``.

        ``data`` is always a ``(image_tensor, meta_tensor)`` pair of float
        tensors; ``meta_tensor`` is empty when metadata is disabled.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        file_path = self.file_names[idx]

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail with the
            # path instead of an opaque cvtColor assertion.
            raise FileNotFoundError(f"Could not read image: {file_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        label = torch.tensor(self.labels[idx]).long()

        # as_tensor accepts both a NumPy array and an already-converted tensor.
        image_tensor = torch.as_tensor(image).float()
        if self.use_meta:
            # Row lookup only when needed: .iloc on a wide frame is costly.
            row = self.df.iloc[idx]
            meta_data = row[self.meta_features].to_numpy().astype(np.float32)
            meta_tensor = torch.tensor(meta_data).float()
        else:
            # Previously `data` was left undefined on this path, so the return
            # below raised UnboundLocalError. An empty tensor keeps the tuple
            # shape identical for downstream unpacking and batching.
            meta_tensor = torch.empty(0, dtype=torch.float32)
        data = (image_tensor, meta_tensor)

        return image, label , file_path , data

    def get_labels(self):
        """Return all labels as a list, in row order."""
        return list(self.labels)
    
class TestDataset(Dataset):
    """Inference dataset that decodes encoded image bytes stored in an HDF5-like mapping.

    Args:
        df: DataFrame with an ``Image_File`` column and, when metadata is
            used, the ``meta_features`` columns.
        hdf5: mapping whose entries yield encoded image bytes via ``entry[()]``.
        meta_features: list of column names to return as metadata, or ``None``
            to disable metadata.
        transform: optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.

    NOTE(review): images are looked up by *position* in ``hdf5.keys()``, which
    assumes the key order matches the row order of ``df`` -- confirm.
    """

    def __init__(self , df , hdf5 , meta_features , transform = None) :
        self.df = df
        self.hdf5 = hdf5
        self.use_meta = meta_features is not None
        # Keys of the image store, in the store's own iteration order.
        self.patient_ids = list(self.hdf5.keys())
        self.transform = transform
        self.file_names = df["Image_File"].values
        self.meta_features = meta_features

    def __len__(self) :
        return len(self.df)

    def __getitem__(self , idx) :
        """Return ``(image, data)`` when metadata is enabled, else just ``image``.

        ``data`` is an ``(image_tensor, meta_tensor)`` pair of float tensors.

        Raises:
            ValueError: if the stored bytes cannot be decoded as an image.
        """
        # The original body referenced an undefined name `index` here and in
        # the lookup below, so every access raised NameError.
        image_key = self.patient_ids[idx]
        image_data = self.hdf5[image_key][()]
        image = cv2.imdecode(np.frombuffer(image_data, np.uint8), cv2.IMREAD_COLOR)
#         image = cv2.imread(file_path)
        if image is None:
            # cv2.imdecode signals failure by returning None.
            raise ValueError(f"Could not decode image for key: {image_key}")
        image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
        if self.transform :
            augmented = self.transform(image=image)
            image = augmented['image']

        if self.use_meta:
            row = self.df.iloc[idx]
            meta_data = row[self.meta_features].to_numpy().astype(np.float32)
            # as_tensor accepts both a NumPy array and an already-converted tensor.
            data = ( torch.as_tensor(image).float(), torch.tensor(meta_data).float())

            return image , data

        return image
In [24]:
# Preview the processed training frame (scaled numericals plus one-hot columns).
train.head()
Out[24]:
isic_id target patient_id age_approx sex clin_size_long_diam_mm image_type tbp_tile_type tbp_lv_A tbp_lv_Aext tbp_lv_B tbp_lv_Bext tbp_lv_C tbp_lv_Cext tbp_lv_H tbp_lv_Hext tbp_lv_L tbp_lv_Lext tbp_lv_areaMM2 tbp_lv_area_perim_ratio tbp_lv_color_std_mean tbp_lv_deltaA tbp_lv_deltaB tbp_lv_deltaL tbp_lv_deltaLB tbp_lv_deltaLBnorm tbp_lv_eccentricity tbp_lv_minorAxisMM tbp_lv_nevi_confidence tbp_lv_norm_border tbp_lv_norm_color tbp_lv_perimeterMM tbp_lv_radial_color_std_max tbp_lv_stdL tbp_lv_stdLExt tbp_lv_symm_2axis tbp_lv_symm_2axis_angle tbp_lv_x tbp_lv_y tbp_lv_z attribution copyright_license lesion_id iddx_full iddx_1 iddx_2 iddx_3 iddx_4 iddx_5 mel_mitotic_index mel_thick_mm tbp_lv_dnn_lesion_confidence Image_File lesion_size_ratio lesion_shape_index hue_contrast luminance_contrast lesion_color_difference border_complexity color_uniformity 3d_position_distance perimeter_to_area_ratio lesion_visibility_score symmetry_border_consistency color_consistency size_age_interaction hue_color_std_interaction lesion_severity_index shape_complexity_index color_contrast_index log_lesion_area normalized_lesion_size mean_hue_difference std_dev_contrast color_shape_composite_index 3d_lesion_orientation overall_color_difference symmetry_perimeter_interaction comprehensive_lesion_index shape_complexity_ratio color_variability border_asymmetry 3d_size_ratio age_lesion_interaction color_contrast_complexity shape_color_composite relative_lesion_size border_color_interaction 3d_radial_distance 3d_polar_angle 3d_azimuthal_angle shape_size_ratio color_border_complexity visibility_size_interaction age_adjusted_lesion_index nonlinear_color_contrast shape_location_index border_complexity_asymmetry_ratio color_variability_size_interaction 3d_lesion_composite nonlinear_shape_color_composite n_images anatom_site_general_anterior torso anatom_site_general_head/neck anatom_site_general_lower extremity anatom_site_general_posterior torso anatom_site_general_upper extremity 
tbp_lv_location_simple_Head & Neck tbp_lv_location_simple_Left Arm tbp_lv_location_simple_Left Leg tbp_lv_location_simple_Right Arm tbp_lv_location_simple_Right Leg tbp_lv_location_simple_Torso Back tbp_lv_location_simple_Torso Front tbp_lv_location_simple_Unknown tbp_lv_location_Head & Neck tbp_lv_location_Left Arm tbp_lv_location_Left Arm - Lower tbp_lv_location_Left Arm - Upper tbp_lv_location_Left Leg tbp_lv_location_Left Leg - Lower tbp_lv_location_Left Leg - Upper tbp_lv_location_Right Arm tbp_lv_location_Right Arm - Lower tbp_lv_location_Right Arm - Upper tbp_lv_location_Right Leg tbp_lv_location_Right Leg - Lower tbp_lv_location_Right Leg - Upper tbp_lv_location_Torso Back tbp_lv_location_Torso Back Bottom Third tbp_lv_location_Torso Back Middle Third tbp_lv_location_Torso Back Top Third tbp_lv_location_Torso Front tbp_lv_location_Torso Front Bottom Half tbp_lv_location_Torso Front Top Half tbp_lv_location_Unknown
0 ISIC_0015670 0 IP_1235828 0.6875 0 0.074453 TBP tile: close-up 3D: white 0.448560 0.549700 0.502444 0.376069 0.549810 0.402899 0.508448 0.267121 0.601787 0.547974 0.008153 0.218652 0.000000 0.387365 0.685141 0.836741 8.360566 0.101256 0.922243 0.070059 2.628592e-05 0.690918 0.00000 0.067335 0.000000 0.102225 0.080388 0.582086 0.485714 0.356776 0.566559 0.408087 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 97.517282 /kaggle/input/isic-2024/isic-2024/train-image/... 0.377312 0.306032 0.057723 7.658253 0.153458 7.681836 0.001929 641.525666 0.464388 0.074536 0.426332 0.093746 0.079887 0.000000 0.334336 7.718231 0.527696 0.195193 0.024389 0.404682 5.269909 9.355549 0.796815 0.723533 0.105880 0.215550 0.216541 0.000000 0.540615 0.124366 0.260433 0.499957 0.001206 0.245694 0.000000 641.525666 0.519977 1.860241 0.403224 NaN 0.062648 0.225713 0.999961 0.143238 0.164883 0.00000 0.114770 0.996913 0.657389 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 ISIC_0015845 0 IP_8170065 0.6875 0 0.003650 TBP tile: close-up 3D: white 0.674860 0.747149 0.491697 0.391260 0.685050 0.552365 0.384152 0.152394 0.535710 0.465595 0.001462 0.019278 0.000000 0.438834 0.646551 0.867883 6.839008 0.072258 0.646281 0.030250 1.334303e-09 0.162262 0.00000 0.007756 0.000000 0.033828 0.131598 0.252621 0.314286 0.504132 0.893847 0.571023 Memorial Sloan Kettering Cancer Center CC-BY IL_6727506 Benign Benign NaN NaN NaN NaN NaN NaN 3.141455 /kaggle/input/isic-2024/isic-2024/train-image/... 0.671535 0.862602 0.090871 6.500840 0.156607 2.402116 0.001929 1576.723962 0.585990 0.053190 0.058229 0.035666 0.019856 0.000000 0.061852 2.483847 0.555390 0.053754 0.003870 0.265244 5.345725 4.173668 0.750701 0.756171 0.009881 0.018873 0.021085 0.000000 0.259181 0.875064 0.072891 0.499957 0.001206 0.088039 0.000000 1576.723962 0.486381 1.570846 0.357300 NaN 0.004630 0.025995 0.999998 0.882084 0.004149 0.00000 0.129278 0.996913 0.479479 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 ISIC_0015864 0 IP_6724798 0.6875 0 0.087591 TBP tile: close-up 3D: XP 0.494565 0.568489 0.703178 0.619598 0.738108 0.606831 0.566217 0.336171 0.596911 0.543583 0.008491 0.175593 0.000000 0.419247 0.734515 0.835366 9.092376 0.119668 0.954805 0.050841 2.959177e-06 0.447253 0.00000 0.063125 0.000000 0.085311 0.052795 0.334987 0.600000 0.603966 0.858581 0.858501 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.804040 /kaggle/input/isic-2024/isic-2024/train-image/... 0.185612 0.366843 0.075925 7.709340 0.185190 5.160240 0.001929 1495.442825 0.424068 0.088089 0.174525 0.078557 0.091026 0.000000 0.216620 5.201588 0.589523 0.200096 0.028197 0.478441 6.033971 8.182175 0.737339 0.764476 0.057649 0.172135 0.125381 0.000000 0.572494 0.264886 0.179413 0.499957 0.001206 0.464085 0.000000 1495.442825 0.446995 1.486992 0.238232 NaN 0.074987 0.181627 1.000000 0.412719 0.208498 0.00000 0.245416 0.996913 0.810607 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
3 ISIC_0015902 0 IP_4111386 0.7500 0 0.081022 TBP tile: close-up 3D: XP 0.330121 0.460827 0.402979 0.303669 0.407316 0.295051 0.539680 0.308385 0.173134 0.069389 0.016925 0.053995 0.051695 0.345916 0.599208 0.917270 4.783413 0.123664 0.661665 0.121860 2.198945e-01 0.147329 0.17717 0.069412 0.057844 0.057262 0.037652 0.170317 0.742857 0.390405 0.848437 0.572962 ACEMID MIA CC-0 NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.989998 /kaggle/input/isic-2024/isic-2024/train-image/... 0.700267 0.683752 0.075968 4.665323 0.057300 2.185455 0.001449 1450.239191 0.221867 0.138479 0.038648 0.164283 0.093760 29.026369 0.147492 2.252618 0.510845 0.292986 0.023673 0.446797 2.954563 5.204448 0.766225 0.701845 0.031795 0.073172 0.025163 0.148822 0.269298 0.271365 0.144972 0.510397 0.000654 0.077898 0.040456 1450.239191 0.485075 1.668272 0.102462 1.691708 0.093087 0.076888 0.999746 0.661763 0.074702 0.09984 0.160499 0.940654 0.664374 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
4 ISIC_0024200 0 IP_8313778 0.6250 0 0.063139 TBP tile: close-up 3D: white 0.536985 0.632028 0.494130 0.420931 0.595283 0.488997 0.451561 0.230185 0.504685 0.459331 0.005005 0.119577 0.000000 0.402283 0.613124 0.811955 9.148495 0.128433 0.969901 0.036212 1.378832e-05 0.326168 0.00000 0.038916 0.000000 0.105071 0.074071 0.282586 0.114286 0.445845 0.864265 0.512548 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 70.442510 /kaggle/input/isic-2024/isic-2024/train-image/... 0.172334 0.476059 0.106167 8.579430 0.169487 3.972287 0.001929 1490.629394 0.486255 0.094542 0.113931 0.111047 0.063254 0.000000 0.158069 4.022532 0.497088 0.141714 0.023735 0.349728 5.655868 6.738664 0.758427 0.689332 0.032495 0.118431 0.075373 0.000000 0.461375 0.330097 0.125369 0.499957 0.001206 0.486625 0.000000 1490.629394 0.493560 1.619334 0.229006 NaN 0.063896 0.132898 0.998829 0.504179 0.153139 0.00000 0.201530 0.996913 0.639917 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
In [25]:
import h5py
In [26]:
# Open the training image archive read-only and build a dataset on top of it.
# No transform argument is passed here, so this instance is used for the raw preview below.
hdf = h5py.File('/kaggle/input/isic-2024/isic-2024/train-image.hdf5', mode='r')
train_dataset = TrainDataset(train, hdf, meta_features)
In [27]:
# Preview the first eight training samples on a 2 x 4 grid.
n_rows, n_cols = 2, 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 7))

for i in range(n_rows):
    for j in range(n_cols):
        # Row-major position: the stride must equal the number of columns,
        # otherwise samples are repeated across rows and the last ones are never shown.
        index = i * n_cols + j
        axes[i, j].axis('off')  # also hides panels that stay empty
        if index < len(train_dataset):
            image, label, file_path, data = train_dataset[index]
            axes[i, j].imshow(image)
            # target == 1 marks a malignant lesion, target == 0 a benign one.
            if label.numpy() == 1:
                axes[i, j].set_title("Malignant", color="r")
            else:
                axes[i, j].set_title("Benign", color="g")

plt.tight_layout()
plt.show()
In [28]:
# Open the test image archive read-only and build the test dataset on top of it.
# Note: this rebinds `hdf`; the training dataset created earlier keeps its own reference.
hdf = h5py.File('/kaggle/input/isic-2024/isic-2024/test-image.hdf5', mode='r')
test_dataset = TestDataset(test, hdf, meta_features)
In [29]:
# Preview up to three test samples side by side.
n_preview = 3
fig, axes = plt.subplots(1, n_preview, figsize=(10, 7))  # 1 row, 3 columns

for i in range(n_preview):
    index = i
    axes[i].axis('off')  # also hides panels that stay empty
    # Bound the index by the dataset that is actually being indexed (the test set).
    if index < len(test_dataset):
        image, data = test_dataset[index]
        axes[i].imshow(image)

plt.tight_layout()
plt.show()
/kaggle/input/isic-2024/isic-2024/train-image/image/ISIC_0015657.jpg
/kaggle/input/isic-2024/isic-2024/train-image/image/ISIC_0015729.jpg
/kaggle/input/isic-2024/isic-2024/train-image/image/ISIC_0015740.jpg
In [30]:
import albumentations as A
from typing import Any, Callable, Literal, Tuple, cast
from PIL import Image, ImageDraw, ImageFilter
In [31]:
class DrawHair(A.ImageOnlyTransform):
    """Albumentations transform that overlays synthetic hair strands on an image.

    A pool of RGBA hair patterns is drawn once in the constructor; every call to
    ``apply`` picks one pattern at random, resizes it to the input image and
    alpha-composites it on top.
    """

    def __init__(self, count=10, size=300, p=0.5):
        """
        Build the transform and pre-generate the pool of hair patterns.

        Args:
            count (int, optional): Number of pre-generated hair patterns. Defaults to 10.
            size (int, optional): Side length, in pixels, of each square pattern. Defaults to 300.
            p (float, optional): Probability of applying the transformation. Defaults to 0.5.
        """
        super().__init__(always_apply=False, p=p)  # p enables the probabilistic behaviour
        self.count = count
        self.size = size
        # Dark brown / black tones used as the base RGB colour of a pattern.
        self.colors = [
            (35, 18, 11),
            (61, 35, 20),
            (90, 56, 37),
            (44, 22, 8),
            (21, 15, 8),
        ]
        self.premade_hairs = [self.generate_hairs() for _ in range(count)]

    def generate_hairs(self):
        """
        Draw one random hair pattern.

        The number of strands, the single base colour of the pattern, and each
        strand's origin, extent, arc angles, opacity and width are randomised.
        ``0`` appears twice among the ten possible strand counts, so some
        patterns are intentionally empty.

        Returns:
            Image: RGBA image of shape ``(size, size)`` holding the pattern.
        """
        num_hairs = random.choice([0, 0, 50, 100, 150, 200, 300, 400, 500, 600])
        color = list(random.choice(self.colors))

        # Draw on a canvas 200 px larger than requested, then shrink it at the end.
        width, height = (self.size + 200, self.size + 200)
        hair_img = Image.new('RGBA', (width, height), tuple(color + [0]))
        draw = ImageDraw.Draw(hair_img)

        for _ in range(num_hairs):
            direction = random.choice([[1, 1], [-1, 1], [1, -1]])
            hair_color = tuple(color + [random.randint(70, 180)])  # random alpha per strand
            origin = random.randint(-width, width), random.randint(-height, height)

            end0 = random.randint(100, int(width * 1.5)) * direction[0]
            end1 = random.randint(100, int(height * 1.5)) * direction[1]

            # Bounding box of the ellipse the arc is taken from.
            end = (origin[0] + end0, origin[1] + end1)
            left, top = min(origin[0], end[0]), min(origin[1], end[1])
            right, bottom = max(origin[0], end[0]), max(origin[1], end[1])

            angles = (-random.randint(0, 70), random.randint(0, 70))
            draw.arc([left, top, right, bottom], angles[0], angles[1], fill=hair_color, width=random.choice([1, 2, 3]))

        hair_img = hair_img.filter(ImageFilter.GaussianBlur(radius=1))
        return hair_img.resize((self.size, self.size), resample=Image.LANCZOS)

    def apply(self, image: np.ndarray, **config: Any):
        """
        Composite one randomly chosen pre-generated hair pattern over ``image``.

        Whether the transform fires at all is decided by the base class from ``p``.

        Args:
            image (np.ndarray): Input image; it is cast to uint8 before compositing.

        Returns:
            np.ndarray: RGB uint8 image with the hair pattern overlaid. The input
            is returned unchanged when no pattern was pre-generated (``count == 0``).
        """
        if not self.premade_hairs:
            # Nothing to overlay; random.choice would raise on an empty pool.
            return image

        # Cast straight to uint8: an intermediate float32 copy adds nothing.
        img_pil = Image.fromarray(np.uint8(image)).convert('RGBA')
        random_hairs = random.choice(self.premade_hairs).resize(img_pil.size, resample=Image.LANCZOS)

        out = Image.alpha_composite(img_pil, random_hairs).convert('RGB')
        return np.array(out)
In [32]:
class GridMask(A.ImageOnlyTransform):
    """
    GridMask augmentation as an Albumentations transform.

    A regular grid mask is built once per image size, optionally rotated,
    randomly cropped to the image size and multiplied into the image.

    Parameters:
    - num_grid (int or tuple of int): Number of grid cells per side, or an inclusive
      (minimum, maximum) range; one mask is built per value and one is picked at random
      on each call.
    - fill_value (float): Multiplier written into the masked cells of the mask. The image
      is multiplied by the mask, so 0 blacks the cells out and 1 leaves them untouched.
    - rotate (number or tuple of numbers): Rotation angle range in degrees for the mask;
      a single number r means (-r, r).
    - mode (int): 0 masks the top-left quadrant of every grid cell, 2 additionally masks
      the bottom-right quadrant, 1 uses the inverse (1 - mask) of the mode-0 pattern.
    - always_apply (bool): Forwarded to the Albumentations base class.
    - p (float): Probability of applying the transform.
    """
    def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
        super().__init__(always_apply=always_apply, p=p)
        if isinstance(num_grid, int):
            num_grid = (num_grid, num_grid)
        if isinstance(rotate, (int, float)):
            rotate = (-rotate, rotate)
        self.num_grid = num_grid
        self.fill_value = fill_value
        self.rotate = rotate
        self.mode = mode
        self.masks = None
        self._mask_shape = None  # (height, width) the cached masks were built for
        self.rand_h_max = []
        self.rand_w_max = []

    def init_masks(self, height, width):
        """Build (or rebuild) one mask per grid size for images of ``height`` x ``width``.

        Each mask is one grid cell larger than the image in both directions so that a
        random image-sized crop can start anywhere inside the first cell. Masks are cached
        and only rebuilt when the requested size changes.
        """
        if self.masks is not None and getattr(self, "_mask_shape", None) == (height, width):
            return

        self.masks = []
        self.rand_h_max = []
        self.rand_w_max = []
        self._mask_shape = (height, width)

        for n_g in range(self.num_grid[0], self.num_grid[1] + 1):
            grid_h = height / n_g  # height of one grid cell
            grid_w = width / n_g   # width of one grid cell

            this_mask = np.ones((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w)), dtype='float32')
            for i in range(n_g + 1):
                for j in range(n_g + 1):
                    # Top-left quadrant of cell (i, j).
                    this_mask[
                        int(i * grid_h) : int(i * grid_h + grid_h / 2),
                        int(j * grid_w) : int(j * grid_w + grid_w / 2)
                    ] = self.fill_value
                    if self.mode == 2:
                        # Bottom-right quadrant of cell (i, j).
                        this_mask[
                            int(i * grid_h + grid_h / 2) : int(i * grid_h + grid_h),
                            int(j * grid_w + grid_w / 2) : int(j * grid_w + grid_w)
                        ] = self.fill_value

            if self.mode == 1:
                this_mask = 1 - this_mask

            self.masks.append(this_mask)
            self.rand_h_max.append(grid_h)
            self.rand_w_max.append(grid_w)

    def apply(self, img, **params):
        """Multiply ``img`` by a randomly rotated and cropped grid mask.

        Args:
            img (np.ndarray): Image of shape (H, W) or (H, W, C).

        Returns:
            np.ndarray: float32 array with the same shape as ``img``.
        """
        height, width = img.shape[:2]
        self.init_masks(height, width)
        # Honour a num_grid range by sampling among the prepared masks.
        mask = random.choice(self.masks)

        # Randomly rotate the mask
        angle = random.uniform(self.rotate[0], self.rotate[1])
        mask = self._rotate_array(mask, angle)

        # Randomly crop an image-sized window out of the (larger) mask
        top = random.randint(0, mask.shape[0] - height)
        left = random.randint(0, mask.shape[1] - width)
        window = mask[top:top + height, left:left + width]

        # Apply the mask to the image, broadcasting over channels when present
        out = np.asarray(img, dtype=np.float32)
        if out.ndim == 3:
            window = window[:, :, None]
        return out * window

    @staticmethod
    def _rotate_array(mask, angle):
        """Rotate a (H, W) or (H, W, C) array by ``angle`` degrees about its centre, keeping its size."""
        center = (mask.shape[1] / 2, mask.shape[0] / 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(mask, rotation_matrix, (mask.shape[1], mask.shape[0]))

    def rotate_mask(self, mask, angle):
        """Rotate a (C, H, W) mask tensor by ``angle`` degrees and return a new (C, H, W) tensor."""
        mask = mask.numpy().transpose(1, 2, 0)
        rotated_mask = self._rotate_array(mask, angle)
        return torch.tensor(rotated_mask.transpose(2, 0, 1))
In [33]:
from skimage.color import rgb2hsv, rgb2gray, rgb2yuv
from skimage import color, exposure, transform
from skimage.exposure import equalize_hist
from albumentations import RandomCrop
from scipy.fftpack import dct, idct

def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    """Resize ``img`` so that its longer side equals ``size``, keeping the aspect ratio.

    The image is returned untouched when its longer side already equals ``size``.
    Otherwise it is cast to uint8 and resized; ``interpolation_up`` is used when the
    image grows (scale > 1) and ``interpolation_down`` in every other case.
    The shorter side is truncated to an integer number of pixels.
    """
    height, width = img.shape[:2]
    if max(width, height) == size:
        return img

    if width > height:
        # Landscape: the width becomes the target size.
        scale = size / width
        new_w, new_h = size, height * scale
    else:
        # Portrait or square: the height becomes the target size.
        scale = size / height
        new_w, new_h = width * scale, size

    chosen = interpolation_up if scale > 1 else interpolation_down
    return cv2.resize(img.astype('uint8'), (int(new_w), int(new_h)), interpolation=chosen)
In [34]:
class IsotropicResize(A.DualTransform):
    """Albumentations transform that resizes the longer image side to ``max_side``.

    The aspect ratio is preserved (see ``isotropically_resize_image``). Masks are
    resized with nearest-neighbour interpolation.

    Parameters:
    - max_side (int): Target length of the longer side.
    - interpolation_down: OpenCV interpolation flag used when the image shrinks.
    - interpolation_up: OpenCV interpolation flag used when the image grows.
    - always_apply (bool), p (float): Forwarded to the Albumentations base class.
    """
    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=None, interpolation_up=None, **params):
        """Resize ``img``; explicit interpolation arguments override the configured ones."""
        # Fall back to the flags chosen at construction time. With fixed keyword
        # defaults here the constructor arguments would never be used.
        if interpolation_down is None:
            interpolation_down = self.interpolation_down
        if interpolation_up is None:
            interpolation_up = self.interpolation_up
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        """Resize a mask with nearest-neighbour interpolation in both directions."""
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        """Names of the constructor arguments reported to Albumentations."""
        return ("max_side", "interpolation_down", "interpolation_up")


class Resize4xAndBack(A.ImageOnlyTransform):
    """Degrade an image by shrinking it by a factor of 2 or 4 and scaling it back up.

    The downscale always uses area interpolation; the upscale interpolation is picked
    at random among cubic, linear and nearest. The output has the input's height and width.
    """

    def __init__(self, always_apply=False, p=0.5):
        super().__init__(always_apply, p)

    def apply(self, img, **params):
        """Return ``img`` after a random down/up resize round trip."""
        height, width = img.shape[:2]
        factor = random.choice([2, 4])
        shrunk = cv2.resize(img, (width // factor, height // factor), interpolation=cv2.INTER_AREA)
        upscale_mode = random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST])
        return cv2.resize(shrunk, (width, height), interpolation=upscale_mode)
In [35]:
def get_transforms(*, data):
    """Return the Albumentations pipeline for the requested split.

    Args:
        data: ``'train'`` for the augmentation pipeline, ``'valid'`` for
            resize + normalise only.

    Returns:
        An ``A.Compose`` ending in ``Normalize`` and ``ToTensorV2``; ``None`` for any
        other value of ``data``.
    """
    image_size = CFG.size

    def _normalize_and_to_tensor():
        # Shared tail of both pipelines.
        return [
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ]

    if data == 'valid':
        return A.Compose([A.Resize(image_size, image_size)] + _normalize_and_to_tensor())

    if data == 'train':
        # Synthetic hair first, so every later augmentation also acts on the strands.
        hair = DrawHair(count=3)

        flips = [
            A.Transpose(p=0.5),
            A.VerticalFlip(p=0.5),
            A.HorizontalFlip(p=0.5),
        ]

        # contrast
        tone = A.OneOf([
            A.RandomToneCurve(scale=0.3, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.2), contrast_limit=(-0.4, 0.5), brightness_by_max=True, always_apply=False, p=0.5)
        ], p=0.5)

        blur_or_noise = A.OneOf([
            A.MotionBlur(blur_limit=5),
            A.MedianBlur(blur_limit=5),
            A.GaussianBlur(blur_limit=5),
            A.GaussNoise(var_limit=(5.0, 30.0)),
        ], p=0.7)

        distortion = A.OneOf([
            A.OpticalDistortion(distort_limit=1.0),
            A.GridDistortion(num_steps=5, distort_limit=1.0),
            A.ElasticTransform(alpha=3),
        ], p=0.7)

        colour_and_geometry = [
            A.CLAHE(clip_limit=4.0, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
            A.Resize(image_size, image_size),
            GridMask(num_grid=3, fill_value=0, rotate=0, mode=0 , p = 0.3),
        ]

        return A.Compose(
            [hair]
            + flips
            + [tone, blur_or_noise, distortion]
            + colour_and_geometry
            + _normalize_and_to_tensor()
        )
In [36]:
# Re-create the training dataset, this time with the training augmentation pipeline attached.
hdf_train = h5py.File('/kaggle/input/isic-2024/isic-2024/train-image.hdf5', mode='r')
train_dataset = TrainDataset(
    train,
    hdf5=hdf_train,
    meta_features=meta_features,
    transform=get_transforms(data="train"),
)
In [37]:
# Each dataset item is (image, label, file_path, data).
# Preview eight augmented training samples on a 2 x 4 grid.
n_rows, n_cols = 2, 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 7))

# Same statistics as the Normalize step in get_transforms; used to undo it for display.
display_mean = np.array([0.485, 0.456, 0.406])
display_std = np.array([0.229, 0.224, 0.225])

for i in range(n_rows):
    for j in range(n_cols):
        # Row-major position: the stride must equal the number of columns,
        # otherwise samples are repeated across rows and the last ones are never shown.
        index = i * n_cols + j
        axes[i, j].axis('off')  # also hides panels that stay empty
        if index < len(train_dataset):
            image, label, file_path, data = train_dataset[index]
            # CHW tensor -> HWC array, then undo the normalisation so that imshow
            # receives values in [0, 1] instead of clipping them.
            shown = image.permute(1, 2, 0).numpy() * display_std + display_mean
            axes[i, j].imshow(np.clip(shown, 0, 1))
            # target == 1 marks a malignant lesion, target == 0 a benign one.
            if label.numpy() == 1:
                axes[i, j].set_title("Malignant", color="r")
            else:
                axes[i, j].set_title("Benign", color="g")

plt.tight_layout()
plt.show()
In [38]:
# Assign every row to one validation fold: stratified on the target and grouped by
# patient so that no patient appears in more than one fold.
folds = train.copy()
Fold = StratifiedGroupKFold(n_splits = CFG.n_fold  , shuffle = True , random_state = CFG.seed , )

# split() yields *positional* row indices, so collect the fold ids in a positional
# array instead of writing through label-based .loc (which is only equivalent when
# the frame has a default RangeIndex).
fold_ids = np.full(len(folds), -1, dtype=int)
for n, (_, val_index) in enumerate(Fold.split(folds, folds[CFG.target_col] , groups=folds["patient_id"])):
    fold_ids[val_index] = n

assert (fold_ids >= 0).all(), "every row must belong to exactly one validation fold"
folds['fold'] = fold_ids
In [39]:
# Print the class balance of each fold, in order of first appearance of the fold id.
for fold_id in folds['fold'].unique():
    in_fold = folds['fold'] == fold_id
    print(folds.loc[in_fold, 'target'].value_counts())
target
0    83272
1       89
Name: count, dtype: int64
target
0    71081
1       83
Name: count, dtype: int64
target
0    77585
1       60
Name: count, dtype: int64
target
0    81512
1       83
Name: count, dtype: int64
target
0    87216
1       78
Name: count, dtype: int64
In [40]:
class CustomResNext(nn.Module):
    """Image backbone with an optional tabular (meta-feature) branch and a linear head.

    Args:
        model: Kept for interface compatibility. NOTE(review): it is currently ignored;
            the backbone name is always read from ``CFG.model_name``.
        out_dim: Number of output logits.
        num_classes: Passed to ``timm.create_model``; 0 means no classification head,
            i.e. the backbone is used as a feature extractor.
        n_meta_features: Number of tabular input features; 0 disables the meta branch.
        n_meta_dim: Widths of the three hidden layers of the meta branch.
        pretrained: Whether timm should load pretrained backbone weights.
    """
    def __init__(self , model = 'resnext50_32x4d', out_dim =2 ,  num_classes = 0 ,n_meta_features=0 , n_meta_dim=[512,256,128],  pretrained = False  ):
        super().__init__()
        self.extractor = timm.create_model(CFG.model_name ,
                                    pretrained = pretrained ,
                                    drop_rate = 0.1,
                                    drop_path_rate = 0.2,
                                    num_classes = num_classes ,
                                           global_pool='avg'
                                )
        self.dropouts = nn.ModuleList([
            nn.Dropout(0.5) for _ in range(5)
        ])
        in_ch = self.extractor.num_features
        self.n_meta_features = n_meta_features
        self.n_meta_dim = n_meta_dim
        if n_meta_features > 0:
            # Three Linear -> BatchNorm -> SiLU -> Dropout stages for the tabular input.
            self.meta = nn.Sequential(
                nn.Linear(n_meta_features, n_meta_dim[0]),
                nn.BatchNorm1d(int(n_meta_dim[0])),
                nn.SiLU(),
                nn.Dropout(p=0.5),
                nn.Linear(n_meta_dim[0], n_meta_dim[1]),
                nn.BatchNorm1d(n_meta_dim[1]),
                nn.SiLU(),
                nn.Dropout(p=0.5),
                nn.Linear(n_meta_dim[1],n_meta_dim[2]),
                nn.BatchNorm1d(n_meta_dim[2]),
                nn.SiLU(),
                nn.Dropout(p=0.5)
            )
            # The head sees image features concatenated with the meta embedding.
            in_ch += n_meta_dim[2]

        self.out = nn.Linear(in_ch, out_dim)

    def extract(self , x ) :
        """Run the image backbone and return its output."""
        return self.extractor(x)

    def forward(self , x , x_meta=None):
        """Compute logits for a batch.

        Args:
            x: Image batch fed to the backbone.
            x_meta: Tabular feature batch; required when ``n_meta_features > 0`` and
                ignored otherwise.

        Returns:
            Tensor of raw logits with ``out_dim`` columns (no sigmoid/softmax applied).
        """
        X = self.extract(x)
        if self.n_meta_features > 0:
            # Concatenate the image features with the embedded meta features.
            X = torch.cat((X, self.meta(x_meta)), dim=1)

        # NOTE(review): this is not the usual multi-sample dropout (average of the
        # head over independent dropout masks). Each dropout is applied to the running
        # sum, so with dropout disabled (eval mode) the features end up scaled by
        # 2**4 / 5. The arithmetic is kept as-is so existing checkpoints stay valid.
        for i, dropout in enumerate(self.dropouts):
            if i == 0:
                X = dropout(X)
            else:
                X += dropout(X)
        X /= len(self.dropouts)

        # may add sigmoid if out = 1
        return self.out(X)
In [41]:
# # Calling the model here causes memory fragmentation when the Accelerator is used later
# model = CustomResNext(num_classes = 0 ,n_meta_features=len(meta_features), pretrained=False) 
# model(train_dataset[0][0].unsqueeze(1).permute(1,0,2,3) , train_dataset[0][3][1])
In [42]:
# Ask PyTorch's caching allocator to release unused cached GPU memory, then print its
# per-device memory statistics as a baseline before training starts.
torch.cuda.empty_cache()
print(torch.cuda.memory_summary())
|===========================================================================|
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|===========================================================================|
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Requested memory      |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| GPU reserved memory   |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Non-releasable memory |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Allocations           |       0    |       0    |       0    |       0    |
|       from large pool |       0    |       0    |       0    |       0    |
|       from small pool |       0    |       0    |       0    |       0    |
|---------------------------------------------------------------------------|
| Active allocs         |       0    |       0    |       0    |       0    |
|       from large pool |       0    |       0    |       0    |       0    |
|       from small pool |       0    |       0    |       0    |       0    |
|---------------------------------------------------------------------------|
| GPU reserved segments |       0    |       0    |       0    |       0    |
|       from large pool |       0    |       0    |       0    |       0    |
|       from small pool |       0    |       0    |       0    |       0    |
|---------------------------------------------------------------------------|
| Non-releasable allocs |       0    |       0    |       0    |       0    |
|       from large pool |       0    |       0    |       0    |       0    |
|       from small pool |       0    |       0    |       0    |       0    |
|---------------------------------------------------------------------------|
| Oversize allocations  |       0    |       0    |       0    |       0    |
|---------------------------------------------------------------------------|
| Oversize GPU segments |       0    |       0    |       0    |       0    |
|===========================================================================|

In [43]:
import wandb
# Log in to Weights & Biases with the API key stored in the Kaggle secret "wandb_api";
# fall back to anonymous mode when the secret or the login is unavailable.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anonymous = None
except Exception as exc:  # best effort: keep the notebook running, but say why it failed
    anonymous = "must"
    print(f'W&B login failed ({type(exc).__name__}).')
    print('To use your W&B account,\nGo to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin
wandb: WARNING If you're specifying your api key in code, ensure this code is not shared publicly.
wandb: WARNING Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
In [44]:
# Switch on the Weights & Biases run initialisation in the next cell.
CFG.WandB = True
In [45]:
# Start a W&B run named after the model, the first training fold and the epoch count.
if CFG.WandB:
    run = wandb.init(
        entity='lassouedaymenla',
        project=CFG.competition,
        save_code=True,
        name=f"{CFG.model_name}_Fd_{CFG.trn_fold[0]}_ek_{CFG.epochs}",
    )
wandb: Currently logged in as: lassouedaymenla. Use `wandb login --relogin` to force relogin
wandb: wandb version 0.17.7 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.4
wandb: Run data is saved locally in /kaggle/working/wandb/run-20240820_014056-f7534ypo
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run tf_efficientnetv2_m.in21k_Fd_2_ek_10
wandb: ⭐️ View project at https://wandb.ai/lassouedaymenla/ISIC_2024
wandb: 🚀 View run at https://wandb.ai/lassouedaymenla/ISIC_2024/runs/f7534ypo
In [46]:
# torch.cuda.empty_cache()
In [47]:
class AverageMeter(object):
    """Keep the most recent value and the running weighted average of a metric.

    Attributes (all reset to 0 by ``reset``):
        val: last value passed to ``update``.
        sum: weighted sum of all values seen.
        count: total weight seen.
        avg: ``sum / count``, or 0 while ``count`` is 0.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. a batch mean and its batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total weight (e.g. an empty batch) must not raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'`` (fractions truncated)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '{}m {}s'.format(int(whole_minutes), int(leftover_seconds))


def timeSince(since, percent):
    """Describe elapsed and estimated remaining time.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work already done (must be non-zero).

    Returns:
        A string of the form ``'<elapsed> (remain <remaining>)'``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration linearly from the fraction completed.
    remaining = elapsed / (percent) - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler , accelerator):
    """Train ``model`` for one epoch and return the average training loss.

    Gradients are accumulated over ``CFG.gradient_accumulation_steps`` batches; they
    are clipped to ``CFG.max_grad_norm`` and the optimizer is stepped exactly once per
    accumulation window (and once more at the end of the epoch if the last window is
    incomplete). Progress is printed every ``CFG.print_freq`` steps and logged to W&B
    on every step.

    Args:
        train_loader: Iterable yielding ``(images, labels, file_name, data)`` batches;
            ``data[1]`` is passed to the model as the meta-feature tensor.
        model: Network called as ``model(images, meta)``.
        criterion: Loss called as ``criterion(y_preds, labels)``.
        optimizer: Optimizer updating ``model``.
        epoch: Zero-based epoch index (used for display only).
        scheduler: Accepted for interface compatibility; it is not stepped here.
        accelerator: Accepted for interface compatibility; not used here. Tensors are
            moved to the module-level ``device``.

    Returns:
        float: Sample-weighted mean of the un-scaled batch losses.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()

    accumulation_steps = max(1, CFG.gradient_accumulation_steps)
    n_batches = len(train_loader)
    grad_norm = 0.0  # norm measured at the most recent optimizer step
    optimizer.zero_grad()  # never start an epoch with stale gradients

    for step, (images, labels , file_name , data) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        meta = data[1].to(device)

        y_preds = model(images , meta)
        loss = criterion(y_preds, labels)

        # record the un-scaled loss
        losses.update(loss.item(), batch_size)

        # Scale so that the accumulated gradient is the mean over the window.
        if accumulation_steps > 1:
            loss = loss / accumulation_steps
        if CFG.apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        # Step only when a full accumulation window (or the final batch) is reached.
        # Stepping and zeroing on every batch would discard the accumulated gradients
        # and apply a second, gradient-free update on window boundaries.
        window_complete = (step + 1) % accumulation_steps == 0 or (step + 1) == n_batches
        if window_complete:
            # Clip the fully accumulated gradient, not the partial sums.
            grad_norm = float(torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm))
            optimizer.step()
            optimizer.zero_grad()

        # Read the learning rate actually in effect from the optimizer; this works for
        # every scheduler type and avoids calling scheduler.get_lr() outside step().
        current_lr = optimizer.param_groups[0]['lr']

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (n_batches-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(
                   epoch+1, step, n_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/n_batches),
                   grad_norm=grad_norm,
                   lr=current_lr,
                   ))

        wandb.log({
            "Train Loss": losses.val,
            "Step": step,
            "Gradient Norm": grad_norm,
            "Learning Rate": current_lr,
        })
    return losses.avg


def valid_fn(valid_loader, model, criterion , accelerator):
    """Run one evaluation pass over ``valid_loader`` without gradient tracking.

    Args:
        valid_loader: iterable yielding ``(images, labels, file_name, data)``
            batches; ``data[1]`` is passed to the model as the metadata input.
        model: network invoked as ``model(images, meta)``; switched to eval mode.
        criterion: loss callable applied to ``(logits, labels)``.
        accelerator: not read by this function; kept so existing call sites
            (which pass a fourth positional argument) keep working. Tensors are
            moved to the module-level ``device``.

    Returns:
        Tuple ``(avg_loss, predictions)``: the batch-size-weighted mean loss
        and a NumPy array of row-wise softmax outputs concatenated over all
        batches in loader order.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.eval()
    preds = []
    start = end = time.time()

    for step, (images, labels, file_name, data) in enumerate(valid_loader):
        data_time.update(time.time() - end)

        # Everything goes to the same `device`; no hard-coded `.cuda()` so the
        # function also works when `device` is not the default CUDA device.
        images = images.to(device)
        labels = labels.to(device)
        meta = data[1].to(device)
        batch_size = labels.size(0)

        # Forward pass, loss and softmax all run without building a graph.
        with torch.no_grad():
            y_preds = model(images, meta)
            loss = criterion(y_preds, labels)
            probs = torch.nn.functional.softmax(y_preds, dim=1)

        losses.update(loss.item(), batch_size)
        preds.append(probs.to('cpu').numpy())

        batch_time.update(time.time() - end)
        end = time.time()

        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))

        # Per-step validation metrics (keys kept exactly as before so existing
        # dashboards continue to match).
        wandb.log({
            "Val Loss ": losses.val,
            "Val Step": step,
        })

    predictions = np.concatenate(preds)

    return losses.avg, predictions
In [48]:
# Set to True to run the toy pAUC sanity check in the next cell.
check_pAUC = False
In [49]:
# Optional smoke test of the metric on a five-row toy example; it only runs
# when the `check_pAUC` flag is switched on.
if check_pAUC:
    # Name of the row-id column shared by both frames.
    row_id_column_name = 'id'

    # Ground truth and predictions for the same five ids.
    solution_data = dict(id=[1, 2, 3, 4, 5], target=[1, 0, 1, 0, 1])
    submission_data = dict(id=[1, 2, 3, 4, 5], prediction=[0.9, 0.1, 0.8, 0.2, 0.7])

    solution_df = pd.DataFrame(solution_data)
    submission_df = pd.DataFrame(submission_data)

    # NOTE(review): `score` is not defined in the visible part of the notebook
    # (the metric defined below is named `score_pAUC`) -- confirm which
    # function is meant before enabling this cell.
    pauc_score = score(solution_df, submission_df, row_id_column_name)
    print(f'pAUC Score: {pauc_score:.4f}')
In [50]:
import numpy as np
import pandas as pd
import pandas.api.types
from sklearn.metrics import roc_curve, auc, roc_auc_score

class ParticipantVisibleError(Exception):
    """Raised by the metric code for problems with the submitted predictions,
    e.g. a prediction column that is not numeric."""


def score_pAUC(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, min_tpr: float=0.80) -> float:
    '''
    2024 ISIC Challenge metric: pAUC

    Given a solution file and submission file, this function returns the
    partial area under the receiver operating characteristic (pAUC)
    above a given true positive rate (TPR) = 0.80.
    https://en.wikipedia.org/wiki/Partial_Area_Under_the_ROC_Curve.

    (c) 2024 Nicholas R Kurtansky, MSKCC

    Args:
        solution: ground truth pd.DataFrame of 1s and 0s, plus the row-id column
        submission: pd.DataFrame of prediction scores ranging [0, 1], plus the
            row-id column
        row_id_column_name: name of the id column present in both frames; it is
            ignored for scoring. The caller's frames are NOT modified.
        min_tpr: lower TPR bound of the region whose area is measured.

    Returns:
        Float value range [0, max_fpr] where max_fpr = |1 - min_tpr|

    Raises:
        KeyError: if ``row_id_column_name`` is missing from either frame.
        ParticipantVisibleError: if the submission values are not numeric.
        ValueError: if ``min_tpr`` leads to a max_fpr outside (0, 1].
    '''

    # Drop the id column on copies so the caller's DataFrames are left intact
    # (the previous `del frame[col]` mutated the arguments in place).
    solution = solution.drop(columns=[row_id_column_name])
    submission = submission.drop(columns=[row_id_column_name])

    # check submission is numeric
    if not pandas.api.types.is_numeric_dtype(submission.values):
        raise ParticipantVisibleError('Submission target column must be numeric')

    # rescale the target. set 0s to 1s and 1s to 0s (since sklearn only has max_fpr)
    v_gt = abs(np.asarray(solution.values)-1)

    # flip the submissions to their complements
    v_pred = -1.0*np.asarray(submission.values)

    # With labels and scores flipped, "TPR >= min_tpr" on the original problem
    # becomes "FPR <= max_fpr" on the flipped one.
    max_fpr = abs(1-min_tpr)

    # using sklearn.metric functions: (1) roc_curve and (2) auc
    fpr, tpr, _ = roc_curve(v_gt, v_pred, sample_weight=None)
    if max_fpr == 1:
        # Whole curve requested: plain AUC.
        return auc(fpr, tpr)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError("Expected min_tpr in range [0, 1), got: %r" % min_tpr)

    # Add a single point at max_fpr by linear interpolation
    stop = np.searchsorted(fpr, max_fpr, "right")
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)

#     # Equivalent code that uses sklearn's roc_auc_score
#     v_gt = abs(np.asarray(solution.values)-1)
#     v_pred = np.array([1.0 - x for x in submission.values])
#     max_fpr = abs(1-min_tpr)
#     partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
#     # change scale from [0.5, 1.0] to [0.5 * max_fpr**2, max_fpr]
#     # https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range
#     partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)

    return partial_auc
In [51]:
def train_loop(folds, fold):
    """Train one cross-validation fold and checkpoint its best epoch.

    Rows of ``folds`` whose ``fold`` column equals ``fold`` form the validation
    set. The remaining rows are rebalanced (positives upsampled to 3000 with
    replacement, negatives downsampled to 10000 without replacement), shuffled
    once and used for training. After every epoch the model is validated, ROC
    AUC and the competition pAUC are computed, and whenever pAUC improves the
    model weights, the positive-class validation probabilities and an OOF CSV
    are written under ``OUTPUT_DIR``.

    Args:
        folds: DataFrame with ``fold``, ``target``, ``isic_id`` and
            ``CFG.target_col`` columns, plus whatever ``TrainDataset`` reads.
        fold: identifier of the fold held out for validation.

    Returns:
        None.

    Raises:
        ValueError: if ``CFG.scheduler`` is not one of the supported names.
    """

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    # Over3000Down10000: rebalance the training split only; validation keeps
    # its natural class distribution.
    df_minority = train_folds[train_folds['target'] == 1]
    df_majority = train_folds[train_folds['target'] == 0]
    df_minority_upsampled = resample(df_minority,
                                     replace=True,    # Sample with replacement
                                     n_samples=3000,  # Upsample to 3000
                                     random_state=42) # For reproducibility
    df_majority_downsampled = resample(df_majority,
                                       replace=False,   # Sample without replacement
                                       n_samples=10000, # Downsample to 10000
                                       random_state=42) # For reproducibility

    train_resampled = pd.concat([df_minority_upsampled, df_majority_downsampled])
    train_resampled = train_resampled.sample(frac=1, random_state=42).reset_index(drop=True)

    print(train_resampled['target'].value_counts())
    train_folds = train_resampled

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        """Build the LR scheduler selected by ``CFG.scheduler``."""
        if CFG.scheduler=='ReduceLROnPlateau':
            return ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        if CFG.scheduler=='CosineAnnealingLR':
            return CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        if CFG.scheduler=='CosineAnnealingWarmRestarts':
            return CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        # Previously an unknown name fell through to an UnboundLocalError.
        raise ValueError(f"Unsupported CFG.scheduler: {CFG.scheduler!r}")

    hdf_train = h5py.File('/kaggle/input/isic-2024/isic-2024/train-image.hdf5', mode='r')
    try:
        train_dataset = TrainDataset(train_folds, hdf5=hdf_train, meta_features=meta_features,
                                     transform=get_transforms(data="train"))
        valid_dataset = TrainDataset(valid_folds, hdf5=hdf_train, meta_features=meta_features,
                                     transform=get_transforms(data='valid'))

        # NOTE(review): shuffle=False means the training order is the single
        # shuffle done above and is identical every epoch -- confirm intended.
        train_loader = DataLoader(train_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=False,
                                  num_workers=CFG.num_workers,
                                  pin_memory=True, drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=False,
                                  num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

        # ====================================================
        # model & optimizer
        # ====================================================
        model = CustomResNext(CFG.model_name, num_classes = 0 ,n_meta_features=len(meta_features), pretrained=True)
        model.to(device)

        optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
        scheduler = get_scheduler(optimizer)

        # ====================================================
        # apex
        # ====================================================
        if CFG.apex:
            model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

        # ====================================================
        # loop
        # ====================================================
        # Class weights live on the same device as the model (was `.cuda()`).
        criterion = nn.CrossEntropyLoss(weight=torch.tensor([0.3, 0.7], device=device))

        best_score = 0
        saved_checkpoint = False

        for epoch in range(CFG.epochs):

            start_time = time.time()

            # train
            avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

            # eval
            avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
            valid_labels = valid_folds[CFG.target_col].values

            if isinstance(scheduler, ReduceLROnPlateau):
                scheduler.step(avg_val_loss)
            elif isinstance(scheduler, (CosineAnnealingLR, CosineAnnealingWarmRestarts)):
                scheduler.step()

            # scoring
            # valid_fn already returns softmax probabilities, so just take the
            # positive-class column; applying softmax a second time would
            # squash the stored predictions towards 0.5.
            preds = preds[:, 1].copy()
            score2 = roc_auc_score(valid_labels, preds)

            comp_score = score_pAUC(pd.DataFrame({'id': valid_folds.index, 'target': valid_labels}),
                           pd.DataFrame({'id': valid_folds.index, 'prediction': preds}),
                           row_id_column_name='id')

            # Log each metric under its own name (plain AUC used to be logged
            # under the "pAUC" key).
            wandb.log({"AUC": score2, "pAUC": comp_score})

            elapsed = time.time() - start_time

            LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
            LOGGER.info(f'Epoch {epoch+1} - AUC: {score2} - pAUC: {comp_score}')

            if comp_score > best_score:
                best_score = comp_score
                saved_checkpoint = True
                LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
                torch.save({'model': model.state_dict(),
                            'preds': preds},
                            OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
                oof = pd.DataFrame({'filename' : valid_folds['isic_id'].values , 'prediction': preds , 'target': valid_labels })
                oof.to_csv(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.csv')

        if not saved_checkpoint:
            # Replaces the old unconditional torch.load of the best checkpoint,
            # whose result was unused and which failed when no file existed.
            LOGGER.info(f'fold {fold}: pAUC never exceeded {best_score}; no checkpoint was written')
    finally:
        # Release the HDF5 handle even if training aborts.
        hdf_train.close()

    return
In [52]:
# Display the `train` DataFrame (the notebook renders a truncated preview).
train
Out[52]:
isic_id target patient_id age_approx sex clin_size_long_diam_mm image_type tbp_tile_type tbp_lv_A tbp_lv_Aext tbp_lv_B tbp_lv_Bext tbp_lv_C tbp_lv_Cext tbp_lv_H tbp_lv_Hext tbp_lv_L tbp_lv_Lext tbp_lv_areaMM2 tbp_lv_area_perim_ratio tbp_lv_color_std_mean tbp_lv_deltaA tbp_lv_deltaB tbp_lv_deltaL tbp_lv_deltaLB tbp_lv_deltaLBnorm tbp_lv_eccentricity tbp_lv_minorAxisMM tbp_lv_nevi_confidence tbp_lv_norm_border tbp_lv_norm_color tbp_lv_perimeterMM tbp_lv_radial_color_std_max tbp_lv_stdL tbp_lv_stdLExt tbp_lv_symm_2axis tbp_lv_symm_2axis_angle tbp_lv_x tbp_lv_y tbp_lv_z attribution copyright_license lesion_id iddx_full iddx_1 iddx_2 iddx_3 iddx_4 iddx_5 mel_mitotic_index mel_thick_mm tbp_lv_dnn_lesion_confidence Image_File lesion_size_ratio lesion_shape_index hue_contrast luminance_contrast lesion_color_difference border_complexity color_uniformity 3d_position_distance perimeter_to_area_ratio lesion_visibility_score symmetry_border_consistency color_consistency size_age_interaction hue_color_std_interaction lesion_severity_index shape_complexity_index color_contrast_index log_lesion_area normalized_lesion_size mean_hue_difference std_dev_contrast color_shape_composite_index 3d_lesion_orientation overall_color_difference symmetry_perimeter_interaction comprehensive_lesion_index shape_complexity_ratio color_variability border_asymmetry 3d_size_ratio age_lesion_interaction color_contrast_complexity shape_color_composite relative_lesion_size border_color_interaction 3d_radial_distance 3d_polar_angle 3d_azimuthal_angle shape_size_ratio color_border_complexity visibility_size_interaction age_adjusted_lesion_index nonlinear_color_contrast shape_location_index border_complexity_asymmetry_ratio color_variability_size_interaction 3d_lesion_composite nonlinear_shape_color_composite n_images anatom_site_general_anterior torso anatom_site_general_head/neck anatom_site_general_lower extremity anatom_site_general_posterior torso anatom_site_general_upper extremity 
tbp_lv_location_simple_Head & Neck tbp_lv_location_simple_Left Arm tbp_lv_location_simple_Left Leg tbp_lv_location_simple_Right Arm tbp_lv_location_simple_Right Leg tbp_lv_location_simple_Torso Back tbp_lv_location_simple_Torso Front tbp_lv_location_simple_Unknown tbp_lv_location_Head & Neck tbp_lv_location_Left Arm tbp_lv_location_Left Arm - Lower tbp_lv_location_Left Arm - Upper tbp_lv_location_Left Leg tbp_lv_location_Left Leg - Lower tbp_lv_location_Left Leg - Upper tbp_lv_location_Right Arm tbp_lv_location_Right Arm - Lower tbp_lv_location_Right Arm - Upper tbp_lv_location_Right Leg tbp_lv_location_Right Leg - Lower tbp_lv_location_Right Leg - Upper tbp_lv_location_Torso Back tbp_lv_location_Torso Back Bottom Third tbp_lv_location_Torso Back Middle Third tbp_lv_location_Torso Back Top Third tbp_lv_location_Torso Front tbp_lv_location_Torso Front Bottom Half tbp_lv_location_Torso Front Top Half tbp_lv_location_Unknown
0 ISIC_0015670 0 IP_1235828 0.6875 0 0.074453 TBP tile: close-up 3D: white 0.448560 0.549700 0.502444 0.376069 0.549810 0.402899 0.508448 0.267121 0.601787 0.547974 0.008153 0.218652 0.000000 0.387365 0.685141 0.836741 8.360566 0.101256 0.922243 0.070059 2.628592e-05 0.690918 0.000000 0.067335 0.000000 0.102225 0.080388 0.582086 0.485714 0.356776 0.566559 0.408087 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 97.517282 /kaggle/input/isic-2024/isic-2024/train-image/... 0.377312 0.306032 0.057723 7.658253 0.153458 7.681836 0.001929 641.525666 0.464388 0.074536 0.426332 0.093746 0.079887 0.000000 0.334336 7.718231 0.527696 0.195193 0.024389 0.404682 5.269909 9.355549 0.796815 0.723533 0.105880 0.215550 0.216541 0.000000 0.540615 0.124366 0.260433 0.499957 0.001206 0.245694 0.000000 641.525666 0.519977 1.860241 0.403224 NaN 0.062648 0.225713 0.999961 0.143238 0.164883 0.000000 0.114770 0.996913 0.657389 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 ISIC_0015845 0 IP_8170065 0.6875 0 0.003650 TBP tile: close-up 3D: white 0.674860 0.747149 0.491697 0.391260 0.685050 0.552365 0.384152 0.152394 0.535710 0.465595 0.001462 0.019278 0.000000 0.438834 0.646551 0.867883 6.839008 0.072258 0.646281 0.030250 1.334303e-09 0.162262 0.000000 0.007756 0.000000 0.033828 0.131598 0.252621 0.314286 0.504132 0.893847 0.571023 Memorial Sloan Kettering Cancer Center CC-BY IL_6727506 Benign Benign NaN NaN NaN NaN NaN NaN 3.141455 /kaggle/input/isic-2024/isic-2024/train-image/... 0.671535 0.862602 0.090871 6.500840 0.156607 2.402116 0.001929 1576.723962 0.585990 0.053190 0.058229 0.035666 0.019856 0.000000 0.061852 2.483847 0.555390 0.053754 0.003870 0.265244 5.345725 4.173668 0.750701 0.756171 0.009881 0.018873 0.021085 0.000000 0.259181 0.875064 0.072891 0.499957 0.001206 0.088039 0.000000 1576.723962 0.486381 1.570846 0.357300 NaN 0.004630 0.025995 0.999998 0.882084 0.004149 0.000000 0.129278 0.996913 0.479479 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 ISIC_0015864 0 IP_6724798 0.6875 0 0.087591 TBP tile: close-up 3D: XP 0.494565 0.568489 0.703178 0.619598 0.738108 0.606831 0.566217 0.336171 0.596911 0.543583 0.008491 0.175593 0.000000 0.419247 0.734515 0.835366 9.092376 0.119668 0.954805 0.050841 2.959177e-06 0.447253 0.000000 0.063125 0.000000 0.085311 0.052795 0.334987 0.600000 0.603966 0.858581 0.858501 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.804040 /kaggle/input/isic-2024/isic-2024/train-image/... 0.185612 0.366843 0.075925 7.709340 0.185190 5.160240 0.001929 1495.442825 0.424068 0.088089 0.174525 0.078557 0.091026 0.000000 0.216620 5.201588 0.589523 0.200096 0.028197 0.478441 6.033971 8.182175 0.737339 0.764476 0.057649 0.172135 0.125381 0.000000 0.572494 0.264886 0.179413 0.499957 0.001206 0.464085 0.000000 1495.442825 0.446995 1.486992 0.238232 NaN 0.074987 0.181627 1.000000 0.412719 0.208498 0.000000 0.245416 0.996913 0.810607 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
3 ISIC_0015902 0 IP_4111386 0.7500 0 0.081022 TBP tile: close-up 3D: XP 0.330121 0.460827 0.402979 0.303669 0.407316 0.295051 0.539680 0.308385 0.173134 0.069389 0.016925 0.053995 0.051695 0.345916 0.599208 0.917270 4.783413 0.123664 0.661665 0.121860 2.198945e-01 0.147329 0.177170 0.069412 0.057844 0.057262 0.037652 0.170317 0.742857 0.390405 0.848437 0.572962 ACEMID MIA CC-0 NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.989998 /kaggle/input/isic-2024/isic-2024/train-image/... 0.700267 0.683752 0.075968 4.665323 0.057300 2.185455 0.001449 1450.239191 0.221867 0.138479 0.038648 0.164283 0.093760 29.026369 0.147492 2.252618 0.510845 0.292986 0.023673 0.446797 2.954563 5.204448 0.766225 0.701845 0.031795 0.073172 0.025163 0.148822 0.269298 0.271365 0.144972 0.510397 0.000654 0.077898 0.040456 1450.239191 0.485075 1.668272 0.102462 1.691708 0.093087 0.076888 0.999746 0.661763 0.074702 0.099840 0.160499 0.940654 0.664374 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
4 ISIC_0024200 0 IP_8313778 0.6250 0 0.063139 TBP tile: close-up 3D: white 0.536985 0.632028 0.494130 0.420931 0.595283 0.488997 0.451561 0.230185 0.504685 0.459331 0.005005 0.119577 0.000000 0.402283 0.613124 0.811955 9.148495 0.128433 0.969901 0.036212 1.378832e-05 0.326168 0.000000 0.038916 0.000000 0.105071 0.074071 0.282586 0.114286 0.445845 0.864265 0.512548 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 70.442510 /kaggle/input/isic-2024/isic-2024/train-image/... 0.172334 0.476059 0.106167 8.579430 0.169487 3.972287 0.001929 1490.629394 0.486255 0.094542 0.113931 0.111047 0.063254 0.000000 0.158069 4.022532 0.497088 0.141714 0.023735 0.349728 5.655868 6.738664 0.758427 0.689332 0.032495 0.118431 0.075373 0.000000 0.461375 0.330097 0.125369 0.499957 0.001206 0.486625 0.000000 1490.629394 0.493560 1.619334 0.229006 NaN 0.063896 0.132898 0.998829 0.504179 0.153139 0.000000 0.201530 0.996913 0.639917 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
401054 ISIC_9999937 0 IP_1140263 0.8125 0 0.211679 TBP tile: close-up 3D: XP 0.494536 0.521126 0.515904 0.447931 0.586077 0.443018 0.487281 0.315836 0.385361 0.440617 0.067308 0.092628 0.259206 0.466726 0.617734 0.585158 17.187750 0.381900 0.756712 0.254170 9.936233e-01 0.266975 0.734813 0.176467 0.202597 0.392395 0.061576 0.256087 0.571429 0.622958 0.760043 0.678136 Department of Dermatology, Hospital Clínic de ... CC-BY-NC IL_9520694 Benign Benign NaN NaN NaN NaN NaN NaN 99.999988 /kaggle/input/isic-2024/isic-2024/train-image/... 0.634890 0.547280 0.209217 17.008450 0.382087 3.390731 0.002075 1197.740551 0.102628 0.477913 0.088179 0.425315 0.231305 131.016077 0.500945 3.446777 0.526605 0.515932 0.053883 0.419786 10.775006 6.903743 0.730957 0.614949 0.113153 0.181408 0.055155 0.133119 0.399441 0.102939 0.441927 0.544264 0.001432 0.101932 0.056144 1197.740551 0.464636 1.446943 0.070953 3.757460 0.389263 0.179022 0.999956 0.449790 0.120393 0.154905 0.200620 0.999038 0.605171 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
401055 ISIC_9999951 0 IP_5678181 0.6875 0 0.077007 TBP tile: close-up 3D: white 0.443295 0.544600 0.633924 0.556393 0.655484 0.543159 0.570062 0.333835 0.658611 0.610268 0.013045 0.097717 0.081617 0.386676 0.690975 0.828566 8.671249 0.097520 0.730502 0.104002 2.311562e-03 0.409172 0.279525 0.067668 0.091213 0.093165 0.091352 0.441034 0.142857 0.546435 0.831770 0.685676 Memorial Sloan Kettering Cancer Center CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.999820 /kaggle/input/isic-2024/isic-2024/train-image/... 0.606029 0.532598 0.062324 7.962060 0.160682 4.899972 0.001451 1399.966447 0.289525 0.146645 0.205950 0.077729 0.082053 48.479191 0.332370 4.954822 0.523040 0.255966 0.025129 0.479404 5.443848 6.501268 0.744708 0.721352 0.080510 0.131420 0.084775 0.157334 0.438239 0.271220 0.259080 0.519117 0.001440 0.113716 0.095893 1399.966447 0.468425 1.533238 0.248585 3.797567 0.093690 0.140282 0.999935 0.517427 0.082548 0.091546 0.198470 0.999077 0.626251 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0
401056 ISIC_9999960 0 IP_0076153 0.7500 1 0.038321 TBP tile: close-up 3D: XP 0.391100 0.465157 0.555550 0.441107 0.564682 0.409718 0.571694 0.356429 0.422326 0.372602 0.006973 0.029935 0.050055 0.408813 0.697427 0.816060 9.267366 0.176141 0.656763 0.072968 5.994798e-01 0.090863 0.166041 0.034235 0.052872 0.082950 0.063017 0.141688 0.228571 0.548269 0.720228 0.302728 Frazer Institute, The University of Queensland... CC-BY IL_9852274 Benign Benign NaN NaN NaN NaN NaN NaN 99.999416 /kaggle/input/isic-2024/isic-2024/train-image/... 0.709842 0.799960 0.106954 8.426838 0.182489 1.627598 0.001535 1071.999873 0.328530 0.174127 0.023274 0.102968 0.054538 29.819207 0.113808 1.704226 0.575412 0.176917 0.012250 0.493238 5.968948 4.577102 0.742529 0.731840 0.012848 0.048516 0.012936 0.106503 0.192135 0.315915 0.119857 0.517776 0.000524 0.074654 0.021562 1071.999873 0.531008 1.519565 0.124007 1.334636 0.066451 0.052331 1.000000 0.554938 0.020968 0.040166 0.101910 0.884804 0.480515 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
401057 ISIC_9999964 0 IP_5231513 0.3125 1 0.065693 TBP tile: close-up 3D: XP 0.488897 0.404430 0.529857 0.454194 0.593638 0.395893 0.497716 0.410203 0.541314 0.533738 0.016757 0.039585 0.132411 0.577571 0.634747 0.732143 11.751160 0.200620 0.491148 0.125537 9.931933e-01 0.088423 0.358397 0.065396 0.093829 0.197278 0.053052 0.118717 0.800000 0.496238 0.656369 0.351561 University Hospital of Basel CC-BY-NC NaN Benign Benign NaN NaN NaN NaN NaN NaN 100.000000 /kaggle/input/isic-2024/isic-2024/train-image/... 0.871405 0.749631 0.387508 11.545670 0.347967 1.583386 0.002288 880.952401 0.212779 0.243662 0.019739 0.185876 0.029139 68.404855 0.204544 1.655914 0.636265 0.291523 0.051466 0.479725 9.953426 5.089118 0.752484 0.769697 0.020812 0.079411 0.013803 0.130358 0.194449 0.187990 0.072638 0.545422 0.000759 0.028302 0.025674 880.952401 0.526966 1.582034 0.084774 1.935352 0.121353 0.133885 1.000000 0.427143 0.052183 0.087002 0.096675 0.965685 0.662388 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
401058 ISIC_9999967 0 IP_6426047 0.5625 0 0.083942 TBP tile: close-up 3D: XP 0.380451 0.444992 0.403144 0.306060 0.434260 0.288605 0.498154 0.324136 0.265458 0.255607 0.019399 0.053571 0.098350 0.417300 0.596460 0.718983 12.157000 0.328629 0.675802 0.127479 9.365141e-01 0.137519 0.310094 0.075561 0.094688 0.161926 0.102932 0.157707 0.257143 0.295317 0.408429 0.653693 Department of Dermatology, University of Athen... CC-BY NaN Benign Benign NaN NaN NaN NaN NaN NaN 99.999960 /kaggle/input/isic-2024/isic-2024/train-image/... 0.715202 0.685555 0.202607 12.034760 0.250523 2.081471 0.001684 317.333017 0.204711 0.324955 0.034402 0.250929 0.070913 50.854538 0.212554 2.148780 0.537111 0.313205 0.034120 0.430995 7.607111 5.344518 0.917956 0.643558 0.032020 0.089329 0.023502 0.116116 0.258989 0.053822 0.142874 0.523077 0.000725 0.072866 0.030063 317.333017 0.384716 2.620471 0.094543 1.872560 0.176021 0.108624 0.999986 0.129311 0.077815 0.083394 0.030362 0.958984 0.659003 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

401059 rows × 136 columns

In [53]:
def main():
    """
    Entry point: train the configured folds and/or run test-set inference.

    When ``CFG.train`` is set, ``train_loop`` is called for every fold in
    ``range(CFG.n_fold)`` that is listed in ``CFG.trn_fold``. When
    ``CFG.inference`` is set, the best checkpoint of each trained fold is
    loaded, predictions are made on ``test`` and a ``submission.csv`` is
    written to ``OUTPUT_DIR``.
    """

    if CFG.train:
        # train
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                # train_loop returns nothing; it writes the per-fold checkpoint
                # and OOF CSV itself, so there is no OOF frame to aggregate here.
                train_loop(folds, fold)
        # CV result
        LOGGER.info("========== CV ==========")

    if CFG.inference:
        # inference
        # Build the model with the same head/meta configuration train_loop
        # uses, so the saved fold weights match the architecture.
        # NOTE(review): TestDataset / inference are not visible here; confirm
        # they also supply the metadata input the model is called with.
        model = CustomResNext(CFG.model_name, num_classes=0,
                              n_meta_features=len(meta_features), pretrained=False)
        states = [torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth') for fold in CFG.trn_fold]
        test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False,
                                 num_workers=CFG.num_workers, pin_memory=True)
        predictions = inference(model, states, test_loader, device)
        # submission
        print(predictions)
        test['label'] = torch.nn.functional.softmax(torch.from_numpy(predictions), dim=1).numpy()[:,1]
        print(test['label'])
        test[['img_name', 'label']].to_csv(OUTPUT_DIR+'submission.csv', index=False)
In [54]:
from sklearn.utils import resample
In [55]:
# Entry point: run the pipeline only when this code is executed directly
# (a notebook cell or a script), not when it is imported as a module.
if __name__ == "__main__":
    main()
========== fold: 2 training ==========
target
0    10000
1     3000
Name: count, dtype: int64
Epoch: [1][0/650] Data 1.094 (1.094) Elapsed 0m 3s (remain 39m 52s) Loss: 3.5243(3.5243) Grad: 88.2561  LR: 0.000100  
Epoch: [1][18/650] Data 0.270 (0.298) Elapsed 0m 15s (remain 8m 41s) Loss: 0.7758(1.8412) Grad: 68.7465  LR: 0.000100  
Epoch: [1][36/650] Data 0.248 (0.281) Elapsed 0m 28s (remain 7m 44s) Loss: 1.6655(1.8824) Grad: 53.8497  LR: 0.000100  
Epoch: [1][54/650] Data 0.271 (0.276) Elapsed 0m 40s (remain 7m 16s) Loss: 2.0998(1.7617) Grad: 70.7080  LR: 0.000100  
Epoch: [1][72/650] Data 0.272 (0.274) Elapsed 0m 52s (remain 6m 55s) Loss: 1.7836(1.7374) Grad: 56.8125  LR: 0.000100  
Epoch: [1][90/650] Data 0.272 (0.273) Elapsed 1m 4s (remain 6m 38s) Loss: 3.0666(1.6971) Grad: 69.2511  LR: 0.000100  
Epoch: [1][108/650] Data 0.269 (0.272) Elapsed 1m 17s (remain 6m 23s) Loss: 1.5897(1.6144) Grad: 52.9566  LR: 0.000100  
Epoch: [1][126/650] Data 0.268 (0.271) Elapsed 1m 29s (remain 6m 8s) Loss: 0.8502(1.5542) Grad: 28.3672  LR: 0.000100  
Epoch: [1][144/650] Data 0.259 (0.271) Elapsed 1m 41s (remain 5m 54s) Loss: 1.2228(1.5023) Grad: 41.7399  LR: 0.000100  
Epoch: [1][162/650] Data 0.272 (0.271) Elapsed 1m 54s (remain 5m 40s) Loss: 0.9951(1.4584) Grad: 32.2839  LR: 0.000100  
Epoch: [1][180/650] Data 0.272 (0.270) Elapsed 2m 6s (remain 5m 27s) Loss: 1.0090(1.4137) Grad: 35.8288  LR: 0.000100  
Epoch: [1][198/650] Data 0.271 (0.270) Elapsed 2m 18s (remain 5m 14s) Loss: 0.5395(1.3594) Grad: 29.4353  LR: 0.000100  
Epoch: [1][216/650] Data 0.271 (0.269) Elapsed 2m 31s (remain 5m 1s) Loss: 0.8337(1.3185) Grad: 22.6757  LR: 0.000100  
Epoch: [1][234/650] Data 0.271 (0.269) Elapsed 2m 43s (remain 4m 48s) Loss: 0.9160(1.2864) Grad: 29.3670  LR: 0.000100  
Epoch: [1][252/650] Data 0.264 (0.268) Elapsed 2m 55s (remain 4m 35s) Loss: 1.0782(1.2481) Grad: 32.6038  LR: 0.000100  
Epoch: [1][270/650] Data 0.260 (0.268) Elapsed 3m 8s (remain 4m 22s) Loss: 0.2894(1.2132) Grad: 9.0277  LR: 0.000100  
Epoch: [1][288/650] Data 0.272 (0.268) Elapsed 3m 20s (remain 4m 10s) Loss: 0.3973(1.1846) Grad: 15.0366  LR: 0.000100  
Epoch: [1][306/650] Data 0.270 (0.268) Elapsed 3m 32s (remain 3m 57s) Loss: 1.2028(1.1562) Grad: 28.0102  LR: 0.000100  
Epoch: [1][324/650] Data 0.271 (0.268) Elapsed 3m 44s (remain 3m 44s) Loss: 0.2811(1.1305) Grad: 10.3663  LR: 0.000100  
Epoch: [1][342/650] Data 0.271 (0.268) Elapsed 3m 57s (remain 3m 32s) Loss: 1.0315(1.1047) Grad: 28.1144  LR: 0.000100  
Epoch: [1][360/650] Data 0.267 (0.267) Elapsed 4m 9s (remain 3m 19s) Loss: 0.2589(1.0819) Grad: 10.1003  LR: 0.000100  
Epoch: [1][378/650] Data 0.263 (0.267) Elapsed 4m 21s (remain 3m 7s) Loss: 0.9474(1.0648) Grad: 27.8960  LR: 0.000100  
Epoch: [1][396/650] Data 0.253 (0.267) Elapsed 4m 34s (remain 2m 54s) Loss: 0.7659(1.0464) Grad: 25.9070  LR: 0.000100  
Epoch: [1][414/650] Data 0.271 (0.267) Elapsed 4m 46s (remain 2m 42s) Loss: 0.9235(1.0263) Grad: 23.6163  LR: 0.000100  
Epoch: [1][432/650] Data 0.259 (0.267) Elapsed 4m 58s (remain 2m 29s) Loss: 0.8599(1.0089) Grad: 21.5964  LR: 0.000100  
Epoch: [1][450/650] Data 0.249 (0.267) Elapsed 5m 11s (remain 2m 17s) Loss: 0.6775(0.9899) Grad: 25.0101  LR: 0.000100  
Epoch: [1][468/650] Data 0.260 (0.267) Elapsed 5m 23s (remain 2m 4s) Loss: 0.5397(0.9707) Grad: 20.5652  LR: 0.000100  
Epoch: [1][486/650] Data 0.266 (0.267) Elapsed 5m 35s (remain 1m 52s) Loss: 0.7107(0.9590) Grad: 22.9121  LR: 0.000100  
Epoch: [1][504/650] Data 0.271 (0.267) Elapsed 5m 47s (remain 1m 39s) Loss: 0.5597(0.9454) Grad: 15.1320  LR: 0.000100  
Epoch: [1][522/650] Data 0.263 (0.267) Elapsed 6m 0s (remain 1m 27s) Loss: 0.5222(0.9337) Grad: 20.3184  LR: 0.000100  
Epoch: [1][540/650] Data 0.272 (0.267) Elapsed 6m 12s (remain 1m 15s) Loss: 0.3124(0.9211) Grad: 12.5482  LR: 0.000100  
Epoch: [1][558/650] Data 0.255 (0.267) Elapsed 6m 24s (remain 1m 2s) Loss: 0.3745(0.9116) Grad: 12.6663  LR: 0.000100  
Epoch: [1][576/650] Data 0.267 (0.267) Elapsed 6m 37s (remain 0m 50s) Loss: 0.9547(0.9022) Grad: 22.3890  LR: 0.000100  
Epoch: [1][594/650] Data 0.273 (0.267) Elapsed 6m 49s (remain 0m 37s) Loss: 0.6682(0.8915) Grad: 20.6627  LR: 0.000100  
Epoch: [1][612/650] Data 0.271 (0.267) Elapsed 7m 1s (remain 0m 25s) Loss: 0.5520(0.8817) Grad: 13.6343  LR: 0.000100  
Epoch: [1][630/650] Data 0.272 (0.267) Elapsed 7m 14s (remain 0m 13s) Loss: 1.0143(0.8717) Grad: 21.2005  LR: 0.000100  
Epoch: [1][648/650] Data 0.273 (0.267) Elapsed 7m 26s (remain 0m 0s) Loss: 0.7269(0.8607) Grad: 17.1867  LR: 0.000100  
Epoch: [1][649/650] Data 0.273 (0.267) Elapsed 7m 27s (remain 0m 0s) Loss: 0.5090(0.8602) Grad: 16.8420  LR: 0.000100  
EVAL: [0/3883] Data 0.499 (0.499) Elapsed 0m 0s (remain 44m 33s) Loss: 0.4381(0.4381) 
EVAL: [18/3883] Data 0.001 (0.028) Elapsed 0m 3s (remain 13m 2s) Loss: 0.5002(0.3744) 
EVAL: [36/3883] Data 0.001 (0.015) Elapsed 0m 6s (remain 12m 7s) Loss: 0.3218(0.3593) 
EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 46s) Loss: 0.1657(0.3550) 
EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 33s) Loss: 0.3832(0.3693) 
EVAL: [90/3883] Data 0.001 (0.006) Elapsed 0m 16s (remain 11m 25s) Loss: 0.4993(0.3707) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 18s) Loss: 0.8354(0.3842) 
EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 12s) Loss: 0.5466(0.3942) 
EVAL: [144/3883] Data 0.001 (0.004) Elapsed 0m 25s (remain 11m 7s) Loss: 0.5858(0.3973) 
EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 2s) Loss: 0.2338(0.4001) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 58s) Loss: 0.1705(0.3901) 
EVAL: [198/3883] Data 0.001 (0.003) Elapsed 0m 35s (remain 10m 54s) Loss: 0.3506(0.3907) 
EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 50s) Loss: 0.3607(0.3904) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.3918(0.3891) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.4226(0.3846) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 38s) Loss: 0.3249(0.3875) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 35s) Loss: 0.5338(0.3909) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 31s) Loss: 0.2750(0.3903) 
EVAL: [324/3883] Data 0.001 (0.002) Elapsed 0m 57s (remain 10m 28s) Loss: 0.2308(0.3896) 
EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 24s) Loss: 0.1096(0.3913) 
EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.3877(0.3913) 
EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 17s) Loss: 0.4232(0.3931) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 9s (remain 10m 14s) Loss: 0.5665(0.3918) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.4080(0.3907) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 7s) Loss: 0.3875(0.3906) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.3887(0.3906) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.2919(0.3930) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 57s) Loss: 0.4803(0.3924) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.2495(0.3917) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.6714(0.3903) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 47s) Loss: 0.2734(0.3896) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.5121(0.3911) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.2138(0.3891) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.3425(0.3872) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 34s) Loss: 0.3249(0.3870) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.3349(0.3875) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.1616(0.3871) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.2064(0.3864) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 21s) Loss: 0.4660(0.3874) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.4269(0.3852) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.4283(0.3847) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.2061(0.3851) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 12s (remain 9m 9s) Loss: 0.3393(0.3851) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 5s) Loss: 0.3480(0.3857) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.3959(0.3871) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.2176(0.3871) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.2506(0.3876) 
EVAL: [846/3883] Data 0.001 (0.001) Elapsed 2m 28s (remain 8m 52s) Loss: 0.2162(0.3885) 
EVAL: [864/3883] Data 0.001 (0.001) Elapsed 2m 31s (remain 8m 49s) Loss: 0.5012(0.3899) 
EVAL: [882/3883] Data 0.001 (0.001) Elapsed 2m 34s (remain 8m 46s) Loss: 0.0853(0.3893) 
EVAL: [900/3883] Data 0.001 (0.001) Elapsed 2m 38s (remain 8m 43s) Loss: 0.5533(0.3895) 
EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.5037(0.3900) 
EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 36s) Loss: 0.5038(0.3896) 
EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 33s) Loss: 0.3163(0.3883) 
EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.1257(0.3885) 
EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.4717(0.3883) 
EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.4044(0.3884) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.1554(0.3873) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 17s) Loss: 0.3022(0.3873) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.3534(0.3874) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.3599(0.3872) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.2153(0.3865) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 15s (remain 8m 5s) Loss: 0.5516(0.3869) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 1s) Loss: 0.1750(0.3862) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 58s) Loss: 0.4854(0.3869) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.4427(0.3860) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.3641(0.3866) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.2484(0.3866) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.3988(0.3864) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 37s (remain 7m 42s) Loss: 0.5588(0.3867) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.6628(0.3872) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.5253(0.3873) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.2245(0.3872) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.4149(0.3879) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.2844(0.3872) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 56s (remain 7m 23s) Loss: 0.2141(0.3870) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 3m 59s (remain 7m 20s) Loss: 0.8111(0.3883) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.2058(0.3880) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.4379(0.3881) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.4210(0.3881) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.3823(0.3874) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 4s) Loss: 0.2784(0.3876) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 18s (remain 7m 1s) Loss: 0.3118(0.3871) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.6044(0.3865) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.6427(0.3862) 
EVAL: [1530/3883] Data 0.003 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.4457(0.3859) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.4420(0.3854) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 45s) Loss: 0.3235(0.3847) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.2181(0.3847) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 40s (remain 6m 39s) Loss: 0.6755(0.3852) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.8215(0.3863) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2758(0.3866) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.2801(0.3872) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 26s) Loss: 0.3649(0.3876) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.2677(0.3877) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.5662(0.3884) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 2s (remain 6m 17s) Loss: 0.3305(0.3888) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.4049(0.3894) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.2036(0.3895) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 7s) Loss: 0.3680(0.3895) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.5654(0.3891) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.2664(0.3889) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 21s (remain 5m 58s) Loss: 0.3189(0.3892) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 24s (remain 5m 55s) Loss: 0.1155(0.3888) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.6842(0.3889) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 48s) Loss: 0.2787(0.3890) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 45s) Loss: 0.4071(0.3888) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.1842(0.3884) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.3079(0.3887) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 43s (remain 5m 36s) Loss: 0.2134(0.3894) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.4866(0.3896) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.2357(0.3897) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 26s) Loss: 0.3194(0.3898) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.2397(0.3896) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.6325(0.3897) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.4882(0.3903) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 5s (remain 5m 14s) Loss: 0.1250(0.3901) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.5751(0.3899) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 7s) Loss: 0.2849(0.3893) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.3084(0.3892) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.5098(0.3887) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.1769(0.3879) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 24s (remain 4m 55s) Loss: 0.3323(0.3880) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 27s (remain 4m 52s) Loss: 0.5231(0.3881) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 48s) Loss: 0.5544(0.3880) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.7136(0.3882) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.2869(0.3881) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.7316(0.3878) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.4014(0.3878) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 46s (remain 4m 33s) Loss: 0.2719(0.3874) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.4887(0.3874) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 26s) Loss: 0.5208(0.3872) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.5303(0.3870) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.6512(0.3869) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.4824(0.3869) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.2176(0.3864) 
EVAL: [2448/3883] Data 0.002 (0.001) Elapsed 7m 8s (remain 4m 11s) Loss: 0.3585(0.3860) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 7s) Loss: 0.2684(0.3861) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.5313(0.3866) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.2752(0.3871) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.3823(0.3871) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.4412(0.3875) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 27s (remain 3m 52s) Loss: 0.7861(0.3874) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 30s (remain 3m 49s) Loss: 0.6420(0.3874) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 45s) Loss: 0.2034(0.3877) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.3482(0.3877) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.1686(0.3876) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.4539(0.3876) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.3845(0.3875) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 49s (remain 3m 30s) Loss: 0.1901(0.3870) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 52s (remain 3m 26s) Loss: 0.4236(0.3868) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.4896(0.3867) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.1220(0.3867) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.3902(0.3874) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.5802(0.3876) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.2578(0.3873) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 11s (remain 3m 8s) Loss: 0.5002(0.3873) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 4s) Loss: 0.4060(0.3871) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.6644(0.3872) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.6403(0.3871) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.2028(0.3867) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.3228(0.3867) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 30s (remain 2m 49s) Loss: 0.4995(0.3866) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 33s (remain 2m 45s) Loss: 0.6318(0.3866) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.1441(0.3867) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.3390(0.3868) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.3221(0.3869) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.5247(0.3867) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.4454(0.3867) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 52s (remain 2m 27s) Loss: 0.3169(0.3865) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 55s (remain 2m 23s) Loss: 0.3938(0.3861) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.4561(0.3861) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.8708(0.3863) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.6743(0.3865) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.2955(0.3864) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 11s (remain 2m 8s) Loss: 0.4218(0.3863) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 14s (remain 2m 5s) Loss: 0.6962(0.3863) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 17s (remain 2m 1s) Loss: 0.1438(0.3860) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.5117(0.3861) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2541(0.3860) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.8336(0.3860) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.2186(0.3860) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 33s (remain 1m 46s) Loss: 0.5246(0.3859) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 36s (remain 1m 42s) Loss: 0.3215(0.3861) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0979(0.3861) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.3324(0.3859) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.2753(0.3860) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.4307(0.3858) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.4982(0.3860) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 55s (remain 1m 24s) Loss: 0.3686(0.3862) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 58s (remain 1m 20s) Loss: 0.6403(0.3859) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2733(0.3861) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.5780(0.3862) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.1601(0.3863) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.4136(0.3866) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 14s (remain 1m 5s) Loss: 0.2691(0.3864) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 17s (remain 1m 1s) Loss: 0.8080(0.3867) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 20s (remain 0m 58s) Loss: 0.6231(0.3868) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.6234(0.3867) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.2234(0.3865) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.2450(0.3866) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.3118(0.3865) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 36s (remain 0m 43s) Loss: 0.1431(0.3866) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 39s (remain 0m 39s) Loss: 0.4818(0.3867) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 42s (remain 0m 36s) Loss: 0.8194(0.3869) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.2142(0.3869) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.4991(0.3869) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.5069(0.3869) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 55s (remain 0m 24s) Loss: 0.3938(0.3868) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 58s (remain 0m 21s) Loss: 0.2337(0.3868) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 1s (remain 0m 17s) Loss: 0.1664(0.3867) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.1616(0.3868) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.2270(0.3867) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.2859(0.3870) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.3702(0.3871) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 17s (remain 0m 2s) Loss: 0.2314(0.3867) 
Epoch 1 - avg_train_loss: 0.8602  avg_val_loss: 0.3868  time: 1127s
Epoch 1 - AUC: 0.9477652467186527 - pAUC: 0.16142628514962082
Epoch 1 - Save Best Score: 0.1614 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 19s (remain 0m 0s) Loss: 0.7261(0.3868) 
Epoch: [2][0/650] Data 0.879 (0.879) Elapsed 0m 1s (remain 14m 29s) Loss: 0.6439(0.6439) Grad: 19.0668  LR: 0.000098  
Epoch: [2][18/650] Data 0.269 (0.296) Elapsed 0m 13s (remain 7m 31s) Loss: 0.3772(0.5004) Grad: 11.6672  LR: 0.000098  
Epoch: [2][36/650] Data 0.272 (0.282) Elapsed 0m 25s (remain 7m 9s) Loss: 1.1432(0.5320) Grad: 31.3411  LR: 0.000098  
Epoch: [2][54/650] Data 0.269 (0.277) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3563(0.5282) Grad: 11.9038  LR: 0.000098  
Epoch: [2][72/650] Data 0.266 (0.275) Elapsed 0m 50s (remain 6m 39s) Loss: 0.6747(0.5323) Grad: 20.4311  LR: 0.000098  
Epoch: [2][90/650] Data 0.252 (0.273) Elapsed 1m 2s (remain 6m 26s) Loss: 0.9387(0.5260) Grad: 25.1824  LR: 0.000098  
Epoch: [2][108/650] Data 0.273 (0.270) Elapsed 1m 15s (remain 6m 13s) Loss: 0.6322(0.5173) Grad: 17.2351  LR: 0.000098  
Epoch: [2][126/650] Data 0.273 (0.269) Elapsed 1m 27s (remain 6m 0s) Loss: 0.6374(0.5100) Grad: 17.7875  LR: 0.000098  
Epoch: [2][144/650] Data 0.263 (0.268) Elapsed 1m 39s (remain 5m 47s) Loss: 0.4050(0.5000) Grad: 13.3298  LR: 0.000098  
Epoch: [2][162/650] Data 0.271 (0.269) Elapsed 1m 52s (remain 5m 34s) Loss: 0.7207(0.5047) Grad: 17.4840  LR: 0.000098  
Epoch: [2][180/650] Data 0.272 (0.268) Elapsed 2m 4s (remain 5m 22s) Loss: 0.4872(0.5117) Grad: 12.6906  LR: 0.000098  
Epoch: [2][198/650] Data 0.261 (0.268) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4582(0.5071) Grad: 17.1446  LR: 0.000098  
Epoch: [2][216/650] Data 0.271 (0.268) Elapsed 2m 29s (remain 4m 57s) Loss: 0.9624(0.5033) Grad: 21.6777  LR: 0.000098  
Epoch: [2][234/650] Data 0.259 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.5756(0.5049) Grad: 14.4410  LR: 0.000098  
Epoch: [2][252/650] Data 0.266 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.3603(0.4992) Grad: 10.9208  LR: 0.000098  
Epoch: [2][270/650] Data 0.264 (0.268) Elapsed 3m 5s (remain 4m 20s) Loss: 0.2299(0.4938) Grad: 7.9438  LR: 0.000098  
Epoch: [2][288/650] Data 0.264 (0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.4541(0.4956) Grad: 12.0555  LR: 0.000098  
Epoch: [2][306/650] Data 0.272 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.1797(0.4931) Grad: 8.0973  LR: 0.000098  
Epoch: [2][324/650] Data 0.265 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.4167(0.4920) Grad: 12.4313  LR: 0.000098  
Epoch: [2][342/650] Data 0.272 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.1934(0.4859) Grad: 8.9752  LR: 0.000098  
Epoch: [2][360/650] Data 0.272 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.4769(0.4875) Grad: 14.0812  LR: 0.000098  
Epoch: [2][378/650] Data 0.271 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.4079(0.4861) Grad: 11.6847  LR: 0.000098  
Epoch: [2][396/650] Data 0.271 (0.268) Elapsed 4m 32s (remain 2m 53s) Loss: 0.2117(0.4866) Grad: 8.8013  LR: 0.000098  
Epoch: [2][414/650] Data 0.259 (0.268) Elapsed 4m 44s (remain 2m 41s) Loss: 1.0991(0.4878) Grad: 22.1012  LR: 0.000098  
Epoch: [2][432/650] Data 0.264 (0.268) Elapsed 4m 56s (remain 2m 28s) Loss: 0.4128(0.4853) Grad: 11.9749  LR: 0.000098  
Epoch: [2][450/650] Data 0.261 (0.268) Elapsed 5m 8s (remain 2m 16s) Loss: 0.4381(0.4861) Grad: 14.3544  LR: 0.000098  
Epoch: [2][468/650] Data 0.268 (0.268) Elapsed 5m 21s (remain 2m 3s) Loss: 0.4091(0.4825) Grad: 13.4175  LR: 0.000098  
Epoch: [2][486/650] Data 0.266 (0.268) Elapsed 5m 33s (remain 1m 51s) Loss: 0.7158(0.4842) Grad: 21.4496  LR: 0.000098  
Epoch: [2][504/650] Data 0.266 (0.268) Elapsed 5m 45s (remain 1m 39s) Loss: 0.3441(0.4838) Grad: 9.2027  LR: 0.000098  
Epoch: [2][522/650] Data 0.273 (0.268) Elapsed 5m 58s (remain 1m 26s) Loss: 0.2895(0.4828) Grad: 13.0865  LR: 0.000098  
Epoch: [2][540/650] Data 0.273 (0.268) Elapsed 6m 10s (remain 1m 14s) Loss: 0.4308(0.4805) Grad: 16.5972  LR: 0.000098  
Epoch: [2][558/650] Data 0.270 (0.268) Elapsed 6m 22s (remain 1m 2s) Loss: 0.3672(0.4803) Grad: 9.5362  LR: 0.000098  
Epoch: [2][576/650] Data 0.270 (0.268) Elapsed 6m 35s (remain 0m 49s) Loss: 0.4240(0.4783) Grad: 12.9091  LR: 0.000098  
Epoch: [2][594/650] Data 0.271 (0.268) Elapsed 6m 47s (remain 0m 37s) Loss: 0.5612(0.4772) Grad: 13.3439  LR: 0.000098  
Epoch: [2][612/650] Data 0.269 (0.268) Elapsed 6m 59s (remain 0m 25s) Loss: 0.3119(0.4742) Grad: 8.1953  LR: 0.000098  
Epoch: [2][630/650] Data 0.263 (0.268) Elapsed 7m 12s (remain 0m 13s) Loss: 0.4496(0.4717) Grad: 13.7777  LR: 0.000098  
Epoch: [2][648/650] Data 0.259 (0.268) Elapsed 7m 24s (remain 0m 0s) Loss: 0.3236(0.4706) Grad: 12.8262  LR: 0.000098  
Epoch: [2][649/650] Data 0.273 (0.268) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3428(0.4704) Grad: 16.1197  LR: 0.000098  
EVAL: [0/3883] Data 0.588 (0.588) Elapsed 0m 0s (remain 49m 25s) Loss: 0.1770(0.1770) 
EVAL: [18/3883] Data 0.001 (0.032) Elapsed 0m 3s (remain 13m 17s) Loss: 0.1737(0.1417) 
EVAL: [36/3883] Data 0.001 (0.017) Elapsed 0m 7s (remain 12m 14s) Loss: 0.1269(0.1333) 
EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 51s) Loss: 0.0539(0.1329) 
EVAL: [72/3883] Data 0.002 (0.009) Elapsed 0m 13s (remain 11m 37s) Loss: 0.2033(0.1438) 
EVAL: [90/3883] Data 0.002 (0.007) Elapsed 0m 16s (remain 11m 28s) Loss: 0.2019(0.1487) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 21s) Loss: 0.3960(0.1579) 
EVAL: [126/3883] Data 0.002 (0.006) Elapsed 0m 22s (remain 11m 15s) Loss: 0.2764(0.1630) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 10s) Loss: 0.2941(0.1641) 
EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.1169(0.1649) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 0s) Loss: 0.0275(0.1607) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.1268(0.1629) 
EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 52s) Loss: 0.0800(0.1617) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 48s) Loss: 0.1952(0.1608) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.1427(0.1578) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.1033(0.1594) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 36s) Loss: 0.2294(0.1623) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.1408(0.1620) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.0960(0.1612) 
EVAL: [342/3883] Data 0.001 (0.003) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0254(0.1617) 
EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.1140(0.1621) 
EVAL: [378/3883] Data 0.001 (0.003) Elapsed 1m 6s (remain 10m 19s) Loss: 0.2514(0.1625) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.2673(0.1622) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.1760(0.1623) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.1059(0.1627) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.2345(0.1621) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 2s) Loss: 0.1300(0.1633) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.1591(0.1626) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 29s (remain 9m 55s) Loss: 0.0984(0.1630) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 52s) Loss: 0.3431(0.1625) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0756(0.1616) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3011(0.1623) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0651(0.1615) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1147(0.1603) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1375(0.1602) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0742(0.1603) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0578(0.1606) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.0681(0.1606) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1984(0.1612) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.2155(0.1596) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.1946(0.1593) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0460(0.1592) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.2104(0.1591) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.1124(0.1593) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.1982(0.1603) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 9m 0s) Loss: 0.0537(0.1604) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.1227(0.1606) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0865(0.1609) 
EVAL: [864/3883] Data 0.002 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.2704(0.1620) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0305(0.1618) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 44s) Loss: 0.2715(0.1618) 
EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1925(0.1619) 
EVAL: [936/3883] Data 0.002 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2288(0.1619) 
EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.1014(0.1612) 
EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0365(0.1615) 
EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 28s) Loss: 0.1667(0.1610) 
EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 24s) Loss: 0.2637(0.1615) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0475(0.1606) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0820(0.1605) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.1155(0.1606) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 12s) Loss: 0.1442(0.1606) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 13s (remain 8m 8s) Loss: 0.1222(0.1603) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.2785(0.1607) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0810(0.1603) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1948(0.1604) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1612(0.1600) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1162(0.1601) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0833(0.1601) 
EVAL: [1224/3883] Data 0.002 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.2616(0.1601) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1812(0.1601) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.3908(0.1606) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.2581(0.1607) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0572(0.1606) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.1650(0.1609) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1224(0.1605) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0769(0.1602) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.4134(0.1610) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 18s) Loss: 0.0569(0.1607) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0992(0.1608) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1436(0.1606) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1273(0.1602) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 16s (remain 7m 5s) Loss: 0.0942(0.1604) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0917(0.1602) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 59s) Loss: 0.3089(0.1598) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3519(0.1596) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.3325(0.1597) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.2314(0.1595) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.1941(0.1592) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0591(0.1592) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 40s) Loss: 0.3192(0.1595) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2999(0.1602) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1067(0.1603) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1351(0.1604) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2332(0.1606) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0895(0.1603) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.1498(0.1608) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0905(0.1610) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1193(0.1610) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1140(0.1614) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3481(0.1615) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.2563(0.1613) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 19s (remain 6m 2s) Loss: 0.0577(0.1613) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0808(0.1614) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0103(0.1611) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.3210(0.1612) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0612(0.1612) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.1735(0.1613) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 43s) Loss: 0.0412(0.1612) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0557(0.1613) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.0927(0.1616) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2122(0.1617) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1713(0.1618) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.1183(0.1620) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0553(0.1618) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3482(0.1618) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2087(0.1621) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0497(0.1619) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1427(0.1619) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0933(0.1616) 
EVAL: [2142/3883] Data 0.002 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.1509(0.1615) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.2055(0.1613) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0383(0.1609) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0791(0.1609) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1740(0.1608) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.3226(0.1609) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.4714(0.1612) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.1312(0.1612) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2954(0.1613) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1997(0.1611) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0700(0.1611) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1696(0.1611) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.1497(0.1609) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 24s) Loss: 0.3126(0.1610) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3481(0.1608) 
EVAL: [2412/3883] Data 0.002 (0.001) Elapsed 7m 3s (remain 4m 17s) Loss: 0.2191(0.1607) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0873(0.1605) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.1328(0.1602) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0985(0.1602) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.3652(0.1605) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0634(0.1607) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.1097(0.1607) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.1674(0.1611) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4341(0.1610) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4961(0.1610) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0531(0.1612) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1743(0.1613) 
EVAL: [2628/3883] Data 0.002 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0543(0.1614) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.2201(0.1613) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.1047(0.1611) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.0751(0.1609) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0896(0.1608) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 0.2584(0.1608) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0306(0.1610) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.1155(0.1615) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 6s (remain 3m 14s) Loss: 0.2865(0.1616) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0568(0.1615) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2481(0.1615) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1486(0.1613) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1604(0.1613) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.4025(0.1612) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0399(0.1610) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.2110(0.1609) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.2526(0.1608) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2118(0.1607) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0318(0.1607) 
EVAL: [2970/3883] Data 0.002 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0850(0.1606) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0990(0.1606) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.1794(0.1605) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.1962(0.1606) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0895(0.1604) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1145(0.1603) 
EVAL: [3078/3883] Data 0.002 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1884(0.1603) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5165(0.1604) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.2237(0.1604) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.1928(0.1603) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.1161(0.1602) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.4552(0.1601) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0659(0.1600) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.1388(0.1600) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1322(0.1600) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4383(0.1601) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0369(0.1601) 
EVAL: [3276/3883] Data 0.002 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.1538(0.1600) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1418(0.1601) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0165(0.1602) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2204(0.1600) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.1072(0.1600) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 50s (remain 1m 30s) Loss: 0.0789(0.1599) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.2069(0.1600) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.2183(0.1601) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2647(0.1599) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1387(0.1601) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2248(0.1601) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0411(0.1601) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.1656(0.1602) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0819(0.1601) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5300(0.1602) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2949(0.1603) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.2626(0.1601) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0641(0.1601) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.1033(0.1601) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.2881(0.1601) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.1004(0.1603) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.1796(0.1602) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.3772(0.1604) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0389(0.1604) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.1311(0.1605) 
EVAL: [3726/3883] Data 0.002 (0.001) Elapsed 10m 53s (remain 0m 27s) Loss: 0.1450(0.1604) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1706(0.1603) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0853(0.1603) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0261(0.1602) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0367(0.1603) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0487(0.1602) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0623(0.1603) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2256(0.1603) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.1332(0.1601) 
Epoch 2 - avg_train_loss: 0.4704  avg_val_loss: 0.1602  time: 1126s
Epoch 2 - AUC: 0.9479023007024554 - pAUC: 0.16179566496960318
Epoch 2 - Save Best Score: 0.1618 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.1898(0.1602) 
Epoch: [3][0/650] Data 0.828 (0.828) Elapsed 0m 1s (remain 13m 58s) Loss: 0.3784(0.3784) Grad: 11.2917  LR: 0.000091  
Epoch: [3][18/650] Data 0.268 (0.293) Elapsed 0m 13s (remain 7m 30s) Loss: 0.2873(0.4024) Grad: 10.2432  LR: 0.000091  
Epoch: [3][36/650] Data 0.272 (0.281) Elapsed 0m 25s (remain 7m 8s) Loss: 0.9090(0.4081) Grad: 28.8463  LR: 0.000091  
Epoch: [3][54/650] Data 0.261 (0.277) Elapsed 0m 38s (remain 6m 52s) Loss: 0.2889(0.4113) Grad: 9.4664  LR: 0.000091  
Epoch: [3][72/650] Data 0.264 (0.275) Elapsed 0m 50s (remain 6m 38s) Loss: 0.4738(0.4275) Grad: 12.0205  LR: 0.000091  
Epoch: [3][90/650] Data 0.251 (0.274) Elapsed 1m 2s (remain 6m 25s) Loss: 1.1058(0.4364) Grad: 30.0984  LR: 0.000091  
Epoch: [3][108/650] Data 0.268 (0.272) Elapsed 1m 15s (remain 6m 12s) Loss: 0.4917(0.4317) Grad: 11.7421  LR: 0.000091  
Epoch: [3][126/650] Data 0.252 (0.271) Elapsed 1m 27s (remain 5m 59s) Loss: 0.5872(0.4287) Grad: 16.3690  LR: 0.000091  
Epoch: [3][144/650] Data 0.270 (0.271) Elapsed 1m 39s (remain 5m 47s) Loss: 0.4533(0.4239) Grad: 9.2913  LR: 0.000091  
Epoch: [3][162/650] Data 0.264 (0.271) Elapsed 1m 52s (remain 5m 34s) Loss: 0.4157(0.4164) Grad: 10.7173  LR: 0.000091  
Epoch: [3][180/650] Data 0.267 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.6966(0.4287) Grad: 15.1735  LR: 0.000091  
Epoch: [3][198/650] Data 0.273 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.5989(0.4238) Grad: 22.1596  LR: 0.000091  
Epoch: [3][216/650] Data 0.272 (0.268) Elapsed 2m 28s (remain 4m 57s) Loss: 0.3471(0.4190) Grad: 10.7571  LR: 0.000091  
Epoch: [3][234/650] Data 0.273 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.3999(0.4224) Grad: 11.3680  LR: 0.000091  
Epoch: [3][252/650] Data 0.267 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.2138(0.4191) Grad: 7.7851  LR: 0.000091  
Epoch: [3][270/650] Data 0.263 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.3282(0.4192) Grad: 8.2922  LR: 0.000091  
Epoch: [3][288/650] Data 0.271 (0.269) Elapsed 3m 18s (remain 4m 7s) Loss: 0.3682(0.4217) Grad: 11.6487  LR: 0.000091  
Epoch: [3][306/650] Data 0.272 (0.269) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3138(0.4248) Grad: 11.0097  LR: 0.000091  
Epoch: [3][324/650] Data 0.271 (0.269) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1658(0.4227) Grad: 7.1503  LR: 0.000091  
Epoch: [3][342/650] Data 0.271 (0.269) Elapsed 3m 55s (remain 3m 30s) Loss: 0.3155(0.4205) Grad: 8.8760  LR: 0.000091  
Epoch: [3][360/650] Data 0.266 (0.269) Elapsed 4m 7s (remain 3m 18s) Loss: 0.3396(0.4193) Grad: 10.1301  LR: 0.000091  
Epoch: [3][378/650] Data 0.266 (0.269) Elapsed 4m 19s (remain 3m 5s) Loss: 0.5675(0.4191) Grad: 14.2463  LR: 0.000091  
Epoch: [3][396/650] Data 0.270 (0.269) Elapsed 4m 32s (remain 2m 53s) Loss: 0.4167(0.4187) Grad: 12.4221  LR: 0.000091  
Epoch: [3][414/650] Data 0.258 (0.269) Elapsed 4m 44s (remain 2m 41s) Loss: 0.6098(0.4167) Grad: 13.9772  LR: 0.000091  
Epoch: [3][432/650] Data 0.261 (0.268) Elapsed 4m 56s (remain 2m 28s) Loss: 0.3655(0.4157) Grad: 9.4036  LR: 0.000091  
Epoch: [3][450/650] Data 0.272 (0.268) Elapsed 5m 8s (remain 2m 16s) Loss: 0.4631(0.4175) Grad: 11.3350  LR: 0.000091  
Epoch: [3][468/650] Data 0.259 (0.268) Elapsed 5m 21s (remain 2m 3s) Loss: 0.2234(0.4194) Grad: 7.3291  LR: 0.000091  
Epoch: [3][486/650] Data 0.271 (0.268) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2298(0.4184) Grad: 9.0037  LR: 0.000091  
Epoch: [3][504/650] Data 0.272 (0.268) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1840(0.4182) Grad: 6.3553  LR: 0.000091  
Epoch: [3][522/650] Data 0.271 (0.268) Elapsed 5m 58s (remain 1m 26s) Loss: 0.1830(0.4170) Grad: 7.4448  LR: 0.000091  
Epoch: [3][540/650] Data 0.270 (0.268) Elapsed 6m 10s (remain 1m 14s) Loss: 0.4438(0.4147) Grad: 11.7441  LR: 0.000091  
Epoch: [3][558/650] Data 0.270 (0.268) Elapsed 6m 22s (remain 1m 2s) Loss: 0.4030(0.4136) Grad: 11.9621  LR: 0.000091  
Epoch: [3][576/650] Data 0.258 (0.268) Elapsed 6m 35s (remain 0m 49s) Loss: 0.3255(0.4111) Grad: 9.3649  LR: 0.000091  
Epoch: [3][594/650] Data 0.271 (0.268) Elapsed 6m 47s (remain 0m 37s) Loss: 0.3872(0.4110) Grad: 10.8818  LR: 0.000091  
Epoch: [3][612/650] Data 0.271 (0.268) Elapsed 6m 59s (remain 0m 25s) Loss: 0.3804(0.4101) Grad: 11.1846  LR: 0.000091  
Epoch: [3][630/650] Data 0.270 (0.268) Elapsed 7m 12s (remain 0m 13s) Loss: 0.7112(0.4090) Grad: 16.2961  LR: 0.000091  
Epoch: [3][648/650] Data 0.271 (0.268) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2905(0.4076) Grad: 7.7534  LR: 0.000091  
Epoch: [3][649/650] Data 0.273 (0.268) Elapsed 7m 25s (remain 0m 0s) Loss: 0.2781(0.4074) Grad: 7.3812  LR: 0.000091  
EVAL: [0/3883] Data 0.601 (0.601) Elapsed 0m 0s (remain 50m 23s) Loss: 0.2910(0.2910) 
EVAL: [18/3883] Data 0.001 (0.033) Elapsed 0m 3s (remain 13m 20s) Loss: 0.2816(0.1745) 
EVAL: [36/3883] Data 0.001 (0.017) Elapsed 0m 7s (remain 12m 16s) Loss: 0.1325(0.1669) 
EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 52s) Loss: 0.0291(0.1599) 
EVAL: [72/3883] Data 0.001 (0.009) Elapsed 0m 13s (remain 11m 38s) Loss: 0.2609(0.1723) 
EVAL: [90/3883] Data 0.001 (0.008) Elapsed 0m 16s (remain 11m 29s) Loss: 0.3429(0.1755) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 21s) Loss: 0.3869(0.1838) 
EVAL: [126/3883] Data 0.001 (0.006) Elapsed 0m 22s (remain 11m 15s) Loss: 0.3862(0.1921) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 10s) Loss: 0.1768(0.1918) 
EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.0688(0.1921) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 0s) Loss: 0.0352(0.1871) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.1562(0.1884) 
EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 52s) Loss: 0.1298(0.1879) 
EVAL: [234/3883] Data 0.001 (0.004) Elapsed 0m 41s (remain 10m 48s) Loss: 0.1438(0.1865) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.2176(0.1825) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.1207(0.1848) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 36s) Loss: 0.3173(0.1880) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.1768(0.1878) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.1491(0.1880) 
EVAL: [342/3883] Data 0.002 (0.003) Elapsed 1m 0s (remain 10m 26s) Loss: 0.0209(0.1888) 
EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.1015(0.1891) 
EVAL: [378/3883] Data 0.002 (0.003) Elapsed 1m 6s (remain 10m 19s) Loss: 0.1924(0.1896) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.3370(0.1884) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.2262(0.1890) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 9s) Loss: 0.1208(0.1891) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.3187(0.1903) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 2s) Loss: 0.0561(0.1919) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 59s) Loss: 0.2520(0.1913) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 29s (remain 9m 55s) Loss: 0.1054(0.1917) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 52s) Loss: 0.4699(0.1912) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 49s) Loss: 0.1210(0.1903) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3499(0.1911) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0792(0.1903) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 39s) Loss: 0.1545(0.1892) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1606(0.1894) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.1086(0.1893) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0478(0.1894) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 26s) Loss: 0.1153(0.1893) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1421(0.1894) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1611(0.1876) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.2463(0.1874) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 10s (remain 9m 13s) Loss: 0.0635(0.1874) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1968(0.1873) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.1018(0.1873) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.2082(0.1881) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 9m 0s) Loss: 0.0407(0.1881) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 57s) Loss: 0.0566(0.1887) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0754(0.1893) 
EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.3035(0.1901) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0298(0.1898) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 44s) Loss: 0.2744(0.1896) 
EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 41s) Loss: 0.2739(0.1902) 
EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2717(0.1901) 
EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.1349(0.1895) 
EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 51s (remain 8m 31s) Loss: 0.0522(0.1896) 
EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 28s) Loss: 0.1668(0.1892) 
EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 25s) Loss: 0.3312(0.1895) 
EVAL: [1026/3883] Data 0.001 (0.002) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0278(0.1886) 
EVAL: [1044/3883] Data 0.001 (0.002) Elapsed 3m 3s (remain 8m 18s) Loss: 0.1095(0.1888) 
EVAL: [1062/3883] Data 0.001 (0.002) Elapsed 3m 6s (remain 8m 15s) Loss: 0.2189(0.1888) 
EVAL: [1080/3883] Data 0.001 (0.002) Elapsed 3m 9s (remain 8m 12s) Loss: 0.1481(0.1884) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 13s (remain 8m 9s) Loss: 0.1550(0.1878) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.1831(0.1881) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0593(0.1877) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.2231(0.1880) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1820(0.1874) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 53s) Loss: 0.2113(0.1875) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 32s (remain 7m 50s) Loss: 0.0854(0.1872) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.3458(0.1872) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.2700(0.1875) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.4275(0.1879) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.2915(0.1882) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 34s) Loss: 0.0369(0.1881) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.1637(0.1884) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 54s (remain 7m 27s) Loss: 0.2470(0.1881) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.1018(0.1882) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.4251(0.1893) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 18s) Loss: 0.1210(0.1890) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 15s) Loss: 0.1756(0.1891) 
EVAL: [1422/3883] Data 0.002 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1631(0.1890) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.2113(0.1884) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 16s (remain 7m 5s) Loss: 0.1262(0.1886) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.2412(0.1883) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 59s) Loss: 0.3796(0.1882) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 56s) Loss: 0.4339(0.1882) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2757(0.1882) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.2337(0.1880) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 35s (remain 6m 46s) Loss: 0.2390(0.1877) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0368(0.1878) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 40s) Loss: 0.2864(0.1881) 
EVAL: [1620/3883] Data 0.002 (0.001) Elapsed 4m 44s (remain 6m 37s) Loss: 0.3668(0.1889) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2261(0.1891) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1493(0.1892) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2048(0.1894) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 57s (remain 6m 24s) Loss: 0.1273(0.1892) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 21s) Loss: 0.1908(0.1898) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 18s) Loss: 0.1013(0.1901) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1721(0.1903) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1395(0.1906) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3675(0.1906) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 16s (remain 6m 5s) Loss: 0.2951(0.1905) 
EVAL: [1818/3883] Data 0.002 (0.001) Elapsed 5m 19s (remain 6m 2s) Loss: 0.0614(0.1905) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 59s) Loss: 0.1094(0.1907) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0195(0.1906) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.3539(0.1907) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.1111(0.1907) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.1746(0.1907) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 38s (remain 5m 43s) Loss: 0.0203(0.1905) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 40s) Loss: 0.0824(0.1906) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1172(0.1910) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2845(0.1912) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1303(0.1912) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.1271(0.1914) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 57s (remain 5m 24s) Loss: 0.0948(0.1913) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 6m 0s (remain 5m 21s) Loss: 0.4554(0.1911) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2849(0.1915) 
EVAL: [2088/3883] Data 0.002 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0671(0.1912) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1996(0.1912) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0951(0.1908) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.1733(0.1906) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 19s (remain 5m 2s) Loss: 0.1800(0.1903) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0636(0.1900) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.1252(0.1901) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1880(0.1900) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2995(0.1900) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.5661(0.1902) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 38s (remain 4m 43s) Loss: 0.0872(0.1903) 
EVAL: [2286/3883] Data 0.002 (0.001) Elapsed 6m 41s (remain 4m 39s) Loss: 0.3239(0.1901) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.2871(0.1900) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0610(0.1898) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.2039(0.1897) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.3252(0.1895) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 24s) Loss: 0.3446(0.1895) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 7m 0s (remain 4m 20s) Loss: 0.4761(0.1894) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 3s (remain 4m 17s) Loss: 0.2868(0.1895) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0533(0.1892) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.1333(0.1889) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.1463(0.1888) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2548(0.1890) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 2s) Loss: 0.0860(0.1893) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 22s (remain 3m 58s) Loss: 0.1616(0.1891) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.2004(0.1894) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.5147(0.1893) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4913(0.1892) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0614(0.1893) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 43s) Loss: 0.2861(0.1895) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 41s (remain 3m 39s) Loss: 0.1018(0.1895) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 44s (remain 3m 36s) Loss: 0.2592(0.1895) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.1314(0.1892) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1332(0.1890) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.1461(0.1889) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 0.3235(0.1888) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0633(0.1889) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 3s (remain 3m 17s) Loss: 0.1780(0.1893) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 6s (remain 3m 14s) Loss: 0.2831(0.1895) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0865(0.1894) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.3453(0.1894) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.2211(0.1892) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 2s) Loss: 0.2677(0.1892) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 22s (remain 2m 58s) Loss: 0.3437(0.1892) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 25s (remain 2m 55s) Loss: 0.0621(0.1890) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.3153(0.1890) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.2297(0.1890) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.3656(0.1888) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 43s) Loss: 0.0387(0.1889) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.1335(0.1889) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 44s (remain 2m 36s) Loss: 0.1914(0.1890) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 47s (remain 2m 33s) Loss: 0.1904(0.1888) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.3159(0.1888) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.1018(0.1886) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.2200(0.1884) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.3132(0.1884) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 3s (remain 2m 17s) Loss: 0.5755(0.1884) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 6s (remain 2m 14s) Loss: 0.2952(0.1885) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.1730(0.1884) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.1861(0.1883) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.4344(0.1883) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 2s) Loss: 0.0248(0.1880) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.2072(0.1881) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 25s (remain 1m 55s) Loss: 0.1349(0.1880) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 28s (remain 1m 52s) Loss: 0.5892(0.1881) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0693(0.1880) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.2261(0.1879) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.2283(0.1880) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0369(0.1882) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2752(0.1880) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 47s (remain 1m 33s) Loss: 0.0887(0.1881) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 50s (remain 1m 30s) Loss: 0.0471(0.1879) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.3108(0.1881) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.2136(0.1882) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2830(0.1879) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2344(0.1882) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 6s (remain 1m 14s) Loss: 0.3376(0.1882) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 9s (remain 1m 11s) Loss: 0.0597(0.1882) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.2252(0.1883) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0904(0.1882) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.6235(0.1884) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.3725(0.1884) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.2265(0.1882) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 28s (remain 0m 52s) Loss: 0.1061(0.1882) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 31s (remain 0m 49s) Loss: 0.1099(0.1883) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.2045(0.1884) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0505(0.1885) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.1724(0.1884) 
EVAL: [3672/3883] Data 0.002 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.6321(0.1886) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 47s (remain 0m 33s) Loss: 0.0539(0.1886) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 50s (remain 0m 30s) Loss: 0.1829(0.1887) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 53s (remain 0m 27s) Loss: 0.2587(0.1886) 
EVAL: [3744/3883] Data 0.002 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.2037(0.1887) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0769(0.1886) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0346(0.1885) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0372(0.1885) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 9s (remain 0m 11s) Loss: 0.0599(0.1886) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 12s (remain 0m 8s) Loss: 0.0874(0.1886) 
EVAL: [3852/3883] Data 0.002 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2689(0.1887) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0731(0.1884) 
Epoch 3 - avg_train_loss: 0.4074  avg_val_loss: 0.1884  time: 1126s
Epoch 3 - AUC: 0.9572403385534145 - pAUC: 0.1689444909883783
Epoch 3 - Save Best Score: 0.1689 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.3771(0.1884) 
Epoch: [4][0/650] Data 0.686 (0.686) Elapsed 0m 1s (remain 12m 14s) Loss: 0.5161(0.5161) Grad: 14.3878  LR: 0.000080  
Epoch: [4][18/650] Data 0.265 (0.285) Elapsed 0m 13s (remain 7m 26s) Loss: 0.2806(0.3602) Grad: 8.6952  LR: 0.000080  
Epoch: [4][36/650] Data 0.262 (0.274) Elapsed 0m 25s (remain 7m 6s) Loss: 0.5517(0.3735) Grad: 16.1617  LR: 0.000080  
Epoch: [4][54/650] Data 0.271 (0.271) Elapsed 0m 38s (remain 6m 51s) Loss: 0.2280(0.4022) Grad: 8.3559  LR: 0.000080  
Epoch: [4][72/650] Data 0.271 (0.270) Elapsed 0m 50s (remain 6m 38s) Loss: 0.2354(0.3928) Grad: 8.8909  LR: 0.000080  
Epoch: [4][90/650] Data 0.270 (0.270) Elapsed 1m 2s (remain 6m 24s) Loss: 0.4970(0.3810) Grad: 14.7302  LR: 0.000080  
Epoch: [4][108/650] Data 0.272 (0.270) Elapsed 1m 14s (remain 6m 12s) Loss: 0.4437(0.3693) Grad: 11.2275  LR: 0.000080  
Epoch: [4][126/650] Data 0.271 (0.269) Elapsed 1m 27s (remain 5m 59s) Loss: 0.5415(0.3690) Grad: 11.1365  LR: 0.000080  
Epoch: [4][144/650] Data 0.270 (0.269) Elapsed 1m 39s (remain 5m 46s) Loss: 0.3211(0.3665) Grad: 8.2511  LR: 0.000080  
Epoch: [4][162/650] Data 0.273 (0.267) Elapsed 1m 51s (remain 5m 34s) Loss: 0.5143(0.3617) Grad: 14.5998  LR: 0.000080  
Epoch: [4][180/650] Data 0.272 (0.267) Elapsed 2m 4s (remain 5m 21s) Loss: 0.2955(0.3700) Grad: 8.8253  LR: 0.000080  
Epoch: [4][198/650] Data 0.268 (0.267) Elapsed 2m 16s (remain 5m 9s) Loss: 0.5427(0.3687) Grad: 15.2589  LR: 0.000080  
Epoch: [4][216/650] Data 0.272 (0.267) Elapsed 2m 28s (remain 4m 56s) Loss: 0.3833(0.3719) Grad: 10.8625  LR: 0.000080  
Epoch: [4][234/650] Data 0.269 (0.267) Elapsed 2m 41s (remain 4m 44s) Loss: 0.1834(0.3736) Grad: 6.2694  LR: 0.000080  
Epoch: [4][252/650] Data 0.264 (0.267) Elapsed 2m 53s (remain 4m 32s) Loss: 0.2234(0.3712) Grad: 7.4084  LR: 0.000080  
Epoch: [4][270/650] Data 0.263 (0.267) Elapsed 3m 5s (remain 4m 19s) Loss: 0.3614(0.3713) Grad: 9.3921  LR: 0.000080  
Epoch: [4][288/650] Data 0.266 (0.267) Elapsed 3m 18s (remain 4m 7s) Loss: 0.1921(0.3698) Grad: 5.9696  LR: 0.000080  
Epoch: [4][306/650] Data 0.270 (0.267) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3210(0.3723) Grad: 13.2074  LR: 0.000080  
Epoch: [4][324/650] Data 0.261 (0.267) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1754(0.3719) Grad: 7.1664  LR: 0.000080  
Epoch: [4][342/650] Data 0.272 (0.267) Elapsed 3m 54s (remain 3m 30s) Loss: 0.5245(0.3736) Grad: 13.0820  LR: 0.000080  
Epoch: [4][360/650] Data 0.271 (0.267) Elapsed 4m 7s (remain 3m 17s) Loss: 0.3468(0.3727) Grad: 10.1481  LR: 0.000080  
Epoch: [4][378/650] Data 0.268 (0.267) Elapsed 4m 19s (remain 3m 5s) Loss: 0.4454(0.3717) Grad: 13.9034  LR: 0.000080  
Epoch: [4][396/650] Data 0.259 (0.267) Elapsed 4m 31s (remain 2m 53s) Loss: 0.2754(0.3706) Grad: 6.4313  LR: 0.000080  
Epoch: [4][414/650] Data 0.271 (0.267) Elapsed 4m 44s (remain 2m 40s) Loss: 0.6202(0.3708) Grad: 12.9967  LR: 0.000080  
Epoch: [4][432/650] Data 0.259 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.5804(0.3724) Grad: 12.1082  LR: 0.000080  
Epoch: [4][450/650] Data 0.271 (0.267) Elapsed 5m 8s (remain 2m 16s) Loss: 0.3864(0.3757) Grad: 9.0371  LR: 0.000080  
Epoch: [4][468/650] Data 0.271 (0.267) Elapsed 5m 21s (remain 2m 3s) Loss: 0.2503(0.3775) Grad: 7.8070  LR: 0.000080  
Epoch: [4][486/650] Data 0.265 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.3014(0.3765) Grad: 9.6994  LR: 0.000080  
Epoch: [4][504/650] Data 0.269 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.3121(0.3782) Grad: 11.0597  LR: 0.000080  
Epoch: [4][522/650] Data 0.272 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.1828(0.3775) Grad: 7.3395  LR: 0.000080  
Epoch: [4][540/650] Data 0.266 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.2960(0.3762) Grad: 9.0681  LR: 0.000080  
Epoch: [4][558/650] Data 0.272 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.0975(0.3755) Grad: 4.3532  LR: 0.000080  
Epoch: [4][576/650] Data 0.272 (0.267) Elapsed 6m 34s (remain 0m 49s) Loss: 0.1466(0.3741) Grad: 5.3166  LR: 0.000080  
Epoch: [4][594/650] Data 0.271 (0.267) Elapsed 6m 47s (remain 0m 37s) Loss: 0.3465(0.3749) Grad: 10.6198  LR: 0.000080  
Epoch: [4][612/650] Data 0.268 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1756(0.3742) Grad: 5.4288  LR: 0.000080  
Epoch: [4][630/650] Data 0.270 (0.267) Elapsed 7m 11s (remain 0m 13s) Loss: 0.3968(0.3753) Grad: 11.0711  LR: 0.000080  
Epoch: [4][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.3950(0.3740) Grad: 9.7122  LR: 0.000080  
Epoch: [4][649/650] Data 0.273 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2463(0.3738) Grad: 7.9042  LR: 0.000080  
EVAL: [0/3883] Data 0.517 (0.517) Elapsed 0m 0s (remain 45m 2s) Loss: 0.1744(0.1744) 
EVAL: [18/3883] Data 0.001 (0.028) Elapsed 0m 3s (remain 13m 3s) Loss: 0.0904(0.0916) 
EVAL: [36/3883] Data 0.001 (0.015) Elapsed 0m 7s (remain 12m 7s) Loss: 0.0279(0.0933) 
EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 46s) Loss: 0.0095(0.0953) 
EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 34s) Loss: 0.2201(0.1056) 
EVAL: [90/3883] Data 0.001 (0.007) Elapsed 0m 16s (remain 11m 25s) Loss: 0.2000(0.1101) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 19s) Loss: 0.2576(0.1140) 
EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 13s) Loss: 0.2517(0.1213) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 7s) Loss: 0.2046(0.1214) 
EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 3s) Loss: 0.0318(0.1208) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 58s) Loss: 0.0070(0.1170) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 54s) Loss: 0.0571(0.1194) 
EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 50s) Loss: 0.0684(0.1189) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1119(0.1178) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.1373(0.1154) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 39s) Loss: 0.0585(0.1177) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 35s) Loss: 0.2303(0.1211) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 32s) Loss: 0.0783(0.1208) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0541(0.1200) 
EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0056(0.1209) 
EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0679(0.1211) 
EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 18s) Loss: 0.1329(0.1212) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 14s) Loss: 0.2599(0.1212) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1759(0.1217) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0501(0.1221) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.2530(0.1221) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0374(0.1230) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.1687(0.1221) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0701(0.1222) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.4162(0.1222) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0462(0.1210) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.2565(0.1218) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0497(0.1209) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.0988(0.1204) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1239(0.1201) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0357(0.1200) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0214(0.1204) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1063(0.1206) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1424(0.1210) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1193(0.1198) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1081(0.1192) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0191(0.1193) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1421(0.1194) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0749(0.1194) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1189(0.1199) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0176(0.1199) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0231(0.1201) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0293(0.1204) 
EVAL: [864/3883] Data 0.002 (0.002) Elapsed 2m 31s (remain 8m 50s) Loss: 0.2445(0.1211) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0061(0.1211) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2322(0.1210) 
EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1425(0.1212) 
EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.1735(0.1212) 
EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0570(0.1207) 
EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0210(0.1210) 
EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 54s (remain 8m 27s) Loss: 0.1130(0.1207) 
EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.2202(0.1210) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0312(0.1204) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0473(0.1204) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0818(0.1206) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0937(0.1205) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.0801(0.1199) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.1384(0.1201) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0367(0.1196) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1314(0.1197) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.0794(0.1192) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0985(0.1192) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0231(0.1190) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.2084(0.1190) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1423(0.1189) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.3262(0.1191) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2441(0.1194) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0267(0.1195) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0851(0.1198) 
EVAL: [1332/3883] Data 0.002 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.0855(0.1193) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0145(0.1192) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.3208(0.1198) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0439(0.1195) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0580(0.1196) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1370(0.1196) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.0706(0.1191) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0640(0.1192) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0973(0.1190) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2943(0.1188) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3109(0.1187) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2503(0.1188) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.1211(0.1185) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.1949(0.1182) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 43s) Loss: 0.0171(0.1182) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2507(0.1186) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2390(0.1192) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1125(0.1194) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1062(0.1194) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.1613(0.1197) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0368(0.1196) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.0915(0.1200) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0432(0.1201) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.0965(0.1201) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.0704(0.1204) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3704(0.1205) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.2139(0.1205) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0328(0.1205) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0566(0.1207) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0041(0.1206) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2407(0.1207) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0307(0.1208) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.1211(0.1208) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0077(0.1206) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0348(0.1207) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.0679(0.1210) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1704(0.1210) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1392(0.1211) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0269(0.1213) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0545(0.1212) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3110(0.1213) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.1791(0.1215) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0198(0.1214) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1074(0.1214) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0632(0.1210) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0730(0.1210) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1044(0.1207) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0148(0.1204) 
EVAL: [2196/3883] Data 0.002 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0376(0.1204) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1374(0.1204) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2171(0.1205) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.4115(0.1207) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0834(0.1208) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.1775(0.1207) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1654(0.1205) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0457(0.1205) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1289(0.1205) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.1745(0.1203) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2559(0.1205) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3498(0.1203) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1721(0.1203) 
EVAL: [2430/3883] Data 0.002 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0130(0.1201) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0988(0.1198) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0732(0.1197) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2352(0.1200) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0292(0.1201) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0827(0.1201) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0991(0.1204) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4510(0.1204) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4533(0.1204) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0364(0.1205) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1393(0.1207) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0252(0.1208) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1561(0.1206) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0534(0.1206) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1192(0.1205) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0751(0.1204) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1639(0.1204) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0222(0.1205) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0977(0.1209) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1908(0.1209) 
EVAL: [2790/3883] Data 0.002 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0522(0.1208) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1483(0.1209) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.0817(0.1207) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1356(0.1207) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3131(0.1208) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0131(0.1205) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2349(0.1205) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1541(0.1205) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2375(0.1204) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0162(0.1205) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0534(0.1204) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0924(0.1205) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.1318(0.1204) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.2042(0.1204) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0462(0.1202) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1251(0.1202) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1728(0.1202) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5085(0.1202) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1523(0.1203) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.1005(0.1202) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0726(0.1200) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.3385(0.1200) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0068(0.1198) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0890(0.1198) 
EVAL: [3222/3883] Data 0.002 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1349(0.1198) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4512(0.1199) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0236(0.1199) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.1464(0.1198) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1414(0.1199) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0097(0.1200) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.1940(0.1198) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0519(0.1198) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0451(0.1198) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.2096(0.1199) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1446(0.1200) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2546(0.1199) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1266(0.1201) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2522(0.1201) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0237(0.1202) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1047(0.1202) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0759(0.1201) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5256(0.1203) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2270(0.1204) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.2062(0.1201) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0461(0.1201) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0664(0.1202) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.1970(0.1202) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0520(0.1203) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0692(0.1202) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4023(0.1203) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0137(0.1203) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0711(0.1203) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1352(0.1202) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1439(0.1203) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0819(0.1202) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0124(0.1202) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0090(0.1202) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0227(0.1202) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0326(0.1202) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.2164(0.1203) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0509(0.1201) 
Epoch 4 - avg_train_loss: 0.3738  avg_val_loss: 0.1201  time: 1125s
Epoch 4 - AUC: 0.9644866920152091 - pAUC: 0.17463384245236402
Epoch 4 - Save Best Score: 0.1746 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.2134(0.1201) 
Epoch: [5][0/650] Data 0.717 (0.717) Elapsed 0m 1s (remain 12m 37s) Loss: 0.1759(0.1759) Grad: 5.9223  LR: 0.000066  
Epoch: [5][18/650] Data 0.258 (0.289) Elapsed 0m 13s (remain 7m 27s) Loss: 0.1697(0.2975) Grad: 5.3467  LR: 0.000066  
Epoch: [5][36/650] Data 0.272 (0.280) Elapsed 0m 25s (remain 7m 7s) Loss: 0.4989(0.3398) Grad: 17.8310  LR: 0.000066  
Epoch: [5][54/650] Data 0.267 (0.274) Elapsed 0m 38s (remain 6m 51s) Loss: 0.4198(0.3398) Grad: 9.3448  LR: 0.000066  
Epoch: [5][72/650] Data 0.268 (0.272) Elapsed 0m 50s (remain 6m 38s) Loss: 0.1747(0.3416) Grad: 5.6316  LR: 0.000066  
Epoch: [5][90/650] Data 0.258 (0.271) Elapsed 1m 2s (remain 6m 25s) Loss: 0.7529(0.3419) Grad: 13.8368  LR: 0.000066  
Epoch: [5][108/650] Data 0.270 (0.270) Elapsed 1m 14s (remain 6m 12s) Loss: 0.3285(0.3356) Grad: 8.2665  LR: 0.000066  
Epoch: [5][126/650] Data 0.268 (0.270) Elapsed 1m 27s (remain 5m 59s) Loss: 0.4040(0.3349) Grad: 7.7235  LR: 0.000066  
Epoch: [5][144/650] Data 0.267 (0.270) Elapsed 1m 39s (remain 5m 46s) Loss: 0.4024(0.3342) Grad: 9.2124  LR: 0.000066  
Epoch: [5][162/650] Data 0.271 (0.270) Elapsed 1m 51s (remain 5m 34s) Loss: 0.2935(0.3355) Grad: 9.3955  LR: 0.000066  
Epoch: [5][180/650] Data 0.257 (0.269) Elapsed 2m 4s (remain 5m 21s) Loss: 0.3530(0.3429) Grad: 10.1772  LR: 0.000066  
Epoch: [5][198/650] Data 0.272 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4249(0.3399) Grad: 13.0205  LR: 0.000066  
Epoch: [5][216/650] Data 0.270 (0.269) Elapsed 2m 28s (remain 4m 56s) Loss: 0.3282(0.3373) Grad: 8.8688  LR: 0.000066  
Epoch: [5][234/650] Data 0.270 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.5423(0.3415) Grad: 10.8492  LR: 0.000066  
Epoch: [5][252/650] Data 0.271 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.2552(0.3411) Grad: 7.4406  LR: 0.000066  
Epoch: [5][270/650] Data 0.270 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.2963(0.3379) Grad: 10.1796  LR: 0.000066  
Epoch: [5][288/650] Data 0.271 (0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.2279(0.3368) Grad: 7.5042  LR: 0.000066  
Epoch: [5][306/650] Data 0.273 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.4556(0.3375) Grad: 10.9485  LR: 0.000066  
Epoch: [5][324/650] Data 0.267 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1420(0.3360) Grad: 8.8681  LR: 0.000066  
Epoch: [5][342/650] Data 0.270 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.1718(0.3342) Grad: 5.5009  LR: 0.000066  
Epoch: [5][360/650] Data 0.262 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.3163(0.3360) Grad: 12.0073  LR: 0.000066  
Epoch: [5][378/650] Data 0.272 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.5190(0.3367) Grad: 15.7462  LR: 0.000066  
Epoch: [5][396/650] Data 0.260 (0.268) Elapsed 4m 31s (remain 2m 53s) Loss: 0.2492(0.3349) Grad: 8.5157  LR: 0.000066  
Epoch: [5][414/650] Data 0.272 (0.268) Elapsed 4m 44s (remain 2m 40s) Loss: 0.2921(0.3327) Grad: 7.1778  LR: 0.000066  
Epoch: [5][432/650] Data 0.272 (0.268) Elapsed 4m 56s (remain 2m 28s) Loss: 0.6185(0.3317) Grad: 19.2257  LR: 0.000066  
Epoch: [5][450/650] Data 0.272 (0.267) Elapsed 5m 8s (remain 2m 16s) Loss: 0.2464(0.3328) Grad: 7.0113  LR: 0.000066  
Epoch: [5][468/650] Data 0.262 (0.267) Elapsed 5m 21s (remain 2m 3s) Loss: 0.1170(0.3356) Grad: 5.1384  LR: 0.000066  
Epoch: [5][486/650] Data 0.271 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2204(0.3359) Grad: 6.6311  LR: 0.000066  
Epoch: [5][504/650] Data 0.261 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1590(0.3356) Grad: 4.3151  LR: 0.000066  
Epoch: [5][522/650] Data 0.272 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0500(0.3347) Grad: 2.0946  LR: 0.000066  
Epoch: [5][540/650] Data 0.271 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.6337(0.3337) Grad: 16.5904  LR: 0.000066  
Epoch: [5][558/650] Data 0.264 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.1482(0.3346) Grad: 4.7727  LR: 0.000066  
Epoch: [5][576/650] Data 0.265 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.2739(0.3331) Grad: 10.2463  LR: 0.000066  
Epoch: [5][594/650] Data 0.240 (0.267) Elapsed 6m 47s (remain 0m 37s) Loss: 0.3891(0.3348) Grad: 9.1534  LR: 0.000066  
Epoch: [5][612/650] Data 0.262 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.5128(0.3350) Grad: 14.7601  LR: 0.000066  
Epoch: [5][630/650] Data 0.272 (0.267) Elapsed 7m 11s (remain 0m 13s) Loss: 1.2112(0.3351) Grad: 18.7087  LR: 0.000066  
Epoch: [5][648/650] Data 0.273 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.4695(0.3367) Grad: 12.8143  LR: 0.000066  
Epoch: [5][649/650] Data 0.274 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.4268(0.3368) Grad: 11.7171  LR: 0.000066  
EVAL: [0/3883] Data 0.602 (0.602) Elapsed 0m 0s (remain 50m 18s) Loss: 0.0588(0.0588) 
EVAL: [18/3883] Data 0.001 (0.033) Elapsed 0m 3s (remain 13m 19s) Loss: 0.0497(0.0475) 
EVAL: [36/3883] Data 0.001 (0.017) Elapsed 0m 7s (remain 12m 16s) Loss: 0.0143(0.0542) 
EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 52s) Loss: 0.0033(0.0541) 
EVAL: [72/3883] Data 0.001 (0.009) Elapsed 0m 13s (remain 11m 38s) Loss: 0.1774(0.0628) 
EVAL: [90/3883] Data 0.001 (0.008) Elapsed 0m 16s (remain 11m 29s) Loss: 0.1237(0.0705) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 21s) Loss: 0.1626(0.0725) 
EVAL: [126/3883] Data 0.001 (0.006) Elapsed 0m 22s (remain 11m 15s) Loss: 0.2051(0.0760) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 10s) Loss: 0.0994(0.0737) 
EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.0122(0.0736) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 0s) Loss: 0.0023(0.0717) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.0158(0.0752) 
EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 51s) Loss: 0.0315(0.0756) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 48s) Loss: 0.0976(0.0745) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.0433(0.0721) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.0192(0.0744) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 36s) Loss: 0.1576(0.0765) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.0380(0.0764) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.0263(0.0758) 
EVAL: [342/3883] Data 0.001 (0.003) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0043(0.0772) 
EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.0253(0.0770) 
EVAL: [378/3883] Data 0.001 (0.003) Elapsed 1m 6s (remain 10m 18s) Loss: 0.0295(0.0768) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.1876(0.0765) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.1282(0.0775) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0238(0.0775) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.1296(0.0780) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0123(0.0785) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.1640(0.0779) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 55s) Loss: 0.0436(0.0780) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.1649(0.0777) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0437(0.0768) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.2094(0.0774) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0059(0.0768) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.0822(0.0762) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.1109(0.0758) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0252(0.0758) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0087(0.0759) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1095(0.0758) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.0964(0.0758) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.0443(0.0749) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.0774(0.0745) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0098(0.0746) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.0656(0.0746) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0344(0.0743) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.0835(0.0748) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0079(0.0747) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0116(0.0751) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0134(0.0753) 
EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.1624(0.0759) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0029(0.0759) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.1615(0.0759) 
EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.0804(0.0761) 
EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.0738(0.0760) 
EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0498(0.0756) 
EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0209(0.0760) 
EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 27s) Loss: 0.0594(0.0756) 
EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 24s) Loss: 0.1562(0.0758) 
EVAL: [1026/3883] Data 0.001 (0.002) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0105(0.0754) 
EVAL: [1044/3883] Data 0.002 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0110(0.0755) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0657(0.0754) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0595(0.0752) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.0641(0.0749) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0719(0.0748) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0091(0.0746) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.0887(0.0745) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.0637(0.0742) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0417(0.0742) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0163(0.0741) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.1403(0.0742) 
EVAL: [1242/3883] Data 0.002 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.0905(0.0742) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.3220(0.0745) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.1530(0.0749) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0106(0.0750) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0334(0.0752) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.0804(0.0752) 
EVAL: [1350/3883] Data 0.002 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0020(0.0752) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.2127(0.0756) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0189(0.0754) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0509(0.0755) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.0660(0.0754) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.0574(0.0751) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0215(0.0752) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0470(0.0750) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.1523(0.0749) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.2545(0.0749) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.1618(0.0749) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0751(0.0748) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.1615(0.0745) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0056(0.0746) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.1515(0.0748) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1545(0.0752) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1542(0.0755) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.0752(0.0755) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.1375(0.0756) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0117(0.0755) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.0410(0.0757) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0228(0.0757) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.0820(0.0757) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.0582(0.0758) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.2917(0.0759) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.1685(0.0760) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0205(0.0759) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0303(0.0761) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0013(0.0761) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.1502(0.0762) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0159(0.0762) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.0720(0.0763) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0019(0.0762) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0068(0.0764) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.0766(0.0766) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1369(0.0767) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1648(0.0767) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0100(0.0770) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0161(0.0770) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.1857(0.0769) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.1530(0.0770) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0099(0.0769) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0753(0.0769) 
EVAL: [2124/3883] Data 0.002 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0215(0.0766) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.0440(0.0765) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.0560(0.0763) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0082(0.0761) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0441(0.0761) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0658(0.0760) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.1798(0.0761) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.4081(0.0763) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0296(0.0764) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.1495(0.0764) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.0978(0.0762) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0129(0.0762) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.0920(0.0762) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.1591(0.0761) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.1886(0.0763) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.2469(0.0763) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1245(0.0762) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0063(0.0761) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0536(0.0759) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0411(0.0758) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.1582(0.0760) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0383(0.0762) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0345(0.0761) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0700(0.0763) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.3643(0.0763) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.4259(0.0764) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0195(0.0765) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.0881(0.0766) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0137(0.0767) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.0793(0.0766) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0307(0.0766) 
EVAL: [2682/3883] Data 0.002 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.0757(0.0765) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0265(0.0765) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1416(0.0765) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0171(0.0766) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0432(0.0770) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.0990(0.0769) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0068(0.0769) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1253(0.0769) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.0576(0.0768) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.0729(0.0768) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.1980(0.0768) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0140(0.0767) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2125(0.0767) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.0896(0.0766) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.1641(0.0766) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0048(0.0765) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0215(0.0765) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0359(0.0766) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0501(0.0765) 
EVAL: [3024/3883] Data 0.002 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.1773(0.0765) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0305(0.0763) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.0624(0.0764) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1505(0.0763) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.3916(0.0764) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.0713(0.0764) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.0710(0.0763) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0602(0.0762) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.1611(0.0761) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0043(0.0760) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0123(0.0759) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1212(0.0759) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.3712(0.0760) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0189(0.0761) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0440(0.0760) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1172(0.0761) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0023(0.0761) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.1429(0.0760) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0461(0.0760) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0097(0.0760) 
EVAL: [3384/3883] Data 0.003 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.1002(0.0760) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1016(0.0761) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1569(0.0760) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.0987(0.0761) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.1878(0.0762) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0157(0.0762) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.0398(0.0763) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0289(0.0762) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5603(0.0765) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.1925(0.0765) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.0781(0.0763) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0247(0.0764) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0341(0.0764) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.1679(0.0764) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0117(0.0765) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0180(0.0764) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.3095(0.0765) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0208(0.0766) 
EVAL: [3708/3883] Data 0.002 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0253(0.0767) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.0846(0.0766) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.0817(0.0767) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0124(0.0766) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0040(0.0766) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0063(0.0766) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0110(0.0767) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0281(0.0767) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.1866(0.0768) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0265(0.0766) 
Epoch 5 - avg_train_loss: 0.3368  avg_val_loss: 0.0766  time: 1125s
Epoch 5 - AUC: 0.9636182896178385 - pAUC: 0.17513211316620478
Epoch 5 - Save Best Score: 0.1751 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.1657(0.0766) 
Epoch: [6][0/650] Data 0.863 (0.863) Elapsed 0m 1s (remain 14m 8s) Loss: 0.4135(0.4135) Grad: 10.3022  LR: 0.000051  
Epoch: [6][18/650] Data 0.272 (0.295) Elapsed 0m 13s (remain 7m 31s) Loss: 0.2558(0.3027) Grad: 9.4665  LR: 0.000051  
Epoch: [6][36/650] Data 0.272 (0.281) Elapsed 0m 25s (remain 7m 9s) Loss: 0.7502(0.3213) Grad: 19.7929  LR: 0.000051  
Epoch: [6][54/650] Data 0.272 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.2778(0.3280) Grad: 8.3278  LR: 0.000051  
Epoch: [6][72/650] Data 0.270 (0.273) Elapsed 0m 50s (remain 6m 39s) Loss: 0.1938(0.3381) Grad: 5.6240  LR: 0.000051  
Epoch: [6][90/650] Data 0.269 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.4761(0.3438) Grad: 13.7877  LR: 0.000051  
Epoch: [6][108/650] Data 0.272 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.4925(0.3434) Grad: 13.0967  LR: 0.000051  
Epoch: [6][126/650] Data 0.254 (0.271) Elapsed 1m 27s (remain 6m 0s) Loss: 0.2403(0.3350) Grad: 7.3619  LR: 0.000051  
Epoch: [6][144/650] Data 0.271 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.4898(0.3346) Grad: 11.5122  LR: 0.000051  
Epoch: [6][162/650] Data 0.272 (0.270) Elapsed 1m 52s (remain 5m 34s) Loss: 0.2717(0.3278) Grad: 5.9697  LR: 0.000051  
Epoch: [6][180/650] Data 0.271 (0.270) Elapsed 2m 4s (remain 5m 22s) Loss: 0.2895(0.3302) Grad: 9.1390  LR: 0.000051  
Epoch: [6][198/650] Data 0.263 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4375(0.3285) Grad: 13.7522  LR: 0.000051  
Epoch: [6][216/650] Data 0.270 (0.269) Elapsed 2m 28s (remain 4m 57s) Loss: 0.3005(0.3269) Grad: 6.7133  LR: 0.000051  
Epoch: [6][234/650] Data 0.266 (0.269) Elapsed 2m 41s (remain 4m 44s) Loss: 0.4027(0.3250) Grad: 8.7941  LR: 0.000051  
Epoch: [6][252/650] Data 0.264 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.4270(0.3273) Grad: 15.2972  LR: 0.000051  
Epoch: [6][270/650] Data 0.269 (0.268) Elapsed 3m 5s (remain 4m 20s) Loss: 0.2048(0.3263) Grad: 5.9822  LR: 0.000051  
Epoch: [6][288/650] Data 0.266 (0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.3121(0.3239) Grad: 6.9544  LR: 0.000051  
Epoch: [6][306/650] Data 0.266 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3492(0.3253) Grad: 12.4343  LR: 0.000051  
Epoch: [6][324/650] Data 0.270 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.2124(0.3242) Grad: 6.5450  LR: 0.000051  
Epoch: [6][342/650] Data 0.271 (0.267) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2346(0.3212) Grad: 7.9441  LR: 0.000051  
Epoch: [6][360/650] Data 0.272 (0.267) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1302(0.3203) Grad: 4.9421  LR: 0.000051  
Epoch: [6][378/650] Data 0.271 (0.267) Elapsed 4m 19s (remain 3m 5s) Loss: 0.4421(0.3207) Grad: 11.5121  LR: 0.000051  
Epoch: [6][396/650] Data 0.266 (0.267) Elapsed 4m 32s (remain 2m 53s) Loss: 0.1618(0.3203) Grad: 9.9625  LR: 0.000051  
Epoch: [6][414/650] Data 0.271 (0.267) Elapsed 4m 44s (remain 2m 41s) Loss: 0.5075(0.3190) Grad: 13.9074  LR: 0.000051  
Epoch: [6][432/650] Data 0.271 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.3391(0.3179) Grad: 9.9177  LR: 0.000051  
Epoch: [6][450/650] Data 0.271 (0.267) Elapsed 5m 9s (remain 2m 16s) Loss: 0.4648(0.3190) Grad: 10.5599  LR: 0.000051  
Epoch: [6][468/650] Data 0.272 (0.267) Elapsed 5m 21s (remain 2m 4s) Loss: 0.3260(0.3201) Grad: 12.7766  LR: 0.000051  
Epoch: [6][486/650] Data 0.270 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.4032(0.3203) Grad: 8.6771  LR: 0.000051  
Epoch: [6][504/650] Data 0.272 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1699(0.3190) Grad: 5.1662  LR: 0.000051  
Epoch: [6][522/650] Data 0.247 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0791(0.3187) Grad: 2.8019  LR: 0.000051  
Epoch: [6][540/650] Data 0.271 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.3469(0.3183) Grad: 9.7233  LR: 0.000051  
Epoch: [6][558/650] Data 0.260 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.1958(0.3176) Grad: 5.4328  LR: 0.000051  
Epoch: [6][576/650] Data 0.257 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.5494(0.3177) Grad: 16.0662  LR: 0.000051  
Epoch: [6][594/650] Data 0.271 (0.267) Elapsed 6m 47s (remain 0m 37s) Loss: 0.3536(0.3175) Grad: 10.4143  LR: 0.000051  
Epoch: [6][612/650] Data 0.272 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.4899(0.3169) Grad: 13.6072  LR: 0.000051  
Epoch: [6][630/650] Data 0.272 (0.267) Elapsed 7m 12s (remain 0m 13s) Loss: 0.5676(0.3152) Grad: 10.7350  LR: 0.000051  
Epoch: [6][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2386(0.3145) Grad: 8.8718  LR: 0.000051  
Epoch: [6][649/650] Data 0.273 (0.267) Elapsed 7m 25s (remain 0m 0s) Loss: 0.2590(0.3144) Grad: 8.6117  LR: 0.000051  
EVAL: [0/3883] Data 0.520 (0.520) Elapsed 0m 0s (remain 45m 10s) Loss: 0.1100(0.1100) 
EVAL: [18/3883] Data 0.001 (0.029) Elapsed 0m 3s (remain 13m 10s) Loss: 0.0982(0.0787) 
EVAL: [36/3883] Data 0.001 (0.016) Elapsed 0m 7s (remain 12m 11s) Loss: 0.0551(0.0910) 
EVAL: [54/3883] Data 0.001 (0.011) Elapsed 0m 10s (remain 11m 49s) Loss: 0.0035(0.0879) 
EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 36s) Loss: 0.2558(0.1049) 
EVAL: [90/3883] Data 0.001 (0.007) Elapsed 0m 16s (remain 11m 27s) Loss: 0.2138(0.1120) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 20s) Loss: 0.1706(0.1132) 
EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 14s) Loss: 0.3124(0.1174) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 8s) Loss: 0.1551(0.1148) 
EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 3s) Loss: 0.0169(0.1143) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 59s) Loss: 0.0042(0.1108) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 55s) Loss: 0.0417(0.1151) 
EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 51s) Loss: 0.0148(0.1158) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 47s) Loss: 0.1236(0.1151) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 43s) Loss: 0.0838(0.1124) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 39s) Loss: 0.0477(0.1144) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 36s) Loss: 0.3248(0.1168) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 32s) Loss: 0.0614(0.1165) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0603(0.1168) 
EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0034(0.1187) 
EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0504(0.1188) 
EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 18s) Loss: 0.0836(0.1184) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 15s) Loss: 0.3076(0.1182) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1857(0.1193) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0414(0.1195) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.1544(0.1201) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0489(0.1216) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.3141(0.1212) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 55s) Loss: 0.0491(0.1216) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.2091(0.1213) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0712(0.1201) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3125(0.1211) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0145(0.1200) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.0973(0.1192) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.0881(0.1188) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0547(0.1185) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0176(0.1182) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1475(0.1183) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1657(0.1187) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1310(0.1173) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1102(0.1167) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0230(0.1169) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1053(0.1169) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0588(0.1165) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.1306(0.1172) 
EVAL: [810/3883] Data 0.004 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0154(0.1176) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0188(0.1183) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0634(0.1184) 
EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 31s (remain 8m 50s) Loss: 0.2903(0.1193) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0032(0.1192) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2467(0.1192) 
EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1278(0.1195) 
EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.1745(0.1197) 
EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0909(0.1194) 
EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0275(0.1198) 
EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 54s (remain 8m 27s) Loss: 0.0926(0.1193) 
EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3017(0.1201) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0233(0.1195) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0149(0.1195) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0787(0.1194) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0711(0.1192) 
EVAL: [1098/3883] Data 0.002 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.0944(0.1187) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0819(0.1186) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0187(0.1183) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1165(0.1180) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1045(0.1174) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0752(0.1174) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0141(0.1174) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1940(0.1175) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1001(0.1176) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.4672(0.1179) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2081(0.1183) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0083(0.1185) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0359(0.1189) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1475(0.1186) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0139(0.1187) 
EVAL: [1368/3883] Data 0.002 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.2664(0.1194) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0946(0.1192) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0638(0.1193) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1028(0.1191) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1162(0.1187) 
EVAL: [1458/3883] Data 0.004 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0335(0.1188) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0592(0.1185) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2484(0.1185) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3675(0.1184) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2425(0.1184) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0898(0.1181) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2298(0.1178) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0047(0.1179) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2268(0.1181) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2021(0.1188) 
EVAL: [1638/3883] Data 0.003 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2108(0.1191) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1192(0.1192) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.1663(0.1192) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0333(0.1189) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.1189(0.1193) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0423(0.1193) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1374(0.1193) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1158(0.1195) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4451(0.1197) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.2391(0.1197) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0170(0.1198) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0289(0.1201) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0006(0.1201) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.1896(0.1202) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0209(0.1202) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.1260(0.1201) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0178(0.1200) 
EVAL: [1944/3883] Data 0.002 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0069(0.1203) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1123(0.1207) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1865(0.1208) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.0814(0.1207) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0117(0.1210) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0185(0.1210) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3959(0.1210) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2530(0.1214) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0139(0.1211) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0936(0.1211) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0238(0.1206) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.0595(0.1205) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1373(0.1202) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0026(0.1200) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0650(0.1199) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0695(0.1197) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2816(0.1198) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.5368(0.1199) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0355(0.1200) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2814(0.1201) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1359(0.1199) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0408(0.1199) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1064(0.1198) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.2808(0.1197) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2605(0.1199) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3779(0.1198) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.2102(0.1198) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0151(0.1196) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0848(0.1193) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0747(0.1192) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2582(0.1194) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0445(0.1196) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0325(0.1193) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.1030(0.1196) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.5155(0.1195) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5506(0.1195) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0389(0.1196) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1480(0.1199) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0215(0.1199) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1375(0.1199) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0885(0.1197) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1115(0.1196) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0283(0.1196) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 0.2116(0.1196) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0169(0.1198) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0813(0.1203) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 6s (remain 3m 14s) Loss: 0.1609(0.1203) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0075(0.1202) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2102(0.1202) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1106(0.1201) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1306(0.1201) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3577(0.1201) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0156(0.1199) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.2957(0.1199) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1684(0.1199) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2122(0.1199) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0089(0.1199) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0473(0.1199) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0715(0.1200) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0616(0.1198) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.3140(0.1199) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0550(0.1197) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1241(0.1198) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.2542(0.1197) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5405(0.1197) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1332(0.1197) 
EVAL: [3132/3883] Data 0.002 (0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.0933(0.1196) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0652(0.1195) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2345(0.1194) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0110(0.1192) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0253(0.1192) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.1718(0.1192) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4883(0.1192) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0197(0.1192) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0398(0.1192) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1589(0.1192) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0030(0.1193) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2455(0.1191) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0854(0.1191) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0114(0.1191) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.1482(0.1192) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1423(0.1194) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1642(0.1192) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1889(0.1193) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.3119(0.1194) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0278(0.1195) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.0927(0.1195) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0438(0.1194) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.6840(0.1197) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2169(0.1197) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.0727(0.1194) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0599(0.1195) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0495(0.1194) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.1761(0.1194) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0229(0.1195) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0331(0.1194) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4167(0.1195) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0123(0.1196) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0550(0.1198) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 53s (remain 0m 27s) Loss: 0.1469(0.1197) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1247(0.1197) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0100(0.1196) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0063(0.1195) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0042(0.1197) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0113(0.1197) 
EVAL: [3834/3883] Data 0.002 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0383(0.1198) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2638(0.1198) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0234(0.1196) 
Epoch 6 - avg_train_loss: 0.3144  avg_val_loss: 0.1197  time: 1126s
Epoch 6 - AUC: 0.9630759811819294 - pAUC: 0.17557517561384284
Epoch 6 - Save Best Score: 0.1756 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.2157(0.1197) 
Epoch: [7][0/650] Data 0.821 (0.821) Elapsed 0m 1s (remain 14m 21s) Loss: 0.1612(0.1612) Grad: 5.7295  LR: 0.000035  
Epoch: [7][18/650] Data 0.270 (0.291) Elapsed 0m 13s (remain 7m 30s) Loss: 0.1724(0.2488) Grad: 5.6207  LR: 0.000035  
Epoch: [7][36/650] Data 0.267 (0.277) Elapsed 0m 25s (remain 7m 8s) Loss: 0.7155(0.2845) Grad: 15.1702  LR: 0.000035  
Epoch: [7][54/650] Data 0.269 (0.274) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3322(0.2922) Grad: 12.9610  LR: 0.000035  
Epoch: [7][72/650] Data 0.271 (0.272) Elapsed 0m 50s (remain 6m 39s) Loss: 0.2008(0.2959) Grad: 7.8997  LR: 0.000035  
Epoch: [7][90/650] Data 0.269 (0.271) Elapsed 1m 2s (remain 6m 25s) Loss: 0.5616(0.3076) Grad: 11.5798  LR: 0.000035  
Epoch: [7][108/650] Data 0.266 (0.270) Elapsed 1m 15s (remain 6m 12s) Loss: 0.5660(0.3085) Grad: 13.9055  LR: 0.000035  
Epoch: [7][126/650] Data 0.266 (0.269) Elapsed 1m 27s (remain 5m 59s) Loss: 0.3743(0.3073) Grad: 8.8937  LR: 0.000035  
Epoch: [7][144/650] Data 0.270 (0.269) Elapsed 1m 39s (remain 5m 47s) Loss: 0.3292(0.2989) Grad: 7.4580  LR: 0.000035  
Epoch: [7][162/650] Data 0.217 (0.268) Elapsed 1m 52s (remain 5m 34s) Loss: 0.3137(0.2995) Grad: 8.5468  LR: 0.000035  
Epoch: [7][180/650] Data 0.267 (0.268) Elapsed 2m 4s (remain 5m 22s) Loss: 0.3964(0.3079) Grad: 7.2522  LR: 0.000035  
Epoch: [7][198/650] Data 0.267 (0.268) Elapsed 2m 16s (remain 5m 9s) Loss: 0.4010(0.3030) Grad: 10.4140  LR: 0.000035  
Epoch: [7][216/650] Data 0.269 (0.268) Elapsed 2m 28s (remain 4m 57s) Loss: 0.4564(0.3009) Grad: 16.9879  LR: 0.000035  
Epoch: [7][234/650] Data 0.262 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.3584(0.3019) Grad: 8.7614  LR: 0.000035  
Epoch: [7][252/650] Data 0.267 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.1436(0.2989) Grad: 4.8792  LR: 0.000035  
Epoch: [7][270/650] Data 0.263 (0.267) Elapsed 3m 5s (remain 4m 19s) Loss: 0.0901(0.2963) Grad: 3.4946  LR: 0.000035  
Epoch: [7][288/650] Data 0.272 (0.266) Elapsed 3m 18s (remain 4m 7s) Loss: 0.1869(0.2987) Grad: 5.2454  LR: 0.000035  
Epoch: [7][306/650] Data 0.264 (0.266) Elapsed 3m 30s (remain 3m 55s) Loss: 0.3532(0.2999) Grad: 12.4663  LR: 0.000035  
Epoch: [7][324/650] Data 0.263 (0.266) Elapsed 3m 42s (remain 3m 42s) Loss: 0.2684(0.2972) Grad: 7.3688  LR: 0.000035  
Epoch: [7][342/650] Data 0.257 (0.266) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2496(0.2954) Grad: 7.3326  LR: 0.000035  
Epoch: [7][360/650] Data 0.271 (0.266) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1261(0.2934) Grad: 4.1121  LR: 0.000035  
Epoch: [7][378/650] Data 0.270 (0.266) Elapsed 4m 19s (remain 3m 5s) Loss: 0.3463(0.2930) Grad: 10.4062  LR: 0.000035  
Epoch: [7][396/650] Data 0.272 (0.266) Elapsed 4m 32s (remain 2m 53s) Loss: 0.2351(0.2914) Grad: 13.9747  LR: 0.000035  
Epoch: [7][414/650] Data 0.272 (0.266) Elapsed 4m 44s (remain 2m 41s) Loss: 0.3755(0.2889) Grad: 9.9085  LR: 0.000035  
Epoch: [7][432/650] Data 0.272 (0.266) Elapsed 4m 56s (remain 2m 28s) Loss: 0.5131(0.2889) Grad: 13.5715  LR: 0.000035  
Epoch: [7][450/650] Data 0.267 (0.266) Elapsed 5m 9s (remain 2m 16s) Loss: 0.5115(0.2919) Grad: 14.3260  LR: 0.000035  
Epoch: [7][468/650] Data 0.272 (0.266) Elapsed 5m 21s (remain 2m 4s) Loss: 0.1944(0.2926) Grad: 8.0784  LR: 0.000035  
Epoch: [7][486/650] Data 0.269 (0.266) Elapsed 5m 33s (remain 1m 51s) Loss: 0.3605(0.2924) Grad: 10.9265  LR: 0.000035  
Epoch: [7][504/650] Data 0.263 (0.266) Elapsed 5m 45s (remain 1m 39s) Loss: 0.2336(0.2913) Grad: 7.1656  LR: 0.000035  
Epoch: [7][522/650] Data 0.259 (0.266) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0984(0.2902) Grad: 3.6238  LR: 0.000035  
Epoch: [7][540/650] Data 0.271 (0.266) Elapsed 6m 10s (remain 1m 14s) Loss: 0.2180(0.2894) Grad: 6.0811  LR: 0.000035  
Epoch: [7][558/650] Data 0.270 (0.266) Elapsed 6m 22s (remain 1m 2s) Loss: 0.2116(0.2892) Grad: 6.2230  LR: 0.000035  
Epoch: [7][576/650] Data 0.238 (0.266) Elapsed 6m 35s (remain 0m 49s) Loss: 0.1135(0.2884) Grad: 4.9512  LR: 0.000035  
Epoch: [7][594/650] Data 0.272 (0.266) Elapsed 6m 47s (remain 0m 37s) Loss: 0.2819(0.2886) Grad: 8.3961  LR: 0.000035  
Epoch: [7][612/650] Data 0.260 (0.266) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1868(0.2891) Grad: 6.7516  LR: 0.000035  
Epoch: [7][630/650] Data 0.272 (0.266) Elapsed 7m 12s (remain 0m 13s) Loss: 0.8211(0.2892) Grad: 15.7491  LR: 0.000035  
Epoch: [7][648/650] Data 0.272 (0.266) Elapsed 7m 24s (remain 0m 0s) Loss: 0.2454(0.2885) Grad: 5.6429  LR: 0.000035  
Epoch: [7][649/650] Data 0.273 (0.266) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3781(0.2886) Grad: 8.3929  LR: 0.000035  
EVAL: [0/3883] Data 0.475 (0.475) Elapsed 0m 0s (remain 42m 15s) Loss: 0.1448(0.1448) 
EVAL: [18/3883] Data 0.001 (0.026) Elapsed 0m 3s (remain 12m 56s) Loss: 0.1257(0.1032) 
EVAL: [36/3883] Data 0.001 (0.014) Elapsed 0m 6s (remain 12m 4s) Loss: 0.0713(0.1238) 
EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 44s) Loss: 0.0202(0.1184) 
EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 32s) Loss: 0.2557(0.1350) 
EVAL: [90/3883] Data 0.001 (0.006) Elapsed 0m 16s (remain 11m 24s) Loss: 0.3096(0.1405) 
EVAL: [108/3883] Data 0.001 (0.005) Elapsed 0m 19s (remain 11m 17s) Loss: 0.2121(0.1443) 
EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 11s) Loss: 0.3703(0.1498) 
EVAL: [144/3883] Data 0.001 (0.004) Elapsed 0m 25s (remain 11m 6s) Loss: 0.2624(0.1474) 
EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 2s) Loss: 0.0421(0.1482) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 57s) Loss: 0.0147(0.1443) 
EVAL: [198/3883] Data 0.001 (0.003) Elapsed 0m 35s (remain 10m 53s) Loss: 0.0701(0.1475) 
EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 49s) Loss: 0.0716(0.1474) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1318(0.1460) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.1274(0.1428) 
EVAL: [270/3883] Data 0.002 (0.003) Elapsed 0m 47s (remain 10m 38s) Loss: 0.0621(0.1451) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 35s) Loss: 0.3662(0.1474) 
EVAL: [306/3883] Data 0.001 (0.002) Elapsed 0m 54s (remain 10m 31s) Loss: 0.0571(0.1467) 
EVAL: [324/3883] Data 0.001 (0.002) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0643(0.1469) 
EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 24s) Loss: 0.0230(0.1488) 
EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.1482(0.1486) 
EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 17s) Loss: 0.0814(0.1489) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 9s (remain 10m 14s) Loss: 0.3477(0.1487) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 10s) Loss: 0.1741(0.1494) 
EVAL: [432/3883] Data 0.002 (0.002) Elapsed 1m 16s (remain 10m 7s) Loss: 0.0966(0.1498) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.1760(0.1502) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0746(0.1515) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 57s) Loss: 0.3365(0.1511) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0790(0.1513) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.3282(0.1508) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 47s) Loss: 0.0916(0.1495) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.3884(0.1496) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0251(0.1486) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1690(0.1478) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 34s) Loss: 0.0962(0.1473) 
EVAL: [630/3883] Data 0.002 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.0494(0.1470) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0699(0.1469) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1903(0.1467) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 21s) Loss: 0.2393(0.1472) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.1916(0.1456) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1720(0.1450) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0194(0.1455) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 12s (remain 9m 9s) Loss: 0.1851(0.1455) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 5s) Loss: 0.0723(0.1450) 
EVAL: [792/3883] Data 0.003 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1385(0.1455) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0397(0.1455) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0311(0.1457) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0646(0.1459) 
EVAL: [864/3883] Data 0.001 (0.001) Elapsed 2m 31s (remain 8m 49s) Loss: 0.3254(0.1465) 
EVAL: [882/3883] Data 0.001 (0.001) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0056(0.1465) 
EVAL: [900/3883] Data 0.001 (0.001) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2954(0.1461) 
EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1702(0.1460) 
EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2105(0.1462) 
EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 33s) Loss: 0.0736(0.1458) 
EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.0447(0.1460) 
EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.1096(0.1455) 
EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3416(0.1460) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0340(0.1453) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0195(0.1453) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.0971(0.1450) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.1188(0.1450) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1510(0.1446) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 15s (remain 8m 5s) Loss: 0.1118(0.1447) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0248(0.1443) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 58s) Loss: 0.1688(0.1442) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.0970(0.1436) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1000(0.1437) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0272(0.1434) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1667(0.1436) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1179(0.1437) 
EVAL: [1260/3883] Data 0.002 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.5210(0.1441) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2604(0.1446) 
EVAL: [1296/3883] Data 0.002 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0227(0.1448) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0513(0.1452) 
EVAL: [1332/3883] Data 0.002 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1692(0.1449) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 56s (remain 7m 24s) Loss: 0.0225(0.1450) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 20s) Loss: 0.3262(0.1457) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0735(0.1454) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0821(0.1455) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1582(0.1456) 
EVAL: [1440/3883] Data 0.002 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1308(0.1451) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0395(0.1450) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 18s (remain 7m 1s) Loss: 0.0714(0.1448) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2972(0.1446) 
EVAL: [1512/3883] Data 0.004 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3462(0.1445) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.3383(0.1448) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0908(0.1445) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2241(0.1442) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.0186(0.1442) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.3535(0.1444) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.2238(0.1451) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.2715(0.1454) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1248(0.1454) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2577(0.1455) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.0355(0.1452) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.0729(0.1455) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0598(0.1456) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1460(0.1455) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1734(0.1458) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4779(0.1459) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.2189(0.1460) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0204(0.1459) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 21s (remain 5m 58s) Loss: 0.0513(0.1462) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0081(0.1463) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.3009(0.1462) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0316(0.1462) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 45s) Loss: 0.1280(0.1462) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0212(0.1461) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0161(0.1462) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.2127(0.1466) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2378(0.1466) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1003(0.1466) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0159(0.1469) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0318(0.1468) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3910(0.1466) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.3007(0.1470) 
EVAL: [2088/3883] Data 0.002 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0154(0.1467) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1147(0.1466) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0700(0.1461) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0586(0.1460) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1286(0.1456) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0095(0.1453) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0699(0.1452) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.1051(0.1452) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2346(0.1453) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.6499(0.1455) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0751(0.1457) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2255(0.1457) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.2285(0.1456) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0221(0.1455) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1367(0.1455) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.3123(0.1453) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2926(0.1455) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3914(0.1453) 
EVAL: [2412/3883] Data 0.002 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.2428(0.1452) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.0446(0.1450) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0977(0.1447) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0916(0.1446) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.3714(0.1447) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0582(0.1450) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0471(0.1446) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0880(0.1450) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.5781(0.1451) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.6286(0.1451) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0596(0.1453) 
EVAL: [2610/3883] Data 0.002 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1816(0.1455) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0436(0.1456) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.2813(0.1454) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.1067(0.1453) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1723(0.1452) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0946(0.1452) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.2067(0.1452) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0383(0.1454) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0946(0.1459) 
EVAL: [2772/3883] Data 0.002 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1875(0.1461) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.0398(0.1460) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2054(0.1460) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1293(0.1458) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1667(0.1458) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3024(0.1458) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0246(0.1455) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2965(0.1456) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.2060(0.1456) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2619(0.1456) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0173(0.1456) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0544(0.1455) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0998(0.1456) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.1103(0.1455) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.3508(0.1456) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0529(0.1454) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1617(0.1455) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.2239(0.1455) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.6479(0.1456) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1604(0.1455) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.0970(0.1454) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 11s (remain 2m 8s) Loss: 0.0931(0.1453) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.3334(0.1452) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0168(0.1450) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0597(0.1450) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.3116(0.1450) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.5216(0.1451) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0224(0.1450) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.1087(0.1449) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1454(0.1449) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0083(0.1450) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2845(0.1448) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0731(0.1449) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0281(0.1448) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.1612(0.1449) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1911(0.1450) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.2325(0.1449) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2356(0.1451) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.3399(0.1453) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0534(0.1453) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1249(0.1454) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0685(0.1453) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.7123(0.1456) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2773(0.1456) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1855(0.1454) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0971(0.1454) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0804(0.1454) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.2167(0.1454) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0445(0.1455) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0644(0.1454) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4958(0.1456) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0485(0.1457) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0757(0.1458) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.2444(0.1458) 
EVAL: [3744/3883] Data 0.002 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1375(0.1458) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0313(0.1457) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0116(0.1456) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0133(0.1457) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0298(0.1457) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0490(0.1458) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.3004(0.1458) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0393(0.1456) 
Epoch 7 - avg_train_loss: 0.2886  avg_val_loss: 0.1456  time: 1126s
Epoch 7 - AUC: 0.9630336620051126 - pAUC: 0.17501879658868763
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.4468(0.1456) 
Epoch: [8][0/650] Data 0.873 (0.873) Elapsed 0m 1s (remain 14m 20s) Loss: 0.2792(0.2792) Grad: 10.2268  LR: 0.000021  
Epoch: [8][18/650] Data 0.246 (0.294) Elapsed 0m 13s (remain 7m 31s) Loss: 0.0727(0.2962) Grad: 4.0751  LR: 0.000021  
Epoch: [8][36/650] Data 0.263 (0.281) Elapsed 0m 25s (remain 7m 9s) Loss: 0.4277(0.2959) Grad: 9.9074  LR: 0.000021  
Epoch: [8][54/650] Data 0.271 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3205(0.3050) Grad: 11.2686  LR: 0.000021  
Epoch: [8][72/650] Data 0.268 (0.273) Elapsed 0m 50s (remain 6m 39s) Loss: 0.1730(0.3062) Grad: 8.0313  LR: 0.000021  
Epoch: [8][90/650] Data 0.269 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.3141(0.3067) Grad: 11.0121  LR: 0.000021  
Epoch: [8][108/650] Data 0.270 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.2129(0.3038) Grad: 10.4008  LR: 0.000021  
Epoch: [8][126/650] Data 0.271 (0.271) Elapsed 1m 27s (remain 6m 0s) Loss: 0.5752(0.3019) Grad: 10.9308  LR: 0.000021  
Epoch: [8][144/650] Data 0.272 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.2908(0.2975) Grad: 10.3126  LR: 0.000021  
Epoch: [8][162/650] Data 0.265 (0.270) Elapsed 1m 52s (remain 5m 34s) Loss: 0.3415(0.2936) Grad: 8.3322  LR: 0.000021  
Epoch: [8][180/650] Data 0.260 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.2925(0.3028) Grad: 6.4062  LR: 0.000021  
Epoch: [8][198/650] Data 0.264 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.2568(0.3009) Grad: 9.4483  LR: 0.000021  
Epoch: [8][216/650] Data 0.271 (0.269) Elapsed 2m 28s (remain 4m 57s) Loss: 0.5663(0.2988) Grad: 11.4609  LR: 0.000021  
Epoch: [8][234/650] Data 0.267 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.2216(0.3038) Grad: 7.8575  LR: 0.000021  
Epoch: [8][252/650] Data 0.268 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.1475(0.2997) Grad: 4.8561  LR: 0.000021  
Epoch: [8][270/650] Data 0.257 (0.268) Elapsed 3m 5s (remain 4m 20s) Loss: 0.1878(0.2969) Grad: 6.0573  LR: 0.000021  
Epoch: [8][288/650] Data 0.245 (0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.1843(0.2954) Grad: 7.6893  LR: 0.000021  
Epoch: [8][306/650] Data 0.270 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.2635(0.2950) Grad: 9.8591  LR: 0.000021  
Epoch: [8][324/650] Data 0.265 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1655(0.2929) Grad: 6.1340  LR: 0.000021  
Epoch: [8][342/650] Data 0.271 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.4283(0.2915) Grad: 14.1310  LR: 0.000021  
Epoch: [8][360/650] Data 0.267 (0.267) Elapsed 4m 7s (remain 3m 18s) Loss: 0.2797(0.2907) Grad: 10.3024  LR: 0.000021  
Epoch: [8][378/650] Data 0.264 (0.267) Elapsed 4m 19s (remain 3m 5s) Loss: 0.1454(0.2892) Grad: 6.6493  LR: 0.000021  
Epoch: [8][396/650] Data 0.266 (0.267) Elapsed 4m 32s (remain 2m 53s) Loss: 0.3242(0.2889) Grad: 10.5415  LR: 0.000021  
Epoch: [8][414/650] Data 0.270 (0.266) Elapsed 4m 44s (remain 2m 41s) Loss: 0.1584(0.2855) Grad: 5.0831  LR: 0.000021  
Epoch: [8][432/650] Data 0.270 (0.266) Elapsed 4m 56s (remain 2m 28s) Loss: 0.5808(0.2872) Grad: 14.7858  LR: 0.000021  
Epoch: [8][450/650] Data 0.264 (0.266) Elapsed 5m 9s (remain 2m 16s) Loss: 0.3834(0.2865) Grad: 10.4115  LR: 0.000021  
Epoch: [8][468/650] Data 0.271 (0.266) Elapsed 5m 21s (remain 2m 4s) Loss: 0.1216(0.2855) Grad: 8.7635  LR: 0.000021  
Epoch: [8][486/650] Data 0.263 (0.266) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2855(0.2885) Grad: 8.0948  LR: 0.000021  
Epoch: [8][504/650] Data 0.270 (0.266) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1560(0.2872) Grad: 5.3591  LR: 0.000021  
Epoch: [8][522/650] Data 0.272 (0.266) Elapsed 5m 58s (remain 1m 26s) Loss: 0.1662(0.2871) Grad: 7.6691  LR: 0.000021  
Epoch: [8][540/650] Data 0.271 (0.266) Elapsed 6m 10s (remain 1m 14s) Loss: 0.4267(0.2865) Grad: 10.7915  LR: 0.000021  
Epoch: [8][558/650] Data 0.271 (0.266) Elapsed 6m 22s (remain 1m 2s) Loss: 0.2560(0.2869) Grad: 7.7097  LR: 0.000021  
Epoch: [8][576/650] Data 0.269 (0.266) Elapsed 6m 35s (remain 0m 49s) Loss: 0.1878(0.2853) Grad: 5.7360  LR: 0.000021  
Epoch: [8][594/650] Data 0.263 (0.266) Elapsed 6m 47s (remain 0m 37s) Loss: 0.5244(0.2854) Grad: 11.0146  LR: 0.000021  
Epoch: [8][612/650] Data 0.271 (0.266) Elapsed 6m 59s (remain 0m 25s) Loss: 0.4782(0.2861) Grad: 10.7222  LR: 0.000021  
Epoch: [8][630/650] Data 0.265 (0.266) Elapsed 7m 12s (remain 0m 13s) Loss: 0.7181(0.2857) Grad: 16.2429  LR: 0.000021  
Epoch: [8][648/650] Data 0.273 (0.266) Elapsed 7m 24s (remain 0m 0s) Loss: 0.1003(0.2849) Grad: 3.6371  LR: 0.000021  
Epoch: [8][649/650] Data 0.273 (0.266) Elapsed 7m 25s (remain 0m 0s) Loss: 0.2135(0.2848) Grad: 5.0881  LR: 0.000021  
EVAL: [0/3883] Data 0.526 (0.526) Elapsed 0m 0s (remain 45m 30s) Loss: 0.1214(0.1214) 
EVAL: [18/3883] Data 0.001 (0.029) Elapsed 0m 3s (remain 13m 6s) Loss: 0.0971(0.0917) 
EVAL: [36/3883] Data 0.001 (0.015) Elapsed 0m 7s (remain 12m 9s) Loss: 0.0528(0.1009) 
EVAL: [54/3883] Data 0.001 (0.011) Elapsed 0m 10s (remain 11m 47s) Loss: 0.0162(0.0975) 
EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 34s) Loss: 0.2502(0.1117) 
EVAL: [90/3883] Data 0.001 (0.007) Elapsed 0m 16s (remain 11m 26s) Loss: 0.2226(0.1168) 
EVAL: [108/3883] Data 0.001 (0.006) Elapsed 0m 19s (remain 11m 19s) Loss: 0.1690(0.1192) 
EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 13s) Loss: 0.3011(0.1223) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 25s (remain 11m 8s) Loss: 0.1659(0.1185) 
EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 3s) Loss: 0.0256(0.1189) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 58s) Loss: 0.0082(0.1160) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 54s) Loss: 0.0314(0.1199) 
EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 50s) Loss: 0.0655(0.1207) 
EVAL: [234/3883] Data 0.001 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1108(0.1200) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.0682(0.1171) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 39s) Loss: 0.0660(0.1190) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 35s) Loss: 0.3260(0.1208) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 32s) Loss: 0.0473(0.1205) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0577(0.1201) 
EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 25s) Loss: 0.0083(0.1222) 
EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0975(0.1220) 
EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 18s) Loss: 0.0593(0.1220) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 10s (remain 10m 14s) Loss: 0.2474(0.1217) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1649(0.1225) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 8s) Loss: 0.0641(0.1224) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.1556(0.1231) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0385(0.1244) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.2705(0.1240) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0547(0.1249) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.2338(0.1243) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0475(0.1233) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.2796(0.1237) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0206(0.1229) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1175(0.1222) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.0703(0.1217) 
EVAL: [630/3883] Data 0.002 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.0438(0.1214) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0498(0.1215) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.1721(0.1215) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.2022(0.1217) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.1204(0.1203) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.1220(0.1197) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0128(0.1200) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1355(0.1202) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0416(0.1198) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1321(0.1203) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0174(0.1203) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0182(0.1206) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0520(0.1204) 
EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 31s (remain 8m 50s) Loss: 0.2389(0.1212) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0057(0.1210) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2384(0.1206) 
EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1497(0.1207) 
EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2207(0.1208) 
EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0393(0.1204) 
EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.0443(0.1205) 
EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.0967(0.1200) 
EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3404(0.1207) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0217(0.1201) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0156(0.1199) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.0889(0.1198) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0585(0.1197) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1269(0.1193) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0762(0.1195) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0128(0.1193) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1500(0.1191) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.1003(0.1186) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1230(0.1185) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0177(0.1185) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1887(0.1185) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1118(0.1186) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.4290(0.1188) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2272(0.1192) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0121(0.1193) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0347(0.1198) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1512(0.1196) 
EVAL: [1350/3883] Data 0.002 (0.001) Elapsed 3m 56s (remain 7m 24s) Loss: 0.0163(0.1196) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 20s) Loss: 0.2239(0.1204) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0635(0.1204) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0670(0.1206) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1122(0.1206) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1574(0.1201) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0500(0.1202) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 1s) Loss: 0.0802(0.1199) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2303(0.1197) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3411(0.1196) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2978(0.1198) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0970(0.1196) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2444(0.1194) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.0083(0.1194) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2415(0.1198) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1871(0.1203) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1731(0.1206) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1200(0.1206) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2178(0.1206) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.0259(0.1204) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.1079(0.1208) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0366(0.1208) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1137(0.1208) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1400(0.1211) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.3607(0.1212) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.2070(0.1213) 
EVAL: [1818/3883] Data 0.002 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0129(0.1213) 
EVAL: [1836/3883] Data 0.002 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0369(0.1215) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0064(0.1216) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2279(0.1216) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0164(0.1215) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 45s) Loss: 0.1070(0.1215) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0104(0.1214) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0096(0.1216) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1740(0.1219) 
EVAL: [1980/3883] Data 0.002 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1918(0.1220) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1128(0.1220) 
EVAL: [2016/3883] Data 0.002 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0116(0.1223) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0219(0.1222) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.3381(0.1221) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.2170(0.1224) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0133(0.1221) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0963(0.1221) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0834(0.1218) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0632(0.1217) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1359(0.1214) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0076(0.1212) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0729(0.1211) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0841(0.1210) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.2169(0.1212) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.5387(0.1212) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0686(0.1214) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2542(0.1215) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.1923(0.1213) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0269(0.1212) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.0904(0.1211) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.2938(0.1210) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.1867(0.1212) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.3843(0.1211) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1632(0.1211) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.0386(0.1210) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0925(0.1207) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0870(0.1206) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.2603(0.1207) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0400(0.1209) 
EVAL: [2520/3883] Data 0.002 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0408(0.1206) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.1029(0.1210) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4657(0.1209) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5159(0.1210) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0422(0.1211) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1755(0.1213) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0424(0.1213) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1898(0.1213) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.0613(0.1211) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1343(0.1211) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0783(0.1211) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1798(0.1212) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0410(0.1213) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0875(0.1217) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1354(0.1218) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.0179(0.1217) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1994(0.1217) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1359(0.1216) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1025(0.1216) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.3260(0.1215) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0124(0.1213) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2616(0.1214) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1344(0.1213) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.3126(0.1212) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0097(0.1213) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0546(0.1213) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0642(0.1214) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0730(0.1213) 
EVAL: [3024/3883] Data 0.003 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.3613(0.1213) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0355(0.1211) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 23s) Loss: 0.1339(0.1212) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.2039(0.1211) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5491(0.1212) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1301(0.1212) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.0671(0.1211) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 11s (remain 2m 8s) Loss: 0.0762(0.1210) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2602(0.1209) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0073(0.1206) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0414(0.1206) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2588(0.1206) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4346(0.1207) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0139(0.1206) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0763(0.1205) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 42s) Loss: 0.1680(0.1206) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0047(0.1206) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2259(0.1205) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0737(0.1205) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0149(0.1204) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.1307(0.1206) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1870(0.1207) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1482(0.1206) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.2171(0.1208) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2650(0.1209) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0282(0.1210) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1525(0.1211) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 14s (remain 1m 5s) Loss: 0.0677(0.1210) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.5701(0.1212) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2618(0.1212) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1475(0.1210) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0878(0.1210) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0617(0.1209) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.1590(0.1210) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0380(0.1210) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0724(0.1209) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4441(0.1211) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0223(0.1212) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0575(0.1214) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1847(0.1214) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 55s (remain 0m 24s) Loss: 0.1493(0.1214) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0287(0.1213) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0065(0.1212) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0079(0.1213) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0170(0.1213) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0360(0.1214) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.2034(0.1214) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 17s (remain 0m 2s) Loss: 0.0397(0.1212) 
Epoch 8 - avg_train_loss: 0.2848  avg_val_loss: 0.1212  time: 1125s
Epoch 8 - AUC: 0.9662144744473803 - pAUC: 0.1779875835105583
Epoch 8 - Save Best Score: 0.1780 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 19s (remain 0m 0s) Loss: 0.2291(0.1212) 
Epoch: [9][0/650] Data 0.838 (0.838) Elapsed 0m 1s (remain 13m 56s) Loss: 0.3655(0.3655) Grad: 8.7047  LR: 0.000010  
Epoch: [9][18/650] Data 0.264 (0.292) Elapsed 0m 13s (remain 7m 31s) Loss: 0.1435(0.2465) Grad: 3.1952  LR: 0.000010  
Epoch: [9][36/650] Data 0.268 (0.281) Elapsed 0m 25s (remain 7m 8s) Loss: 0.5164(0.2674) Grad: 26.3932  LR: 0.000010  
Epoch: [9][54/650] Data 0.270 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.3373(0.2798) Grad: 9.3891  LR: 0.000010  
Epoch: [9][72/650] Data 0.264 (0.273) Elapsed 0m 50s (remain 6m 39s) Loss: 0.1889(0.2873) Grad: 5.5536  LR: 0.000010  
Epoch: [9][90/650] Data 0.266 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.2429(0.2793) Grad: 8.8979  LR: 0.000010  
Epoch: [9][108/650] Data 0.270 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.2673(0.2798) Grad: 7.3901  LR: 0.000010  
Epoch: [9][126/650] Data 0.269 (0.271) Elapsed 1m 27s (remain 6m 0s) Loss: 0.1782(0.2701) Grad: 5.3718  LR: 0.000010  
Epoch: [9][144/650] Data 0.267 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.2164(0.2670) Grad: 7.8991  LR: 0.000010  
Epoch: [9][162/650] Data 0.268 (0.269) Elapsed 1m 52s (remain 5m 34s) Loss: 0.1930(0.2619) Grad: 4.7832  LR: 0.000010  
Epoch: [9][180/650] Data 0.269 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.2784(0.2681) Grad: 6.4986  LR: 0.000010  
Epoch: [9][198/650] Data 0.259 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.1528(0.2678) Grad: 8.0429  LR: 0.000010  
Epoch: [9][216/650] Data 0.265 (0.269) Elapsed 2m 28s (remain 4m 57s) Loss: 0.2480(0.2691) Grad: 7.6756  LR: 0.000010  
Epoch: [9][234/650] Data 0.264 (0.269) Elapsed 2m 41s (remain 4m 44s) Loss: 0.2100(0.2726) Grad: 5.1607  LR: 0.000010  
Epoch: [9][252/650] Data 0.269 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.3568(0.2699) Grad: 13.9682  LR: 0.000010  
Epoch: [9][270/650] Data 0.271 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.1150(0.2669) Grad: 4.2333  LR: 0.000010  
Epoch: [9][288/650] Data 0.266 (0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.1834(0.2663) Grad: 5.0920  LR: 0.000010  
Epoch: [9][306/650] Data 0.261 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.5434(0.2685) Grad: 15.8955  LR: 0.000010  
Epoch: [9][324/650] Data 0.264 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.1137(0.2680) Grad: 5.6304  LR: 0.000010  
Epoch: [9][342/650] Data 0.262 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2298(0.2652) Grad: 6.9337  LR: 0.000010  
Epoch: [9][360/650] Data 0.271 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1744(0.2623) Grad: 4.6304  LR: 0.000010  
Epoch: [9][378/650] Data 0.258 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.3014(0.2604) Grad: 9.6371  LR: 0.000010  
Epoch: [9][396/650] Data 0.262 (0.268) Elapsed 4m 32s (remain 2m 53s) Loss: 0.0740(0.2579) Grad: 3.9812  LR: 0.000010  
Epoch: [9][414/650] Data 0.271 (0.268) Elapsed 4m 44s (remain 2m 41s) Loss: 0.5871(0.2571) Grad: 13.6334  LR: 0.000010  
Epoch: [9][432/650] Data 0.270 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.2971(0.2568) Grad: 13.8589  LR: 0.000010  
Epoch: [9][450/650] Data 0.263 (0.267) Elapsed 5m 8s (remain 2m 16s) Loss: 0.3135(0.2602) Grad: 11.2665  LR: 0.000010  
Epoch: [9][468/650] Data 0.266 (0.267) Elapsed 5m 21s (remain 2m 3s) Loss: 0.1600(0.2603) Grad: 5.3893  LR: 0.000010  
Epoch: [9][486/650] Data 0.270 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2368(0.2602) Grad: 6.0548  LR: 0.000010  
Epoch: [9][504/650] Data 0.272 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.4316(0.2605) Grad: 11.6346  LR: 0.000010  
Epoch: [9][522/650] Data 0.268 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0571(0.2610) Grad: 2.2202  LR: 0.000010  
Epoch: [9][540/650] Data 0.271 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.3164(0.2617) Grad: 6.8745  LR: 0.000010  
Epoch: [9][558/650] Data 0.266 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.1056(0.2611) Grad: 3.3047  LR: 0.000010  
Epoch: [9][576/650] Data 0.247 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.1810(0.2611) Grad: 5.5736  LR: 0.000010  
Epoch: [9][594/650] Data 0.268 (0.267) Elapsed 6m 47s (remain 0m 37s) Loss: 0.3238(0.2622) Grad: 7.9921  LR: 0.000010  
Epoch: [9][612/650] Data 0.271 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1670(0.2614) Grad: 9.7635  LR: 0.000010  
Epoch: [9][630/650] Data 0.254 (0.267) Elapsed 7m 12s (remain 0m 13s) Loss: 0.5266(0.2618) Grad: 11.0858  LR: 0.000010  
Epoch: [9][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.1832(0.2627) Grad: 5.1640  LR: 0.000010  
Epoch: [9][649/650] Data 0.273 (0.267) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3681(0.2629) Grad: 8.9645  LR: 0.000010  
EVAL: [0/3883] Data 0.480 (0.480) Elapsed 0m 0s (remain 42m 31s) Loss: 0.1005(0.1005) 
EVAL: [18/3883] Data 0.001 (0.027) Elapsed 0m 3s (remain 12m 57s) Loss: 0.0831(0.0763) 
EVAL: [36/3883] Data 0.001 (0.014) Elapsed 0m 6s (remain 12m 4s) Loss: 0.0466(0.0909) 
EVAL: [54/3883] Data 0.001 (0.010) Elapsed 0m 10s (remain 11m 44s) Loss: 0.0182(0.0892) 
EVAL: [72/3883] Data 0.001 (0.008) Elapsed 0m 13s (remain 11m 32s) Loss: 0.2322(0.1021) 
EVAL: [90/3883] Data 0.001 (0.006) Elapsed 0m 16s (remain 11m 24s) Loss: 0.2105(0.1064) 
EVAL: [108/3883] Data 0.001 (0.005) Elapsed 0m 19s (remain 11m 17s) Loss: 0.1491(0.1084) 
EVAL: [126/3883] Data 0.001 (0.005) Elapsed 0m 22s (remain 11m 11s) Loss: 0.3042(0.1116) 
EVAL: [144/3883] Data 0.001 (0.004) Elapsed 0m 25s (remain 11m 6s) Loss: 0.1500(0.1069) 
EVAL: [162/3883] Data 0.001 (0.004) Elapsed 0m 29s (remain 11m 2s) Loss: 0.0361(0.1076) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 10m 57s) Loss: 0.0075(0.1049) 
EVAL: [198/3883] Data 0.001 (0.003) Elapsed 0m 35s (remain 10m 53s) Loss: 0.0272(0.1086) 
EVAL: [216/3883] Data 0.001 (0.003) Elapsed 0m 38s (remain 10m 49s) Loss: 0.0299(0.1089) 
EVAL: [234/3883] Data 0.002 (0.003) Elapsed 0m 41s (remain 10m 46s) Loss: 0.1235(0.1084) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 42s) Loss: 0.0650(0.1059) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 47s (remain 10m 38s) Loss: 0.0329(0.1076) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 35s) Loss: 0.2880(0.1093) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 31s) Loss: 0.0431(0.1090) 
EVAL: [324/3883] Data 0.001 (0.002) Elapsed 0m 57s (remain 10m 28s) Loss: 0.0581(0.1087) 
EVAL: [342/3883] Data 0.001 (0.002) Elapsed 1m 0s (remain 10m 24s) Loss: 0.0061(0.1105) 
EVAL: [360/3883] Data 0.001 (0.002) Elapsed 1m 3s (remain 10m 21s) Loss: 0.0951(0.1105) 
EVAL: [378/3883] Data 0.001 (0.002) Elapsed 1m 6s (remain 10m 17s) Loss: 0.0498(0.1107) 
EVAL: [396/3883] Data 0.001 (0.002) Elapsed 1m 9s (remain 10m 14s) Loss: 0.2267(0.1106) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 11s) Loss: 0.1190(0.1114) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 7s) Loss: 0.0674(0.1116) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 4s) Loss: 0.1262(0.1123) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 1s) Loss: 0.0328(0.1133) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 57s) Loss: 0.2744(0.1130) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 28s (remain 9m 54s) Loss: 0.0368(0.1134) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 51s) Loss: 0.2586(0.1131) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 47s) Loss: 0.0459(0.1119) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 44s) Loss: 0.2911(0.1122) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 41s) Loss: 0.0165(0.1115) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 38s) Loss: 0.1139(0.1108) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 34s) Loss: 0.0720(0.1104) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 50s (remain 9m 31s) Loss: 0.0230(0.1102) 
EVAL: [648/3883] Data 0.004 (0.002) Elapsed 1m 54s (remain 9m 28s) Loss: 0.0329(0.1102) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 25s) Loss: 0.2063(0.1104) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1626(0.1105) 
EVAL: [702/3883] Data 0.002 (0.002) Elapsed 2m 3s (remain 9m 18s) Loss: 0.1010(0.1091) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 15s) Loss: 0.0803(0.1085) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 12s) Loss: 0.0033(0.1087) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 12s (remain 9m 9s) Loss: 0.1369(0.1088) 
EVAL: [774/3883] Data 0.001 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0332(0.1083) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 2s) Loss: 0.1277(0.1088) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 8m 59s) Loss: 0.0129(0.1090) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0114(0.1091) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0303(0.1089) 
EVAL: [864/3883] Data 0.001 (0.001) Elapsed 2m 31s (remain 8m 49s) Loss: 0.2472(0.1096) 
EVAL: [882/3883] Data 0.001 (0.001) Elapsed 2m 35s (remain 8m 46s) Loss: 0.0071(0.1096) 
EVAL: [900/3883] Data 0.001 (0.001) Elapsed 2m 38s (remain 8m 43s) Loss: 0.2446(0.1092) 
EVAL: [918/3883] Data 0.001 (0.001) Elapsed 2m 41s (remain 8m 40s) Loss: 0.0989(0.1091) 
EVAL: [936/3883] Data 0.001 (0.001) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2149(0.1092) 
EVAL: [954/3883] Data 0.001 (0.001) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0410(0.1087) 
EVAL: [972/3883] Data 0.001 (0.001) Elapsed 2m 50s (remain 8m 30s) Loss: 0.0300(0.1089) 
EVAL: [990/3883] Data 0.001 (0.001) Elapsed 2m 53s (remain 8m 27s) Loss: 0.0865(0.1085) 
EVAL: [1008/3883] Data 0.001 (0.001) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3265(0.1090) 
EVAL: [1026/3883] Data 0.001 (0.001) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0177(0.1083) 
EVAL: [1044/3883] Data 0.001 (0.001) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0175(0.1082) 
EVAL: [1062/3883] Data 0.001 (0.001) Elapsed 3m 6s (remain 8m 14s) Loss: 0.0488(0.1080) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 11s) Loss: 0.0422(0.1078) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1193(0.1075) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0648(0.1076) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0074(0.1075) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1423(0.1072) 
EVAL: [1170/3883] Data 0.002 (0.001) Elapsed 3m 25s (remain 7m 55s) Loss: 0.1163(0.1068) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.0859(0.1067) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0192(0.1067) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 34s (remain 7m 46s) Loss: 0.1777(0.1067) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.0875(0.1067) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 39s) Loss: 0.4459(0.1070) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 36s) Loss: 0.2281(0.1075) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0093(0.1076) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0282(0.1080) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1142(0.1078) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 56s (remain 7m 24s) Loss: 0.0175(0.1079) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 20s) Loss: 0.2056(0.1085) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 17s) Loss: 0.0719(0.1083) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0568(0.1085) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1032(0.1085) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1024(0.1080) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 15s (remain 7m 5s) Loss: 0.0351(0.1082) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 1s) Loss: 0.0687(0.1080) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2244(0.1078) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3324(0.1077) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.2903(0.1080) 
EVAL: [1548/3883] Data 0.001 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0748(0.1078) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2162(0.1075) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 37s (remain 6m 42s) Loss: 0.0059(0.1076) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2413(0.1078) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1888(0.1085) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1893(0.1087) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1045(0.1087) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2248(0.1088) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 23s) Loss: 0.0153(0.1086) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 4m 59s (remain 6m 20s) Loss: 0.0612(0.1089) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0357(0.1089) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.1038(0.1088) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1522(0.1092) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4192(0.1093) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 4s) Loss: 0.1741(0.1093) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 18s (remain 6m 1s) Loss: 0.0129(0.1093) 
EVAL: [1836/3883] Data 0.001 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0317(0.1095) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0032(0.1097) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2144(0.1097) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0181(0.1097) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.0809(0.1097) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0093(0.1096) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 40s (remain 5m 39s) Loss: 0.0068(0.1098) 
EVAL: [1962/3883] Data 0.002 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.1869(0.1101) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.1879(0.1101) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1670(0.1102) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0117(0.1105) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 23s) Loss: 0.0180(0.1103) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.2760(0.1102) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 2s (remain 5m 17s) Loss: 0.2451(0.1105) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0162(0.1102) 
EVAL: [2106/3883] Data 0.002 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.0824(0.1102) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0518(0.1098) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 4s) Loss: 0.0589(0.1097) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.0705(0.1094) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 21s (remain 4m 58s) Loss: 0.0060(0.1091) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0325(0.1090) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0705(0.1089) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.1807(0.1091) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 45s) Loss: 0.4865(0.1092) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0506(0.1094) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.1842(0.1095) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 43s (remain 4m 36s) Loss: 0.1605(0.1093) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0077(0.1092) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.0990(0.1092) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.2900(0.1091) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.2033(0.1093) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.4054(0.1092) 
EVAL: [2412/3883] Data 0.002 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1463(0.1091) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 5s (remain 4m 14s) Loss: 0.0300(0.1090) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0708(0.1087) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0738(0.1086) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 4s) Loss: 0.2503(0.1087) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0470(0.1088) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0241(0.1085) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 24s (remain 3m 55s) Loss: 0.0569(0.1089) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4817(0.1088) 
EVAL: [2574/3883] Data 0.002 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5602(0.1089) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0343(0.1090) 
EVAL: [2610/3883] Data 0.001 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1469(0.1093) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0355(0.1094) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.1739(0.1093) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 46s (remain 3m 33s) Loss: 0.0464(0.1092) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1367(0.1091) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0510(0.1091) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 23s) Loss: 0.1695(0.1091) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0232(0.1092) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0609(0.1096) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1228(0.1098) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 8s (remain 3m 11s) Loss: 0.0179(0.1097) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.1971(0.1097) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1124(0.1096) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1134(0.1096) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.2529(0.1095) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0116(0.1094) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 27s (remain 2m 52s) Loss: 0.2290(0.1094) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1004(0.1093) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.2913(0.1093) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0115(0.1093) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0395(0.1093) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0544(0.1094) 
EVAL: [3006/3883] Data 0.002 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0600(0.1093) 
EVAL: [3024/3883] Data 0.002 (0.001) Elapsed 8m 49s (remain 2m 30s) Loss: 0.3256(0.1093) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0303(0.1092) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1436(0.1093) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1625(0.1092) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5303(0.1093) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1072(0.1093) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 8s (remain 2m 11s) Loss: 0.0590(0.1092) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0626(0.1091) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2574(0.1090) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0060(0.1087) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0276(0.1087) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2485(0.1087) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4306(0.1087) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 30s (remain 1m 49s) Loss: 0.0181(0.1087) 
EVAL: [3276/3883] Data 0.002 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0693(0.1087) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1603(0.1087) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0044(0.1088) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2515(0.1086) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0375(0.1087) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0079(0.1086) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 52s (remain 1m 27s) Loss: 0.1152(0.1087) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1709(0.1088) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1538(0.1087) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1839(0.1089) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2720(0.1090) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0215(0.1091) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 11s (remain 1m 8s) Loss: 0.1157(0.1091) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0410(0.1091) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.6066(0.1093) 
EVAL: [3546/3883] Data 0.002 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2360(0.1093) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1142(0.1091) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0693(0.1092) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0602(0.1091) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 33s (remain 0m 46s) Loss: 0.1459(0.1091) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0206(0.1092) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0406(0.1091) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4368(0.1092) 
EVAL: [3690/3883] Data 0.001 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0235(0.1094) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0338(0.1095) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1619(0.1095) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 55s (remain 0m 24s) Loss: 0.1163(0.1095) 
EVAL: [3762/3883] Data 0.002 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0237(0.1095) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0099(0.1094) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0051(0.1095) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0120(0.1095) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0232(0.1096) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 14s (remain 0m 5s) Loss: 0.2171(0.1095) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0321(0.1093) 
Epoch 9 - avg_train_loss: 0.2629  avg_val_loss: 0.1094  time: 1126s
Epoch 9 - AUC: 0.9662948164378854 - pAUC: 0.17790337479323745
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 19s (remain 0m 0s) Loss: 0.2948(0.1094) 
Epoch: [10][0/650] Data 0.854 (0.854) Elapsed 0m 1s (remain 14m 4s) Loss: 0.1837(0.1837) Grad: 6.7328  LR: 0.000003  
Epoch: [10][18/650] Data 0.271 (0.294) Elapsed 0m 13s (remain 7m 31s) Loss: 0.2002(0.2240) Grad: 6.9892  LR: 0.000003  
Epoch: [10][36/650] Data 0.264 (0.281) Elapsed 0m 25s (remain 7m 9s) Loss: 0.3803(0.2699) Grad: 13.9729  LR: 0.000003  
Epoch: [10][54/650] Data 0.251 (0.276) Elapsed 0m 38s (remain 6m 53s) Loss: 0.2303(0.2602) Grad: 7.6408  LR: 0.000003  
Epoch: [10][72/650] Data 0.257 (0.274) Elapsed 0m 50s (remain 6m 39s) Loss: 0.2180(0.2649) Grad: 5.9644  LR: 0.000003  
Epoch: [10][90/650] Data 0.263 (0.272) Elapsed 1m 2s (remain 6m 25s) Loss: 0.4937(0.2756) Grad: 14.5392  LR: 0.000003  
Epoch: [10][108/650] Data 0.272 (0.271) Elapsed 1m 15s (remain 6m 12s) Loss: 0.4090(0.2763) Grad: 11.8595  LR: 0.000003  
Epoch: [10][126/650] Data 0.270 (0.270) Elapsed 1m 27s (remain 6m 0s) Loss: 0.5089(0.2758) Grad: 11.9833  LR: 0.000003  
Epoch: [10][144/650] Data 0.267 (0.270) Elapsed 1m 39s (remain 5m 47s) Loss: 0.2551(0.2664) Grad: 6.8427  LR: 0.000003  
Epoch: [10][162/650] Data 0.271 (0.269) Elapsed 1m 52s (remain 5m 34s) Loss: 0.2661(0.2622) Grad: 8.2841  LR: 0.000003  
Epoch: [10][180/650] Data 0.272 (0.269) Elapsed 2m 4s (remain 5m 22s) Loss: 0.3128(0.2685) Grad: 7.1546  LR: 0.000003  
Epoch: [10][198/650] Data 0.271 (0.269) Elapsed 2m 16s (remain 5m 9s) Loss: 0.1877(0.2700) Grad: 6.6630  LR: 0.000003  
Epoch: [10][216/650] Data 0.257 (0.268) Elapsed 2m 28s (remain 4m 57s) Loss: 0.3193(0.2698) Grad: 6.1477  LR: 0.000003  
Epoch: [10][234/650] Data 0.268 (0.268) Elapsed 2m 41s (remain 4m 44s) Loss: 0.2159(0.2685) Grad: 7.3384  LR: 0.000003  
Epoch: [10][252/650] Data 0.267 (0.268) Elapsed 2m 53s (remain 4m 32s) Loss: 0.1873(0.2712) Grad: 7.7811  LR: 0.000003  
Epoch: [10][270/650] Data 0.271 (0.268) Elapsed 3m 5s (remain 4m 19s) Loss: 0.1239(0.2689) Grad: 4.2989  LR: 0.000003  
Epoch: [10][288/650] Data 0.259 (0.268) Elapsed 3m 18s (remain 4m 7s) Loss: 0.3556(0.2704) Grad: 9.7239  LR: 0.000003  
Epoch: [10][306/650] Data 0.266 (0.268) Elapsed 3m 30s (remain 3m 55s) Loss: 0.2081(0.2697) Grad: 9.5482  LR: 0.000003  
Epoch: [10][324/650] Data 0.265 (0.268) Elapsed 3m 42s (remain 3m 42s) Loss: 0.2062(0.2694) Grad: 7.4909  LR: 0.000003  
Epoch: [10][342/650] Data 0.258 (0.268) Elapsed 3m 55s (remain 3m 30s) Loss: 0.2902(0.2683) Grad: 10.9024  LR: 0.000003  
Epoch: [10][360/650] Data 0.271 (0.268) Elapsed 4m 7s (remain 3m 18s) Loss: 0.1554(0.2654) Grad: 5.2656  LR: 0.000003  
Epoch: [10][378/650] Data 0.272 (0.268) Elapsed 4m 19s (remain 3m 5s) Loss: 0.2024(0.2630) Grad: 6.0196  LR: 0.000003  
Epoch: [10][396/650] Data 0.272 (0.268) Elapsed 4m 32s (remain 2m 53s) Loss: 0.1046(0.2607) Grad: 5.0755  LR: 0.000003  
Epoch: [10][414/650] Data 0.266 (0.267) Elapsed 4m 44s (remain 2m 41s) Loss: 0.2592(0.2574) Grad: 8.2319  LR: 0.000003  
Epoch: [10][432/650] Data 0.263 (0.267) Elapsed 4m 56s (remain 2m 28s) Loss: 0.2443(0.2572) Grad: 8.5354  LR: 0.000003  
Epoch: [10][450/650] Data 0.268 (0.267) Elapsed 5m 9s (remain 2m 16s) Loss: 0.2911(0.2605) Grad: 7.7450  LR: 0.000003  
Epoch: [10][468/650] Data 0.271 (0.267) Elapsed 5m 21s (remain 2m 4s) Loss: 0.1777(0.2609) Grad: 9.2352  LR: 0.000003  
Epoch: [10][486/650] Data 0.254 (0.267) Elapsed 5m 33s (remain 1m 51s) Loss: 0.2057(0.2605) Grad: 6.2431  LR: 0.000003  
Epoch: [10][504/650] Data 0.265 (0.267) Elapsed 5m 45s (remain 1m 39s) Loss: 0.1882(0.2598) Grad: 6.3767  LR: 0.000003  
Epoch: [10][522/650] Data 0.272 (0.267) Elapsed 5m 58s (remain 1m 26s) Loss: 0.0920(0.2601) Grad: 3.5909  LR: 0.000003  
Epoch: [10][540/650] Data 0.270 (0.267) Elapsed 6m 10s (remain 1m 14s) Loss: 0.3816(0.2606) Grad: 10.6286  LR: 0.000003  
Epoch: [10][558/650] Data 0.262 (0.267) Elapsed 6m 22s (remain 1m 2s) Loss: 0.2080(0.2614) Grad: 6.6038  LR: 0.000003  
Epoch: [10][576/650] Data 0.272 (0.267) Elapsed 6m 35s (remain 0m 49s) Loss: 0.2094(0.2617) Grad: 6.9838  LR: 0.000003  
Epoch: [10][594/650] Data 0.264 (0.267) Elapsed 6m 47s (remain 0m 37s) Loss: 0.2218(0.2617) Grad: 10.5431  LR: 0.000003  
Epoch: [10][612/650] Data 0.267 (0.267) Elapsed 6m 59s (remain 0m 25s) Loss: 0.1747(0.2620) Grad: 5.9795  LR: 0.000003  
Epoch: [10][630/650] Data 0.271 (0.267) Elapsed 7m 12s (remain 0m 13s) Loss: 0.5635(0.2626) Grad: 16.3536  LR: 0.000003  
Epoch: [10][648/650] Data 0.272 (0.267) Elapsed 7m 24s (remain 0m 0s) Loss: 0.1982(0.2622) Grad: 6.5250  LR: 0.000003  
Epoch: [10][649/650] Data 0.273 (0.267) Elapsed 7m 25s (remain 0m 0s) Loss: 0.3294(0.2623) Grad: 10.0549  LR: 0.000003  
EVAL: [0/3883] Data 0.629 (0.629) Elapsed 0m 0s (remain 52m 12s) Loss: 0.1193(0.1193) 
EVAL: [18/3883] Data 0.001 (0.034) Elapsed 0m 3s (remain 13m 25s) Loss: 0.0927(0.0864) 
EVAL: [36/3883] Data 0.001 (0.018) Elapsed 0m 7s (remain 12m 19s) Loss: 0.0558(0.1018) 
EVAL: [54/3883] Data 0.001 (0.012) Elapsed 0m 10s (remain 11m 54s) Loss: 0.0136(0.1002) 
EVAL: [72/3883] Data 0.001 (0.010) Elapsed 0m 13s (remain 11m 40s) Loss: 0.2478(0.1131) 
EVAL: [90/3883] Data 0.002 (0.008) Elapsed 0m 16s (remain 11m 30s) Loss: 0.2071(0.1166) 
EVAL: [108/3883] Data 0.001 (0.007) Elapsed 0m 19s (remain 11m 22s) Loss: 0.1652(0.1190) 
EVAL: [126/3883] Data 0.001 (0.006) Elapsed 0m 22s (remain 11m 16s) Loss: 0.3295(0.1222) 
EVAL: [144/3883] Data 0.001 (0.005) Elapsed 0m 26s (remain 11m 10s) Loss: 0.1643(0.1177) 
EVAL: [162/3883] Data 0.001 (0.005) Elapsed 0m 29s (remain 11m 5s) Loss: 0.0381(0.1184) 
EVAL: [180/3883] Data 0.001 (0.004) Elapsed 0m 32s (remain 11m 1s) Loss: 0.0082(0.1154) 
EVAL: [198/3883] Data 0.001 (0.004) Elapsed 0m 35s (remain 10m 56s) Loss: 0.0300(0.1188) 
EVAL: [216/3883] Data 0.001 (0.004) Elapsed 0m 38s (remain 10m 52s) Loss: 0.0505(0.1194) 
EVAL: [234/3883] Data 0.001 (0.004) Elapsed 0m 41s (remain 10m 48s) Loss: 0.1241(0.1190) 
EVAL: [252/3883] Data 0.001 (0.003) Elapsed 0m 44s (remain 10m 44s) Loss: 0.0723(0.1161) 
EVAL: [270/3883] Data 0.001 (0.003) Elapsed 0m 48s (remain 10m 40s) Loss: 0.0485(0.1182) 
EVAL: [288/3883] Data 0.001 (0.003) Elapsed 0m 51s (remain 10m 37s) Loss: 0.3243(0.1197) 
EVAL: [306/3883] Data 0.001 (0.003) Elapsed 0m 54s (remain 10m 33s) Loss: 0.0484(0.1190) 
EVAL: [324/3883] Data 0.001 (0.003) Elapsed 0m 57s (remain 10m 29s) Loss: 0.0660(0.1188) 
EVAL: [342/3883] Data 0.001 (0.003) Elapsed 1m 0s (remain 10m 26s) Loss: 0.0069(0.1206) 
EVAL: [360/3883] Data 0.001 (0.003) Elapsed 1m 3s (remain 10m 22s) Loss: 0.1103(0.1205) 
EVAL: [378/3883] Data 0.001 (0.003) Elapsed 1m 6s (remain 10m 19s) Loss: 0.0549(0.1208) 
EVAL: [396/3883] Data 0.001 (0.003) Elapsed 1m 10s (remain 10m 15s) Loss: 0.2493(0.1207) 
EVAL: [414/3883] Data 0.001 (0.002) Elapsed 1m 13s (remain 10m 12s) Loss: 0.1204(0.1214) 
EVAL: [432/3883] Data 0.001 (0.002) Elapsed 1m 16s (remain 10m 9s) Loss: 0.0679(0.1215) 
EVAL: [450/3883] Data 0.001 (0.002) Elapsed 1m 19s (remain 10m 5s) Loss: 0.1444(0.1223) 
EVAL: [468/3883] Data 0.001 (0.002) Elapsed 1m 22s (remain 10m 2s) Loss: 0.0361(0.1234) 
EVAL: [486/3883] Data 0.001 (0.002) Elapsed 1m 25s (remain 9m 58s) Loss: 0.2899(0.1232) 
EVAL: [504/3883] Data 0.001 (0.002) Elapsed 1m 29s (remain 9m 55s) Loss: 0.0417(0.1238) 
EVAL: [522/3883] Data 0.001 (0.002) Elapsed 1m 32s (remain 9m 52s) Loss: 0.2516(0.1234) 
EVAL: [540/3883] Data 0.001 (0.002) Elapsed 1m 35s (remain 9m 48s) Loss: 0.0489(0.1222) 
EVAL: [558/3883] Data 0.001 (0.002) Elapsed 1m 38s (remain 9m 45s) Loss: 0.3048(0.1225) 
EVAL: [576/3883] Data 0.001 (0.002) Elapsed 1m 41s (remain 9m 42s) Loss: 0.0170(0.1217) 
EVAL: [594/3883] Data 0.001 (0.002) Elapsed 1m 44s (remain 9m 39s) Loss: 0.1348(0.1210) 
EVAL: [612/3883] Data 0.001 (0.002) Elapsed 1m 47s (remain 9m 35s) Loss: 0.0633(0.1207) 
EVAL: [630/3883] Data 0.001 (0.002) Elapsed 1m 51s (remain 9m 32s) Loss: 0.0349(0.1206) 
EVAL: [648/3883] Data 0.001 (0.002) Elapsed 1m 54s (remain 9m 29s) Loss: 0.0409(0.1206) 
EVAL: [666/3883] Data 0.001 (0.002) Elapsed 1m 57s (remain 9m 26s) Loss: 0.2241(0.1206) 
EVAL: [684/3883] Data 0.001 (0.002) Elapsed 2m 0s (remain 9m 22s) Loss: 0.1897(0.1207) 
EVAL: [702/3883] Data 0.001 (0.002) Elapsed 2m 3s (remain 9m 19s) Loss: 0.1043(0.1192) 
EVAL: [720/3883] Data 0.001 (0.002) Elapsed 2m 6s (remain 9m 16s) Loss: 0.0915(0.1186) 
EVAL: [738/3883] Data 0.001 (0.002) Elapsed 2m 9s (remain 9m 13s) Loss: 0.0054(0.1189) 
EVAL: [756/3883] Data 0.001 (0.002) Elapsed 2m 13s (remain 9m 9s) Loss: 0.1322(0.1190) 
EVAL: [774/3883] Data 0.002 (0.002) Elapsed 2m 16s (remain 9m 6s) Loss: 0.0417(0.1185) 
EVAL: [792/3883] Data 0.001 (0.002) Elapsed 2m 19s (remain 9m 3s) Loss: 0.1381(0.1190) 
EVAL: [810/3883] Data 0.001 (0.002) Elapsed 2m 22s (remain 9m 0s) Loss: 0.0174(0.1192) 
EVAL: [828/3883] Data 0.001 (0.002) Elapsed 2m 25s (remain 8m 56s) Loss: 0.0101(0.1194) 
EVAL: [846/3883] Data 0.001 (0.002) Elapsed 2m 28s (remain 8m 53s) Loss: 0.0361(0.1192) 
EVAL: [864/3883] Data 0.001 (0.002) Elapsed 2m 32s (remain 8m 50s) Loss: 0.2662(0.1200) 
EVAL: [882/3883] Data 0.001 (0.002) Elapsed 2m 35s (remain 8m 47s) Loss: 0.0053(0.1199) 
EVAL: [900/3883] Data 0.001 (0.002) Elapsed 2m 38s (remain 8m 44s) Loss: 0.2504(0.1194) 
EVAL: [918/3883] Data 0.001 (0.002) Elapsed 2m 41s (remain 8m 40s) Loss: 0.1164(0.1193) 
EVAL: [936/3883] Data 0.001 (0.002) Elapsed 2m 44s (remain 8m 37s) Loss: 0.2322(0.1194) 
EVAL: [954/3883] Data 0.001 (0.002) Elapsed 2m 47s (remain 8m 34s) Loss: 0.0407(0.1189) 
EVAL: [972/3883] Data 0.001 (0.002) Elapsed 2m 50s (remain 8m 31s) Loss: 0.0510(0.1191) 
EVAL: [990/3883] Data 0.001 (0.002) Elapsed 2m 54s (remain 8m 28s) Loss: 0.0965(0.1186) 
EVAL: [1008/3883] Data 0.001 (0.002) Elapsed 2m 57s (remain 8m 24s) Loss: 0.3469(0.1192) 
EVAL: [1026/3883] Data 0.001 (0.002) Elapsed 3m 0s (remain 8m 21s) Loss: 0.0226(0.1186) 
EVAL: [1044/3883] Data 0.001 (0.002) Elapsed 3m 3s (remain 8m 18s) Loss: 0.0166(0.1185) 
EVAL: [1062/3883] Data 0.001 (0.002) Elapsed 3m 6s (remain 8m 15s) Loss: 0.0777(0.1182) 
EVAL: [1080/3883] Data 0.001 (0.001) Elapsed 3m 9s (remain 8m 12s) Loss: 0.0513(0.1180) 
EVAL: [1098/3883] Data 0.001 (0.001) Elapsed 3m 12s (remain 8m 8s) Loss: 0.1287(0.1176) 
EVAL: [1116/3883] Data 0.001 (0.001) Elapsed 3m 16s (remain 8m 5s) Loss: 0.0837(0.1177) 
EVAL: [1134/3883] Data 0.001 (0.001) Elapsed 3m 19s (remain 8m 2s) Loss: 0.0082(0.1176) 
EVAL: [1152/3883] Data 0.001 (0.001) Elapsed 3m 22s (remain 7m 59s) Loss: 0.1557(0.1174) 
EVAL: [1170/3883] Data 0.001 (0.001) Elapsed 3m 25s (remain 7m 56s) Loss: 0.1254(0.1170) 
EVAL: [1188/3883] Data 0.001 (0.001) Elapsed 3m 28s (remain 7m 52s) Loss: 0.1065(0.1168) 
EVAL: [1206/3883] Data 0.001 (0.001) Elapsed 3m 31s (remain 7m 49s) Loss: 0.0211(0.1167) 
EVAL: [1224/3883] Data 0.001 (0.001) Elapsed 3m 35s (remain 7m 46s) Loss: 0.1735(0.1167) 
EVAL: [1242/3883] Data 0.001 (0.001) Elapsed 3m 38s (remain 7m 43s) Loss: 0.1099(0.1168) 
EVAL: [1260/3883] Data 0.001 (0.001) Elapsed 3m 41s (remain 7m 40s) Loss: 0.4532(0.1171) 
EVAL: [1278/3883] Data 0.001 (0.001) Elapsed 3m 44s (remain 7m 37s) Loss: 0.2501(0.1176) 
EVAL: [1296/3883] Data 0.001 (0.001) Elapsed 3m 47s (remain 7m 33s) Loss: 0.0106(0.1177) 
EVAL: [1314/3883] Data 0.001 (0.001) Elapsed 3m 50s (remain 7m 30s) Loss: 0.0335(0.1180) 
EVAL: [1332/3883] Data 0.001 (0.001) Elapsed 3m 53s (remain 7m 27s) Loss: 0.1450(0.1179) 
EVAL: [1350/3883] Data 0.001 (0.001) Elapsed 3m 57s (remain 7m 24s) Loss: 0.0164(0.1179) 
EVAL: [1368/3883] Data 0.001 (0.001) Elapsed 4m 0s (remain 7m 21s) Loss: 0.2317(0.1186) 
EVAL: [1386/3883] Data 0.001 (0.001) Elapsed 4m 3s (remain 7m 18s) Loss: 0.0773(0.1184) 
EVAL: [1404/3883] Data 0.001 (0.001) Elapsed 4m 6s (remain 7m 14s) Loss: 0.0714(0.1185) 
EVAL: [1422/3883] Data 0.001 (0.001) Elapsed 4m 9s (remain 7m 11s) Loss: 0.1176(0.1186) 
EVAL: [1440/3883] Data 0.001 (0.001) Elapsed 4m 12s (remain 7m 8s) Loss: 0.1125(0.1181) 
EVAL: [1458/3883] Data 0.001 (0.001) Elapsed 4m 16s (remain 7m 5s) Loss: 0.0483(0.1182) 
EVAL: [1476/3883] Data 0.001 (0.001) Elapsed 4m 19s (remain 7m 2s) Loss: 0.0728(0.1180) 
EVAL: [1494/3883] Data 0.001 (0.001) Elapsed 4m 22s (remain 6m 58s) Loss: 0.2303(0.1178) 
EVAL: [1512/3883] Data 0.001 (0.001) Elapsed 4m 25s (remain 6m 55s) Loss: 0.3369(0.1178) 
EVAL: [1530/3883] Data 0.001 (0.001) Elapsed 4m 28s (remain 6m 52s) Loss: 0.3106(0.1181) 
EVAL: [1548/3883] Data 0.002 (0.001) Elapsed 4m 31s (remain 6m 49s) Loss: 0.0873(0.1178) 
EVAL: [1566/3883] Data 0.001 (0.001) Elapsed 4m 34s (remain 6m 46s) Loss: 0.2376(0.1176) 
EVAL: [1584/3883] Data 0.001 (0.001) Elapsed 4m 38s (remain 6m 43s) Loss: 0.0064(0.1176) 
EVAL: [1602/3883] Data 0.001 (0.001) Elapsed 4m 41s (remain 6m 39s) Loss: 0.2791(0.1180) 
EVAL: [1620/3883] Data 0.001 (0.001) Elapsed 4m 44s (remain 6m 36s) Loss: 0.1936(0.1186) 
EVAL: [1638/3883] Data 0.001 (0.001) Elapsed 4m 47s (remain 6m 33s) Loss: 0.1808(0.1188) 
EVAL: [1656/3883] Data 0.001 (0.001) Elapsed 4m 50s (remain 6m 30s) Loss: 0.1033(0.1189) 
EVAL: [1674/3883] Data 0.001 (0.001) Elapsed 4m 53s (remain 6m 27s) Loss: 0.2458(0.1190) 
EVAL: [1692/3883] Data 0.001 (0.001) Elapsed 4m 56s (remain 6m 24s) Loss: 0.0182(0.1188) 
EVAL: [1710/3883] Data 0.001 (0.001) Elapsed 5m 0s (remain 6m 20s) Loss: 0.0635(0.1191) 
EVAL: [1728/3883] Data 0.001 (0.001) Elapsed 5m 3s (remain 6m 17s) Loss: 0.0499(0.1191) 
EVAL: [1746/3883] Data 0.001 (0.001) Elapsed 5m 6s (remain 6m 14s) Loss: 0.0914(0.1191) 
EVAL: [1764/3883] Data 0.001 (0.001) Elapsed 5m 9s (remain 6m 11s) Loss: 0.1734(0.1195) 
EVAL: [1782/3883] Data 0.001 (0.001) Elapsed 5m 12s (remain 6m 8s) Loss: 0.4139(0.1196) 
EVAL: [1800/3883] Data 0.001 (0.001) Elapsed 5m 15s (remain 6m 5s) Loss: 0.1868(0.1197) 
EVAL: [1818/3883] Data 0.001 (0.001) Elapsed 5m 19s (remain 6m 1s) Loss: 0.0133(0.1197) 
EVAL: [1836/3883] Data 0.002 (0.001) Elapsed 5m 22s (remain 5m 58s) Loss: 0.0377(0.1198) 
EVAL: [1854/3883] Data 0.001 (0.001) Elapsed 5m 25s (remain 5m 55s) Loss: 0.0036(0.1199) 
EVAL: [1872/3883] Data 0.001 (0.001) Elapsed 5m 28s (remain 5m 52s) Loss: 0.2232(0.1199) 
EVAL: [1890/3883] Data 0.001 (0.001) Elapsed 5m 31s (remain 5m 49s) Loss: 0.0216(0.1200) 
EVAL: [1908/3883] Data 0.001 (0.001) Elapsed 5m 34s (remain 5m 46s) Loss: 0.0850(0.1199) 
EVAL: [1926/3883] Data 0.001 (0.001) Elapsed 5m 37s (remain 5m 42s) Loss: 0.0108(0.1198) 
EVAL: [1944/3883] Data 0.001 (0.001) Elapsed 5m 41s (remain 5m 39s) Loss: 0.0075(0.1200) 
EVAL: [1962/3883] Data 0.001 (0.001) Elapsed 5m 44s (remain 5m 36s) Loss: 0.2115(0.1203) 
EVAL: [1980/3883] Data 0.001 (0.001) Elapsed 5m 47s (remain 5m 33s) Loss: 0.2181(0.1204) 
EVAL: [1998/3883] Data 0.001 (0.001) Elapsed 5m 50s (remain 5m 30s) Loss: 0.1364(0.1204) 
EVAL: [2016/3883] Data 0.001 (0.001) Elapsed 5m 53s (remain 5m 27s) Loss: 0.0129(0.1207) 
EVAL: [2034/3883] Data 0.001 (0.001) Elapsed 5m 56s (remain 5m 24s) Loss: 0.0212(0.1205) 
EVAL: [2052/3883] Data 0.001 (0.001) Elapsed 5m 59s (remain 5m 20s) Loss: 0.2810(0.1204) 
EVAL: [2070/3883] Data 0.001 (0.001) Elapsed 6m 3s (remain 5m 17s) Loss: 0.2497(0.1206) 
EVAL: [2088/3883] Data 0.001 (0.001) Elapsed 6m 6s (remain 5m 14s) Loss: 0.0148(0.1203) 
EVAL: [2106/3883] Data 0.001 (0.001) Elapsed 6m 9s (remain 5m 11s) Loss: 0.1064(0.1203) 
EVAL: [2124/3883] Data 0.001 (0.001) Elapsed 6m 12s (remain 5m 8s) Loss: 0.0633(0.1199) 
EVAL: [2142/3883] Data 0.001 (0.001) Elapsed 6m 15s (remain 5m 5s) Loss: 0.0671(0.1199) 
EVAL: [2160/3883] Data 0.001 (0.001) Elapsed 6m 18s (remain 5m 1s) Loss: 0.1183(0.1196) 
EVAL: [2178/3883] Data 0.001 (0.001) Elapsed 6m 22s (remain 4m 58s) Loss: 0.0083(0.1193) 
EVAL: [2196/3883] Data 0.001 (0.001) Elapsed 6m 25s (remain 4m 55s) Loss: 0.0462(0.1192) 
EVAL: [2214/3883] Data 0.001 (0.001) Elapsed 6m 28s (remain 4m 52s) Loss: 0.0874(0.1192) 
EVAL: [2232/3883] Data 0.001 (0.001) Elapsed 6m 31s (remain 4m 49s) Loss: 0.1958(0.1193) 
EVAL: [2250/3883] Data 0.001 (0.001) Elapsed 6m 34s (remain 4m 46s) Loss: 0.5086(0.1194) 
EVAL: [2268/3883] Data 0.001 (0.001) Elapsed 6m 37s (remain 4m 42s) Loss: 0.0716(0.1196) 
EVAL: [2286/3883] Data 0.001 (0.001) Elapsed 6m 40s (remain 4m 39s) Loss: 0.2039(0.1197) 
EVAL: [2304/3883] Data 0.001 (0.001) Elapsed 6m 44s (remain 4m 36s) Loss: 0.1937(0.1195) 
EVAL: [2322/3883] Data 0.001 (0.001) Elapsed 6m 47s (remain 4m 33s) Loss: 0.0081(0.1194) 
EVAL: [2340/3883] Data 0.001 (0.001) Elapsed 6m 50s (remain 4m 30s) Loss: 0.1219(0.1194) 
EVAL: [2358/3883] Data 0.001 (0.001) Elapsed 6m 53s (remain 4m 27s) Loss: 0.3142(0.1193) 
EVAL: [2376/3883] Data 0.001 (0.001) Elapsed 6m 56s (remain 4m 23s) Loss: 0.1997(0.1195) 
EVAL: [2394/3883] Data 0.001 (0.001) Elapsed 6m 59s (remain 4m 20s) Loss: 0.4206(0.1194) 
EVAL: [2412/3883] Data 0.001 (0.001) Elapsed 7m 2s (remain 4m 17s) Loss: 0.1616(0.1193) 
EVAL: [2430/3883] Data 0.001 (0.001) Elapsed 7m 6s (remain 4m 14s) Loss: 0.0279(0.1192) 
EVAL: [2448/3883] Data 0.001 (0.001) Elapsed 7m 9s (remain 4m 11s) Loss: 0.0847(0.1189) 
EVAL: [2466/3883] Data 0.001 (0.001) Elapsed 7m 12s (remain 4m 8s) Loss: 0.0852(0.1188) 
EVAL: [2484/3883] Data 0.001 (0.001) Elapsed 7m 15s (remain 4m 5s) Loss: 0.2764(0.1189) 
EVAL: [2502/3883] Data 0.001 (0.001) Elapsed 7m 18s (remain 4m 1s) Loss: 0.0444(0.1191) 
EVAL: [2520/3883] Data 0.001 (0.001) Elapsed 7m 21s (remain 3m 58s) Loss: 0.0269(0.1187) 
EVAL: [2538/3883] Data 0.001 (0.001) Elapsed 7m 25s (remain 3m 55s) Loss: 0.0688(0.1191) 
EVAL: [2556/3883] Data 0.001 (0.001) Elapsed 7m 28s (remain 3m 52s) Loss: 0.4862(0.1191) 
EVAL: [2574/3883] Data 0.001 (0.001) Elapsed 7m 31s (remain 3m 49s) Loss: 0.5755(0.1192) 
EVAL: [2592/3883] Data 0.001 (0.001) Elapsed 7m 34s (remain 3m 46s) Loss: 0.0426(0.1193) 
EVAL: [2610/3883] Data 0.003 (0.001) Elapsed 7m 37s (remain 3m 42s) Loss: 0.1562(0.1196) 
EVAL: [2628/3883] Data 0.001 (0.001) Elapsed 7m 40s (remain 3m 39s) Loss: 0.0435(0.1197) 
EVAL: [2646/3883] Data 0.001 (0.001) Elapsed 7m 43s (remain 3m 36s) Loss: 0.2066(0.1196) 
EVAL: [2664/3883] Data 0.001 (0.001) Elapsed 7m 47s (remain 3m 33s) Loss: 0.0542(0.1195) 
EVAL: [2682/3883] Data 0.001 (0.001) Elapsed 7m 50s (remain 3m 30s) Loss: 0.1454(0.1194) 
EVAL: [2700/3883] Data 0.001 (0.001) Elapsed 7m 53s (remain 3m 27s) Loss: 0.0557(0.1194) 
EVAL: [2718/3883] Data 0.001 (0.001) Elapsed 7m 56s (remain 3m 24s) Loss: 0.1880(0.1194) 
EVAL: [2736/3883] Data 0.001 (0.001) Elapsed 7m 59s (remain 3m 20s) Loss: 0.0352(0.1196) 
EVAL: [2754/3883] Data 0.001 (0.001) Elapsed 8m 2s (remain 3m 17s) Loss: 0.0789(0.1200) 
EVAL: [2772/3883] Data 0.001 (0.001) Elapsed 8m 5s (remain 3m 14s) Loss: 0.1448(0.1201) 
EVAL: [2790/3883] Data 0.001 (0.001) Elapsed 8m 9s (remain 3m 11s) Loss: 0.0223(0.1200) 
EVAL: [2808/3883] Data 0.001 (0.001) Elapsed 8m 12s (remain 3m 8s) Loss: 0.2040(0.1200) 
EVAL: [2826/3883] Data 0.001 (0.001) Elapsed 8m 15s (remain 3m 5s) Loss: 0.1215(0.1199) 
EVAL: [2844/3883] Data 0.001 (0.001) Elapsed 8m 18s (remain 3m 1s) Loss: 0.1209(0.1199) 
EVAL: [2862/3883] Data 0.001 (0.001) Elapsed 8m 21s (remain 2m 58s) Loss: 0.2697(0.1198) 
EVAL: [2880/3883] Data 0.001 (0.001) Elapsed 8m 24s (remain 2m 55s) Loss: 0.0137(0.1197) 
EVAL: [2898/3883] Data 0.001 (0.001) Elapsed 8m 28s (remain 2m 52s) Loss: 0.2341(0.1197) 
EVAL: [2916/3883] Data 0.001 (0.001) Elapsed 8m 31s (remain 2m 49s) Loss: 0.1367(0.1196) 
EVAL: [2934/3883] Data 0.001 (0.001) Elapsed 8m 34s (remain 2m 46s) Loss: 0.3458(0.1196) 
EVAL: [2952/3883] Data 0.001 (0.001) Elapsed 8m 37s (remain 2m 42s) Loss: 0.0121(0.1197) 
EVAL: [2970/3883] Data 0.001 (0.001) Elapsed 8m 40s (remain 2m 39s) Loss: 0.0445(0.1197) 
EVAL: [2988/3883] Data 0.001 (0.001) Elapsed 8m 43s (remain 2m 36s) Loss: 0.0625(0.1198) 
EVAL: [3006/3883] Data 0.001 (0.001) Elapsed 8m 46s (remain 2m 33s) Loss: 0.0730(0.1197) 
EVAL: [3024/3883] Data 0.001 (0.001) Elapsed 8m 50s (remain 2m 30s) Loss: 0.3513(0.1197) 
EVAL: [3042/3883] Data 0.001 (0.001) Elapsed 8m 53s (remain 2m 27s) Loss: 0.0354(0.1196) 
EVAL: [3060/3883] Data 0.001 (0.001) Elapsed 8m 56s (remain 2m 24s) Loss: 0.1735(0.1196) 
EVAL: [3078/3883] Data 0.001 (0.001) Elapsed 8m 59s (remain 2m 20s) Loss: 0.1837(0.1196) 
EVAL: [3096/3883] Data 0.001 (0.001) Elapsed 9m 2s (remain 2m 17s) Loss: 0.5497(0.1197) 
EVAL: [3114/3883] Data 0.001 (0.001) Elapsed 9m 5s (remain 2m 14s) Loss: 0.1262(0.1197) 
EVAL: [3132/3883] Data 0.001 (0.001) Elapsed 9m 9s (remain 2m 11s) Loss: 0.0709(0.1196) 
EVAL: [3150/3883] Data 0.001 (0.001) Elapsed 9m 12s (remain 2m 8s) Loss: 0.0749(0.1195) 
EVAL: [3168/3883] Data 0.001 (0.001) Elapsed 9m 15s (remain 2m 5s) Loss: 0.2820(0.1194) 
EVAL: [3186/3883] Data 0.001 (0.001) Elapsed 9m 18s (remain 2m 1s) Loss: 0.0034(0.1191) 
EVAL: [3204/3883] Data 0.001 (0.001) Elapsed 9m 21s (remain 1m 58s) Loss: 0.0324(0.1191) 
EVAL: [3222/3883] Data 0.001 (0.001) Elapsed 9m 24s (remain 1m 55s) Loss: 0.2705(0.1191) 
EVAL: [3240/3883] Data 0.001 (0.001) Elapsed 9m 27s (remain 1m 52s) Loss: 0.4488(0.1191) 
EVAL: [3258/3883] Data 0.001 (0.001) Elapsed 9m 31s (remain 1m 49s) Loss: 0.0225(0.1191) 
EVAL: [3276/3883] Data 0.001 (0.001) Elapsed 9m 34s (remain 1m 46s) Loss: 0.0930(0.1190) 
EVAL: [3294/3883] Data 0.001 (0.001) Elapsed 9m 37s (remain 1m 43s) Loss: 0.1752(0.1191) 
EVAL: [3312/3883] Data 0.001 (0.001) Elapsed 9m 40s (remain 1m 39s) Loss: 0.0049(0.1192) 
EVAL: [3330/3883] Data 0.001 (0.001) Elapsed 9m 43s (remain 1m 36s) Loss: 0.2752(0.1190) 
EVAL: [3348/3883] Data 0.001 (0.001) Elapsed 9m 46s (remain 1m 33s) Loss: 0.0474(0.1191) 
EVAL: [3366/3883] Data 0.001 (0.001) Elapsed 9m 49s (remain 1m 30s) Loss: 0.0093(0.1190) 
EVAL: [3384/3883] Data 0.001 (0.001) Elapsed 9m 53s (remain 1m 27s) Loss: 0.1289(0.1191) 
EVAL: [3402/3883] Data 0.001 (0.001) Elapsed 9m 56s (remain 1m 24s) Loss: 0.1870(0.1192) 
EVAL: [3420/3883] Data 0.001 (0.001) Elapsed 9m 59s (remain 1m 20s) Loss: 0.1693(0.1191) 
EVAL: [3438/3883] Data 0.001 (0.001) Elapsed 10m 2s (remain 1m 17s) Loss: 0.1884(0.1193) 
EVAL: [3456/3883] Data 0.001 (0.001) Elapsed 10m 5s (remain 1m 14s) Loss: 0.2983(0.1195) 
EVAL: [3474/3883] Data 0.001 (0.001) Elapsed 10m 8s (remain 1m 11s) Loss: 0.0266(0.1195) 
EVAL: [3492/3883] Data 0.001 (0.001) Elapsed 10m 12s (remain 1m 8s) Loss: 0.1362(0.1195) 
EVAL: [3510/3883] Data 0.001 (0.001) Elapsed 10m 15s (remain 1m 5s) Loss: 0.0612(0.1195) 
EVAL: [3528/3883] Data 0.001 (0.001) Elapsed 10m 18s (remain 1m 2s) Loss: 0.6071(0.1197) 
EVAL: [3546/3883] Data 0.001 (0.001) Elapsed 10m 21s (remain 0m 58s) Loss: 0.2709(0.1197) 
EVAL: [3564/3883] Data 0.001 (0.001) Elapsed 10m 24s (remain 0m 55s) Loss: 0.1413(0.1195) 
EVAL: [3582/3883] Data 0.001 (0.001) Elapsed 10m 27s (remain 0m 52s) Loss: 0.0880(0.1196) 
EVAL: [3600/3883] Data 0.001 (0.001) Elapsed 10m 30s (remain 0m 49s) Loss: 0.0672(0.1195) 
EVAL: [3618/3883] Data 0.001 (0.001) Elapsed 10m 34s (remain 0m 46s) Loss: 0.1433(0.1195) 
EVAL: [3636/3883] Data 0.001 (0.001) Elapsed 10m 37s (remain 0m 43s) Loss: 0.0203(0.1196) 
EVAL: [3654/3883] Data 0.001 (0.001) Elapsed 10m 40s (remain 0m 39s) Loss: 0.0481(0.1195) 
EVAL: [3672/3883] Data 0.001 (0.001) Elapsed 10m 43s (remain 0m 36s) Loss: 0.4344(0.1196) 
EVAL: [3690/3883] Data 0.002 (0.001) Elapsed 10m 46s (remain 0m 33s) Loss: 0.0268(0.1198) 
EVAL: [3708/3883] Data 0.001 (0.001) Elapsed 10m 49s (remain 0m 30s) Loss: 0.0414(0.1199) 
EVAL: [3726/3883] Data 0.001 (0.001) Elapsed 10m 52s (remain 0m 27s) Loss: 0.1762(0.1199) 
EVAL: [3744/3883] Data 0.001 (0.001) Elapsed 10m 56s (remain 0m 24s) Loss: 0.1394(0.1199) 
EVAL: [3762/3883] Data 0.001 (0.001) Elapsed 10m 59s (remain 0m 21s) Loss: 0.0218(0.1199) 
EVAL: [3780/3883] Data 0.001 (0.001) Elapsed 11m 2s (remain 0m 17s) Loss: 0.0093(0.1198) 
EVAL: [3798/3883] Data 0.001 (0.001) Elapsed 11m 5s (remain 0m 14s) Loss: 0.0059(0.1198) 
EVAL: [3816/3883] Data 0.001 (0.001) Elapsed 11m 8s (remain 0m 11s) Loss: 0.0142(0.1199) 
EVAL: [3834/3883] Data 0.001 (0.001) Elapsed 11m 11s (remain 0m 8s) Loss: 0.0298(0.1200) 
EVAL: [3852/3883] Data 0.001 (0.001) Elapsed 11m 15s (remain 0m 5s) Loss: 0.2243(0.1200) 
EVAL: [3870/3883] Data 0.001 (0.001) Elapsed 11m 18s (remain 0m 2s) Loss: 0.0365(0.1198) 
Epoch 10 - avg_train_loss: 0.2623  avg_val_loss: 0.1198  time: 1126s
Epoch 10 - AUC: 0.9672965135013212 - pAUC: 0.178370604283474
Epoch 10 - Save Best Score: 0.1784 Model
EVAL: [3882/3883] Data 0.001 (0.001) Elapsed 11m 20s (remain 0m 0s) Loss: 0.2894(0.1198) 
========== CV ==========
In [ ]:
 
In [ ]:
 
In [56]:
# Use Accelerator: the cells below define the Hugging Face Accelerate training / validation pipeline.
In [ ]:
class AverageMeter:
    """Running tracker for a scalar: latest value, weighted total, sample count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest reading, counted `n` times in the running mean."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    # %d drops any fractional part of the leftover seconds.
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return 'elapsed (remain eta)' for work started at `since`.

    `since` is a `time.time()` timestamp and `percent` is the fraction of the
    work already completed; the remaining time is extrapolated linearly from it.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler , accelerator):
    """Run one training epoch and return the sample-weighted mean training loss.

    Args:
        train_loader: iterable yielding ``(images, labels, file_name, data)`` batches.
        model: network invoked as ``model(images, meta)`` where ``meta = data[1]``
            (presumably the tabular metadata tensor -- confirm against the dataset class).
        criterion: loss callable applied to ``(y_preds, labels)``.
        optimizer: optimizer stepped once per accumulation window.
        epoch: zero-based epoch index; only used in the progress line.
        scheduler: unused here (kept for interface compatibility); the learning
            rate that is logged is read straight from the optimizer.
        accelerator: Accelerate ``Accelerator`` used for backward, gradient
            clipping and gradient accumulation.

    Returns:
        ``losses.avg``: the mean of the per-batch losses weighted by batch size.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    start = end = time.time()
    # Bound up-front so the progress line / wandb log never hit an unbound name
    # on steps where gradients are not synchronised (and therefore not clipped).
    grad_norm = 0.0
    # Use the accelerator's device rather than a notebook-level global so every
    # process targets its own GPU.
    device = accelerator.device

    for step, (images, labels , file_name , data) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # No-ops when the loader was passed through accelerator.prepare().
        images = images.to(device)
        labels = labels.to(device)

        batch_size = labels.size(0)
        meta = data[1].to(device)

        # accumulate() is what makes the Accelerator honour its configured
        # gradient_accumulation_steps: optimizer.step()/zero_grad() become
        # no-ops until the accumulation window is complete.
        with accelerator.accumulate(model):
            y_preds = model(images , meta)
            loss = criterion(y_preds, labels)

            # record loss
            losses.update(loss.item(), batch_size)
            # Backward pass with accelerator
            accelerator.backward(loss)

            # https://huggingface.co/docs/accelerate/package_reference/accelerator
            # Clipping must happen before optimizer.step(), and only on steps
            # where gradients are actually synchronised.
            if accelerator.sync_gradients:
                grad_norm = accelerator.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

            optimizer.step()
            optimizer.zero_grad()

        # Read the LR actually applied by the optimizer; calling
        # scheduler.get_lr() outside of scheduler.step() is not a reliable way
        # to obtain the current learning rate.
        current_lr = optimizer.param_groups[0]['lr']

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(
                   epoch+1, step, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(train_loader)),
                   grad_norm=grad_norm,
                   lr=current_lr,
                   ))

        wandb.log({
            "Train Loss": losses.val,
            "Step": step,
            "Gradient Norm": grad_norm,
            "Learning Rate": current_lr,
        })
    return losses.avg


def valid_fn(valid_loader, model, criterion , accelerator):
    """Evaluate `model` on `valid_loader` and collect predictions for scoring.

    Args:
        valid_loader: iterable yielding ``(images, labels, file_name, data)`` batches.
        model: network invoked as ``model(images, meta)`` where ``meta = data[1]``
            (presumably the tabular metadata tensor -- confirm against the dataset class).
        criterion: loss callable applied to ``(y_preds, labels)``.
        accelerator: Accelerate ``Accelerator`` used to gather results.

    Returns:
        A 5-tuple ``(avg_loss, predictions, gathered_labels, gathered_file_names,
        gathered_y_preds)``. ``predictions`` holds the softmax outputs of the
        batches seen by this process only; the ``gathered_*`` arrays are the
        results of ``accelerator.gather_for_metrics`` concatenated over batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.eval()
    preds = []
    start = end = time.time()

    gathered_labels = []
    gathered_file_names = []
    gathered_y_preds = []
    for step, (images, labels , file_name , data) in enumerate(valid_loader):
        data_time.update(time.time() - end)
        batch_size = labels.size(0)

        # Use the accelerator's device instead of a notebook-level global so the
        # metadata lands on the same device as the model in every process.
        meta = data[1].to(accelerator.device)
        with torch.no_grad():
            y_preds = model(images , meta)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        # Convert logits to class probabilities before gathering / storing.
        y_preds = torch.nn.functional.softmax(y_preds, dim=1)

        # Gather needed items
        ####################################
        # checkout gather_for_metrics https://huggingface.co/docs/accelerate/quicktour
        gathered_y_preds.append(accelerator.gather_for_metrics(y_preds).cpu().numpy())
        gathered_labels.append(accelerator.gather_for_metrics(labels).cpu().numpy())
        file_name_array = np.array(file_name)
        gathered_file_names.append(np.array(accelerator.gather_for_metrics(file_name_array)))
        ####################################

        preds.append(y_preds.to('cpu').numpy())

        batch_time.update(time.time() - end)
        end = time.time()

        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, len(valid_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/len(valid_loader)),
                   ))

        wandb.log({
            "Val Loss ": losses.val,
            "Val Step": step,
        })

    gathered_y_preds = np.concatenate(gathered_y_preds)
    gathered_labels = np.concatenate(gathered_labels)
    gathered_file_names = np.concatenate(gathered_file_names)
    predictions = np.concatenate(preds)

    return losses.avg, predictions ,gathered_labels , gathered_file_names , gathered_y_preds
In [ ]:
def train_loop(folds, fold , model, accelerator): #train + validation
    """Train `model` on all folds except `fold` and validate on `fold` every epoch.

    Args:
        folds: DataFrame with at least the columns ``fold``, ``img_name`` and ``target``.
        fold: identifier of the fold held out for validation.
        model: freshly initialised network (not yet passed through ``accelerator.prepare``).
        accelerator: Accelerate ``Accelerator`` driving device placement and mixed precision.

    Side effects:
        * writes ``OOF_pred_fold{fold}_epoch_{epoch}.csv`` after every epoch;
        * whenever the validation score improves (lower is better) saves
          ``{'model': state_dict}`` to ``OUTPUT_DIR + '{CFG.model_name}_fold{fold}_best.pth'``,
          which is the layout ``inference`` reads via ``state['model']``.

    Returns:
        None.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    train_dataset = TrainDataset(train_folds,
                                 transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds,
                                 transform=get_transforms(data='valid'))

    # NOTE(review): shuffle=False feeds the training rows in the same order every
    # epoch -- confirm this is intentional (e.g. the frame is pre-shuffled).
    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers ,
                              pin_memory=True, drop_last=True )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False )

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        """Build the LR scheduler selected by CFG.scheduler."""
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        else:
            # Fail with a clear message instead of an UnboundLocalError.
            raise ValueError(f"Unsupported CFG.scheduler: {CFG.scheduler!r}")
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False , )
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # apex
    # ====================================================
    # NOTE(review): `amp` is not among the imports visible at the top of the
    # notebook, and the Accelerator is already configured for mixed precision --
    # confirm CFG.apex is meant to stay False.
    if CFG.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # ====================================================
    # loop
    # ====================================================
    if CFG.weighted_loss :
        criterion = nn.CrossEntropyLoss(weight = class_weights_tensor.to(accelerator.device))
    else :
        criterion = nn.CrossEntropyLoss()

    # The scheduler is deliberately NOT passed to prepare(): prepare() wraps it in
    # Accelerate's own scheduler class, so the isinstance() checks in the epoch
    # loop would never match and the learning rate would never be stepped. It is
    # stepped once per epoch here, not once per optimizer step.
    model, optimizer, train_loader, valid_loader = accelerator.prepare(
        model, optimizer, train_loader, valid_loader
    )

    best_score = 50000

    for epoch in range(CFG.epochs):

        torch.cuda.empty_cache()

        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler , accelerator)

        # eval (the per-process predictions are not needed; scoring uses the gathered ones)
        avg_val_loss, _ , gathered_labels , gathered_file_names , gathered_preds = valid_fn(valid_loader, model, criterion, accelerator)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, (CosineAnnealingLR, CosineAnnealingWarmRestarts)):
            scheduler.step()

        # scoring
        score = get_score(gathered_labels, gathered_preds)
        score2 = roc_auc_score(gathered_labels, gathered_preds[:, 1])
        # NOTE(review): the value logged under "pAUC" is the full ROC-AUC (score2).
        wandb.log({"pAUC" : score2})

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s') #.info makes the msg shows in red cadre
        LOGGER.info(f'Epoch {epoch+1} - LogLoss: {score} - AUC: {score2}')

        # Unwrap into a separate name: rebinding `model` here would make every
        # following epoch train the bare module without the Accelerate wrapper.
        accelerator.wait_for_everyone()
        unwrapped_model = accelerator.unwrap_model(model)

        oof_pred = pd.DataFrame({"True" : gathered_labels , "Predicted" : gathered_preds[:, 1] , "img_name" : gathered_file_names  })
        oof_pred.to_csv(f'OOF_pred_fold{fold}_epoch_{epoch}.csv' , index =False)

        print("Checking files names of the oof ...")
        # array_equal also copes with length mismatches, which `==` + `.all()` does not.
        names_match = np.array_equal(valid_folds['img_name'].values, oof_pred['img_name'].values)
        targets_match = np.array_equal(valid_folds['target'].values, oof_pred['True'].values)
        if names_match and targets_match :
            LOGGER.info('Successful OOF predictions released!')
        else :
            LOGGER.info('There is a problem with the released OOF predictions.')

        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            # Save a state dict under the 'model' key, which is what inference()
            # loads with model.load_state_dict(state['model']).
            accelerator.save({'model': unwrapped_model.state_dict()},
                             OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')

    return
In [ ]:
def init_model(accelerator,  pretrained=True):
    """Build the CustomResNext model configured by CFG.

    Args:
        accelerator: Accelerate ``Accelerator``; construction runs inside
            ``accelerator.main_process_first()``.
        pretrained: forwarded to ``CustomResNext`` (previously ignored and
            always treated as ``True``).

    Returns:
        The newly constructed model.
    """
    with accelerator.main_process_first():
        print(f"Initializing {CFG.model_name} model")
        model = CustomResNext(CFG.model_name,
                              num_classes=0,
                              n_meta_features=len(meta_features),
                              pretrained=pretrained)
    return model
In [ ]:
def inference(model, states, test_loader, device=None):
    """Predict on `test_loader`, averaging the outputs of every checkpoint in `states`.

    Args:
        model: network whose weights are overwritten from each checkpoint.
        states: non-empty sequence of checkpoints, each exposing the weights
            under the ``'model'`` key.
        test_loader: iterable yielding image batches (a single tensor per batch).
            NOTE(review): the training code calls the model with an extra
            metadata argument -- confirm the test dataset / model used here
            really take images only.
        device: device to run on; defaults to CUDA when available, else CPU.

    Returns:
        ``np.ndarray`` of raw model outputs (no softmax applied), averaged over
        checkpoints and concatenated over batches.

    Raises:
        ValueError: if `states` is empty.
    """
    if not states:
        raise ValueError("inference() needs at least one checkpoint in `states`")

    # The previous version relied on a global `accelerator` and re-ran
    # accelerator.prepare() for every batch and checkpoint while still iterating
    # the unprepared loader, so inputs were never moved to the model's device.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    probs = []
    for images in tqdm(test_loader, total=len(test_loader)):
        images = images.to(device)
        avg_preds = []
        for state in states:
            model.load_state_dict(state['model'])
            with torch.no_grad():
                y_preds = model(images)
            avg_preds.append(y_preds.to('cpu').numpy())

        # Mean over checkpoints for this batch.
        probs.append(np.mean(avg_preds, axis=0))
    probs = np.concatenate(probs)
    return probs
In [ ]:
def main():
    """Run the training and/or inference stages selected in ``CFG``.

    Reads module-level state: ``CFG``, ``LOGGER``, ``OUTPUT_DIR``, ``folds``
    and ``test``.

    * ``CFG.train``: creates an fp16 ``Accelerator`` and runs ``train_loop``
      for every fold listed in ``CFG.trn_fold``.
    * ``CFG.inference``: loads the per-fold best checkpoints, predicts on
      ``test``, and writes ``submission.csv`` to ``OUTPUT_DIR``.
    """
    n_gpus = torch.cuda.device_count()
    if n_gpus > 1:
        print("Using", n_gpus, "GPUs!")
    elif n_gpus == 1:
        print("Only one GPU available.")
    else:
        # Previously reported "Only one GPU available." even with zero GPUs.
        print("No GPU available; running on CPU.")

    def get_result(result_df):
        """Log the score of the ``preds`` column against ``CFG.target_col``.

        Currently only referenced by the disabled OOF aggregation below.
        """
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.5f}')

    if CFG.train:
        # Mixed precision (32-bit -> 16-bit floats), gradient accumulation, and
        # multi-GPU data-flow management are all delegated to accelerate.
        accelerator = Accelerator(gradient_accumulation_steps=CFG.gradient_accumulation_steps,
                                  mixed_precision='fp16')  # bf16
        # train
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                model = init_model(accelerator)
                train_loop(folds, fold, model, accelerator)
                # NOTE: OOF aggregation is disabled; re-enable the lines below
                # if train_loop is changed to return the fold's OOF frame.
                #oof_df = pd.concat([oof_df, _oof_df])
                #LOGGER.info(f"========== fold: {fold} result ==========")
                #get_result(_oof_df)
        # CV result
        LOGGER.info("========== CV ==========")
        #get_result(oof_df)
        # save result
        #oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)

    if CFG.inference:
        # inference
        # NOTE(review): init_model builds the network with num_classes=0 and
        # n_meta_features=len(meta_features); confirm this default construction
        # matches the architecture stored in the checkpoints.
        model = CustomResNext(CFG.model_name, pretrained=False)
        # map_location='cpu' stops every process from restoring the checkpoints
        # onto the GPU they were saved from.
        states = [torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth', map_location='cpu')
                  for fold in CFG.trn_fold]
        test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
        test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False,
                                 num_workers=CFG.num_workers, pin_memory=True)
        predictions = inference(model, states, test_loader)
        # submission
        print(predictions)
        # Softmax over the class axis; column 1 is written as the label.
        test['label'] = torch.nn.functional.softmax(torch.from_numpy(predictions), dim=1).numpy()[:,1]
        print(test['label'])
        test[['img_name', 'label']].to_csv(OUTPUT_DIR+'submission.csv', index=False)

gathered true: 280, gathered pred 280, true shape 267gathered true: 280, gathered pred 280, true shape 267

In [ ]:
# Entry point: hand ``main`` to accelerate's notebook_launcher with two processes.
if __name__ == '__main__':
    notebook_launcher(main, args=(), num_processes=2)
In [ ]:
# torch.load("/kaggle/working/efficientnet_b4_fold0_best.pth")

Training with gradient accumulation steps = 2:

Epoch: [1][0/7296] Data 4.836 (4.836) Elapsed 0m 12s (remain 1552m 8s) Loss: 1.2454(1.2454) Grad: 3.4937
Epoch: [1][0/7296] Data 4.755 (4.755) Elapsed 0m 12s (remain 1553m 48s) Loss: 1.5479(1.5479) Grad: 3.4937
Epoch: [1][20/7296] Data 0.020 (0.444) Elapsed 0m 36s (remain 210m 13s) Loss: 0.1604(0.4781) Grad: 0.2797
Epoch: [1][20/7296] Data 0.014 (0.546) Elapsed 0m 36s (remain 210m 18s) Loss: 0.2713(0.5126) Grad: 0.2797
Epoch: [1][40/7296] Data 0.008 (0.538) Elapsed 1m 7s (remain 197m 57s) Loss: 0.3300(0.4262) Grad: 0.6303
Epoch: [1][40/7296] Data 0.015 (0.525) Elapsed 1m 7s (remain 198m 15s) Loss: 0.6318(0.4116) Grad: 0.6303
Epoch: [1][60/7296] Data 0.033 (0.379) Elapsed 1m 35s (remain 188m 44s) Loss: 0.3123(0.3905) Grad: 0.3299
Epoch: [1][60/7296] Data 0.016 (0.523) Elapsed 1m 35s (remain 188m 47s) Loss: 0.3251(0.3737) Grad: 0.3299
Epoch: [1][80/7296] Data 0.039 (0.446) Elapsed 2m 4s (remain 185m 3s) Loss: 0.3523(0.3495) Grad: 0.3777
Epoch: [1][80/7296] Data 0.010 (0.428) Elapsed 2m 4s (remain 185m 8s) Loss: 0.2265(0.3773) Grad: 0.3777

Training with gradient accumulation steps = 1 (normal training):

Epoch: [1][0/7296] Data 4.909 (4.909) Elapsed 0m 12s (remain 1578m 30s) Loss: 1.5479(1.5479) Grad: inf
Epoch: [1][0/7296] Data 5.003 (5.003) Elapsed 0m 12s (remain 1580m 6s) Loss: 1.2454(1.2454) Grad: inf
Epoch: [1][20/7296] Data 0.009 (0.518) Elapsed 0m 35s (remain 205m 33s) Loss: 0.1662(0.5732) Grad: 0.5712
Epoch: [1][20/7296] Data 0.018 (0.462) Elapsed 0m 35s (remain 205m 46s) Loss: 0.2877(0.5936) Grad: 0.5712
Epoch: [1][40/7296] Data 0.029 (0.573) Elapsed 1m 5s (remain 193m 58s) Loss: 0.6414(0.4690) Grad: 1.3110
Epoch: [1][40/7296] Data 0.016 (0.480) Elapsed 1m 5s (remain 194m 1s) Loss: 0.3384(0.4711) Grad: 1.3110
Epoch: [1][60/7296] Data 0.029 (0.568) Elapsed 1m 33s (remain 185m 8s) Loss: 0.3308(0.4143) Grad: 0.6530
Epoch: [1][60/7296] Data 0.020 (0.435) Elapsed 1m 33s (remain 185m 11s) Loss: 0.3063(0.4213) Grad: 0.6530
Epoch: [1][80/7296] Data 0.009 (0.367) Elapsed 2m 2s (remain 181m 33s) Loss: 0.2340(0.4010) Grad: 0.7946
Epoch: [1][80/7296] Data 0.023 (0.574) Elapsed 2m 2s (remain 181m 33s) Loss: 0.3610(0.3808) Grad: 0.7946

* Training without gradient accumulation is slightly faster (about 2m 2s vs. 2m 4s for the first 80 steps in the logs above).

In [ ]:
def print_library_versions():
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"torchvision version: {torchvision.__version__}")
#     print(f"accelerate version: {Accelerator.__version__}")
    print(f"NumPy version: {np.__version__}")
    print(f"Pandas version: {pd.__version__}")