Start setting up the improved W&B integration #1948

Merged
merged 36 commits into from
Feb 2, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
3530aa5  Add helper functions for wandb and artifacts (AyushExel, Jan 15, 2021)
1ef16b1  cleanup (AyushExel, Jan 15, 2021)
bd37ac7  Reorganize files (AyushExel, Jan 15, 2021)
7cd5623  Update wandb_utils.py (glenn-jocher, Jan 17, 2021)
d8daa64  Update log_dataset.py (glenn-jocher, Jan 17, 2021)
fc7d8f9  Reorganize and update dataloader call (AyushExel, Jan 18, 2021)
9b8f46a  yaml.SafeLoader (glenn-jocher, Jan 20, 2021)
2c67ba1  PEP8 reformat (glenn-jocher, Jan 20, 2021)
9ffb887  remove redundant checks (glenn-jocher, Jan 20, 2021)
1eca722  Add helper functions for wandb and artifacts (AyushExel, Jan 15, 2021)
ad8408f  cleanup (AyushExel, Jan 15, 2021)
8b039b3  Reorganize files (AyushExel, Jan 15, 2021)
0b715eb  Update wandb_utils.py (glenn-jocher, Jan 17, 2021)
58ae23f  Update log_dataset.py (glenn-jocher, Jan 17, 2021)
6671347  Reorganize and update dataloader call (AyushExel, Jan 18, 2021)
bbc5271  yaml.SafeLoader (glenn-jocher, Jan 20, 2021)
9bbecc8  PEP8 reformat (glenn-jocher, Jan 20, 2021)
ceb63cd  remove redundant checks (glenn-jocher, Jan 20, 2021)
2529fb7  Update util files (AyushExel, Jan 21, 2021)
efdcd7e  Update wandb_utils.py (AyushExel, Jan 21, 2021)
0d84870  Remove word size (AyushExel, Jan 21, 2021)
93219b9  Change path of labels.zip (AyushExel, Jan 21, 2021)
ac8fad2  remove unused imports (glenn-jocher, Jan 23, 2021)
e40b817  remove --rect (glenn-jocher, Jan 23, 2021)
0e21989  log_dataset.py cleanup (glenn-jocher, Jan 23, 2021)
3e53865  log_dataset.py cleanup2 (glenn-jocher, Jan 23, 2021)
4ebfb83  wandb_utils.py cleanup (glenn-jocher, Jan 23, 2021)
58616a6  remove redundant id_count (glenn-jocher, Jan 23, 2021)
a8168c3  wandb_utils.py cleanup2 (glenn-jocher, Jan 23, 2021)
1bb261d  rename cls (glenn-jocher, Jan 23, 2021)
850b59d  use pathlib for zip (glenn-jocher, Jan 23, 2021)
94b7631  rename dataloader to dataset (glenn-jocher, Jan 23, 2021)
b959a02  Change import order (AyushExel, Jan 27, 2021)
4e9a54c  Remove redundant code (AyushExel, Jan 30, 2021)
28ad4ab  remove unused import (AyushExel, Jan 30, 2021)
5651225  remove unused imports (glenn-jocher, Feb 2, 2021)
Add helper functions for wandb and artifacts
AyushExel committed Feb 1, 2021
commit 3530aa5740860bcd48f460001de383c89aa41b0c
86 changes: 86 additions & 0 deletions log_dataset.py
@@ -0,0 +1,86 @@
import argparse
from warnings import warn

import torch
import yaml

from utils.datasets import create_dataloader
from utils.general import check_dataset
from utils.torch_utils import torch_distributed_zero_first
from utils.wandb_utils import WandbLogger

WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def create_dataset_artifact(opt):
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
    wandb_logger = WandbLogger(opt, '', None, data_dict, job_type='create_dataset')

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps
        if 'box' not in hyp:
            warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' %
                 (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120'))
            hyp['box'] = hyp.pop('giou')

    with torch_distributed_zero_first(-1):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names'])
    imgsz, batch_size = opt.img_size, opt.batch_size
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    trainloader = create_dataloader(train_path, imgsz, batch_size, 32, opt,
                                    hyp=hyp, cache=opt.cache_images, rect=opt.rect, rank=-1,
                                    world_size=1, workers=opt.workers)[0]
    testloader = create_dataloader(test_path, imgsz, batch_size, 32, opt,  # testloader
                                   hyp=hyp, cache=opt.cache_images, rect=True,
                                   rank=-1, world_size=1, workers=opt.workers, pad=0.5)[0]
    names_to_ids = {k: v for k, v in enumerate(names)}  # maps class id -> class name

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    wandb_logger.log_dataset_artifact(trainloader, device, names_to_ids, name='train')
    wandb_logger.log_dataset_artifact(testloader, device, names_to_ids, name='val')

    # Update/create a data config file that points at the logged artifacts
    data_dict['train'] = WANDB_ARTIFACT_PREFIX + opt.project + '/train'
    data_dict['val'] = WANDB_ARTIFACT_PREFIX + opt.project + '/val'
    output_data_config = opt.data if opt.overwrite_config else opt.data.replace('.', '_wandb.')
    data_dict.pop('download', None)  # don't download the original dataset; use artifacts instead
    with open(output_data_config, 'w') as fp:
        yaml.dump(data_dict, fp)
    print("New config file =>", output_data_config)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
    parser.add_argument('--project', type=str, default='yolov5', help='name of W&B Project')
    parser.add_argument('--img-size', nargs='+', type=int, default=640, help='[train, test] image sizes')
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
    parser.add_argument('--overwrite_config', action='store_true', help='replace the original data config file')
    opt = parser.parse_args()

    create_dataset_artifact(opt)
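Note: a minimal usage sketch (not part of the diff) of how this script would be driven. Assumptions: wandb is installed and logged in, and data/coco128.yaml exists locally; this mirrors "python log_dataset.py --data data/coco128.yaml --project yolov5" using the argparse defaults above.

import argparse

# Build an options namespace equivalent to the parser defaults above (assumed values)
opt = argparse.Namespace(data='data/coco128.yaml', single_cls=False, image_weights=False,
                         rect=False, cache_images=False, workers=8, project='yolov5',
                         img_size=640, batch_size=16, hyp='data/hyp.scratch.yaml',
                         overwrite_config=False)
create_dataset_artifact(opt)
# Afterwards, data/coco128_wandb.yaml points 'train' and 'val' at
# 'wandb-artifact://yolov5/train' and 'wandb-artifact://yolov5/val'.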
187 changes: 187 additions & 0 deletions utils/wandb_utils.py
@@ -0,0 +1,187 @@
import json
import logging
import os
import shutil
from datetime import datetime
from pathlib import Path

import torch

from utils.general import xywh2xyxy

logger = logging.getLogger(__name__)

try:
    import wandb
except ImportError:
    wandb = None
    print("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")

WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def remove_prefix(from_string, prefix):
    return from_string[len(prefix):]


class WandbLogger:
    def __init__(self, opt, name, run_id, data_dict, job_type='Training'):
        self.wandb = wandb
        if self.wandb:
            self.wandb_run = wandb.init(config=opt, resume="allow",
                                        project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                                        name=name,
                                        job_type=job_type,
                                        id=run_id)
        else:
            self.wandb_run = None
        if job_type == 'Training':
            self.setup_training(opt, data_dict)
            if opt.bbox_interval == -1:
                opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else opt.epochs
            if opt.save_period == -1:
                opt.save_period = (opt.epochs // 10) if opt.epochs > 10 else opt.epochs

    def setup_training(self, opt, data_dict):
        self.log_dict = {}
        self.train_artifact_path, self.trainset_artifact = self.download_dataset_artifact(data_dict['train'],
                                                                                          opt.artifact_alias)
        self.test_artifact_path, self.testset_artifact = self.download_dataset_artifact(data_dict['val'],
                                                                                        opt.artifact_alias)
        self.result_artifact, self.result_table, self.weights = None, None, None
        if self.train_artifact_path is not None:
            train_path = self.train_artifact_path + '/data/images/'
            data_dict['train'] = train_path
        if self.test_artifact_path is not None:
            test_path = self.test_artifact_path + '/data/images/'
            data_dict['val'] = test_path
            self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
            self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
        if opt.resume_from_artifact:
            modeldir, _ = self.download_model_artifact(opt.resume_from_artifact)
            if modeldir:
                self.weights = modeldir + "/best.pt"
                opt.weights = self.weights

    def download_dataset_artifact(self, path, alias):
        if path.startswith(WANDB_ARTIFACT_PREFIX):
            dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
            if dataset_artifact is None:
                logger.error('Error: W&B dataset artifact doesn\'t exist')
                raise ValueError('Artifact doesn\'t exist')
            datadir = dataset_artifact.download()
            labels_zip = datadir + "/data/labels.zip"
            shutil.unpack_archive(labels_zip, datadir + '/data/labels', 'zip')
            print("Downloaded dataset to:", datadir)
            return datadir, dataset_artifact
        return None, None

    def download_model_artifact(self, name):
        model_artifact = wandb.use_artifact(name + ":latest")
        if model_artifact is None:
            logger.error('Error: W&B model artifact doesn\'t exist')
            raise ValueError('Artifact doesn\'t exist')
        modeldir = model_artifact.download()
        print("Downloaded model to:", modeldir)
        return modeldir, model_artifact

    def log_model(self, path, opt, epoch):
        datetime_suffix = datetime.today().strftime('%Y-%m-%d-%H-%M-%S')
        model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={
            'original_url': str(path),
            'epoch': epoch + 1,
            'save period': opt.save_period,
            'project': opt.project,
            'datetime': datetime_suffix
        })
        model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
        model_artifact.add_file(str(path / 'best.pt'), name='best.pt')
        wandb.log_artifact(model_artifact)

        if epoch + 1 == opt.epochs:
            model_artifact = wandb.Artifact('final_model', type='model', metadata={
                'run_id': wandb.run.id,
                'datetime': datetime_suffix
            })
            model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
            model_artifact.add_file(str(path / 'best.pt'), name='best.pt')
            wandb.log_artifact(model_artifact)
        print("Saving model artifact on epoch", epoch + 1)

    def log_dataset_artifact(self, dataloader, device, class_to_id, name='dataset'):
        artifact = wandb.Artifact(name=name, type="dataset")
        image_path = dataloader.dataset.path
        artifact.add_dir(image_path, name='data/images')
        table = wandb.Table(columns=["id", "train_image", "Classes"])
        id_count = 0
        class_set = wandb.Classes([{'id': k, 'name': v} for k, v in class_to_id.items()])
        for batch_i, (img, targets, paths, shapes) in enumerate(dataloader):
            targets = targets.to(device)
            nb, _, height, width = img.shape  # batch size, channels, height, width
            targets[:, 2:] = xywh2xyxy(targets[:, 2:].view(-1, 4))
            for si, _ in enumerate(img):
                height, width = shapes[si][0]
                labels = targets[targets[:, 0] == si]
                labels[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)
                labels = labels[:, 1:]
                box_data = []
                img_classes = {}
                for cls, *xyxy in labels.tolist():
                    class_id = int(cls)
                    box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                                     "class_id": class_id,
                                     "box_caption": "%s" % class_to_id[class_id],
                                     "scores": {"acc": 1},
                                     "domain": "pixel"})
                    img_classes[class_id] = class_to_id[class_id]
                boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
                table.add_data(id_count, wandb.Image(paths[si], classes=class_set, boxes=boxes),
                               json.dumps(img_classes))
                id_count += 1
        artifact.add(table, name)
        label_path = image_path.replace('images', 'labels')
        # Workaround for: unable to log empty .txt files via artifacts
        if not os.path.isfile(name + '_labels.zip'):  # make_archive won't check if file exists
            shutil.make_archive(name + '_labels', 'zip', label_path)
        artifact.add_file(name + '_labels.zip', name='data/labels.zip')
        wandb.log_artifact(artifact)
        print("Saving data to W&B...")

    def log(self, log_dict):
        if self.wandb_run:
            for key, value in log_dict.items():
                self.log_dict[key] = value

    def end_epoch(self):
        if self.wandb_run and self.log_dict:
            wandb.log(self.log_dict)
            self.log_dict = {}

    def finish_run(self):
        if self.wandb_run:
            if self.result_artifact:
                print("Add Training Progress Artifact")
                self.result_artifact.add(self.result_table, 'result')
                train_results = wandb.JoinedTable(self.testset_artifact.get("val"), self.result_table, "id")
                self.result_artifact.add(train_results, 'joined_result')
                wandb.log_artifact(self.result_artifact)
            if self.log_dict:
                wandb.log(self.log_dict)
            wandb.run.finish()

    # !!!!! WIP !!!!
    def add_to_training_progress(self, pred, class_map, class_dict_list, epoch, index):
        # Painfully slow! Investigate: 1) is pred too large? 2) replace class map dicts with a class variable?
        if self.wandb_run and self.result_table and self.testset_artifact:
            box_data = []
            testset_table = self.testset_artifact.get("val").data
            for *xyxy, conf, cls in pred.tolist():
                if conf >= 0.175:  # arbitrary confidence threshold; should become an argparse option
                    box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                                     "class_id": int(cls),
                                     "box_caption": "%s %.3f" % (class_map[int(cls)], conf),
                                     "scores": {"class_score": conf},
                                     "domain": "pixel"})
            boxes = {"predictions": {"box_data": box_data, "class_labels": class_map}}  # inference-space
            self.result_table.add_data(epoch,
                                       index,
                                       wandb.Image(testset_table[index][1], boxes=boxes, classes=class_dict_list))
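To make the intended call sequence concrete, here is a minimal, hypothetical sketch of how a training script would drive this logger. The opt fields used (epochs, save_period, artifact_alias, resume_from_artifact, bbox_interval, project, save_dir) are assumptions inferred from the attribute reads above, not a confirmed train.py interface.

from pathlib import Path

# Hypothetical wiring of WandbLogger into a training loop (sketch only)
wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, None, data_dict, job_type='Training')
for epoch in range(opt.epochs):
    # ... forward/backward passes and validation happen here ...
    wandb_logger.log({'train/box_loss': 0.05, 'metrics/mAP_0.5': 0.6})  # buffer metrics
    wandb_logger.end_epoch()  # flush the buffered dict via wandb.log()
    if (epoch + 1) % opt.save_period == 0:
        wandb_logger.log_model(Path(opt.save_dir) / 'weights', opt, epoch)  # upload last.pt/best.pt
wandb_logger.finish_run()  # log the results artifact and close the run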