MMDetection training with ClearML hooks
- ClearML API keys set up
- Training
- Evaluation (imbalanced as well)
- Direct S3 reading
- Hyperparameter search
cd tools && ./dist_train_clearml-example.sh <config file> <num gpus>
# Launch coco_mini training through dist_run.py, passing the ClearML
# (--clml-*) and S3 download options. Positional arguments from the third
# onward are forwarded unchanged after the train.py options.
CONFIG=../configs/coco_mini/coco_mini.py
GPUS=1
PORT=${PORT:-29500}  # keep a caller-supplied PORT; otherwise use 29500

# Endpoint and credentials are exported so the launched processes inherit
# them. The two key variables are re-exported from the current environment.
export AWS_ENDPOINT_URL=https://play.min.io
export AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY"
export CERT_PATH=

# Quoted expansions keep paths and forwarded arguments containing spaces
# intact; "$CONFIG" replaces the previously duplicated hard-coded path.
python3 dist_run.py --nproc_per_node="$GPUS" --master_port="$PORT" \
  --clml-run-locally --clml-proj mmdet --clml-task-name coco_mini_train \
  --download-models 'resnet50_msra-5891d200.pth' \
  --s3-models-bucket mmdet-wts --s3-models-path '' \
  --download-data coco_mini --s3-data-bucket coco --s3-data-path '' \
  "$(dirname "$0")/train.py" "$CONFIG" --launcher pytorch --clearml "${@:3}"
Expects the following environment variables to be set:
- AWS_ENDPOINT_URL
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- CERT_PATH (optional)
- CERT_DL_URL (optional)
See coco_mini_s3_direct.py for an example config file.
# With the same env var exports as the training example; GPUS and PORT must
# be set as well. Differs from that example by passing --s3-direct-read,
# listing only train.json and val.json under --download-data, and using the
# coco_mini_s3_direct.py config.
# NOTE(review): presumably the images are then read straight from S3 - confirm.
# Uses python3 like every other command here, and quotes the expansions so
# forwarded arguments containing spaces are not re-split.
python3 dist_run.py --nproc_per_node="$GPUS" --master_port="$PORT" \
  --clml-run-locally --clml-proj mmdet \
  --clml-task-name coco_mini_train_s3_direct \
  --download-models 'resnet50_msra-5891d200.pth' \
  --s3-models-bucket mmdet-wts --s3-models-path '' \
  --s3-direct-read \
  --download-data coco_mini/train.json coco_mini/val.json \
  --s3-data-bucket coco --s3-data-path '' \
  "$(dirname "$0")/train.py" ../configs/coco_mini/coco_mini_s3_direct.py \
  --launcher pytorch --clearml "${@:3}"
# Evaluation by calling test.py directly (no dist_run.py wrapper, no --clearml):
# bbox evaluation with classwise=True, visualisations written to eval_viz.
python3 test.py \
  ../configs/coco_mini/coco_mini.py \
  weights/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth \
  --fuse-conv-bn \
  --write-result \
  --eval bbox \
  --show-dir eval_viz \
  --eval-options classwise=True
# Evaluation launched through dist_run.py on a single process, with the
# ClearML options (--clml-*, --clearml) and --skip-s3 passed.
python3 dist_run.py \
  --nproc_per_node=1 \
  --master_port=29500 \
  --clml-run-locally \
  --clml-proj mmdet \
  --clml-task-name coco_mini-test \
  --skip-s3 \
  test.py \
  ../configs/coco_mini/coco_mini.py \
  weights/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth \
  --clearml \
  --fuse-conv-bn \
  --write-result \
  --eval bbox \
  --show-dir eval_viz \
  --eval-options classwise=True
# Evaluation through dist_run.py with --sub-eval pointing at
# datasets/coco_mini/sampled/ (and without --eval-options).
# NOTE(review): presumably this is the "imbalanced" evaluation - confirm.
python3 dist_run.py \
  --nproc_per_node=1 \
  --master_port=29500 \
  --clml-run-locally \
  --clml-proj mmdet \
  --clml-task-name coco_mini-test \
  --skip-s3 \
  test.py \
  ../configs/coco_mini/coco_mini.py \
  weights/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth \
  --clearml \
  --fuse-conv-bn \
  --write-result \
  --eval bbox \
  --sub-eval datasets/coco_mini/sampled/ \
  --show-dir eval_viz