Skip to content

Commit

Permalink
Automatic eval using autotune
Browse files Browse the repository at this point in the history
  • Loading branch information
sxyu committed Nov 7, 2021
1 parent f56e4cd commit f0888aa
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 11 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ See `opt/render_imgs.py`
## Automatic hypertuning

See `opt/autotune.py`. Configs in `opt/tasks/*.json`

Automatic eval:
`python autotune.py -g '<space-delimited GPU ids>' tasks/eval.json`. Configs in `opt/tasks/*.json`
36 changes: 30 additions & 6 deletions opt/autotune.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@
parser.add_argument("--gpus", "-g", type=str, required=True,
help="space delimited GPU id list (global id in nvidia-smi, "
"not considering CUDA_VISIBLE_DEVICES)")
parser.add_argument('--eval', action='store_true', default=False,
help='evaluation mode (run the render_imgs script)')
args = parser.parse_args()

PSNR_FILE_NAME = 'test_psnr.txt'

def run_exp(env, train_dir, data_dir, flags):
opt_base_cmd = [
"python", "-u", "opt.py", "--tune_mode",
opt_base_cmd = [ "python", "-u", "opt.py", "--tune_mode" ]

opt_base_cmd += ["--tune_nosave"]
opt_base_cmd += [
"-t", train_dir,
data_dir
]
Expand All @@ -43,13 +47,29 @@ def run_exp(env, train_dir, data_dir, flags):
opt_ret = subprocess.check_output(opt_cmd, shell=True, env=env).decode(
sys.stdout.encoding)
except subprocess.CalledProcessError:
print('Error occurred while running exp', train_dir)
print('Error occurred while running OPT for exp', train_dir)
return
with open(log_file_path, 'w') as f:
f.write(opt_ret)
test_stats = [eval(x.split('eval stats:')[-1].strip())
for x in opt_ret.split('\n') if
x.startswith('eval stats: ')]

if args.eval:
eval_base_cmd = [
"python", "-u", "render_imgs.py",
path.join(train_dir, 'ckpt.npz'),
data_dir
]
try:
eval_ret = subprocess.check_output(eval_base_cmd, shell=True, env=env).decode(
sys.stdout.encoding)
except subprocess.CalledProcessError:
print('Error occurred while running EVAL for exp', train_dir)
return
test_stats = [{x.split(':')[0].strip(): float(x.split(':')[1])
for x in eval_ret.strip().split('\n')[-3:] if ':' in x}]
else:
test_stats = [eval(x.split('eval stats:')[-1].strip())
for x in opt_ret.split('\n') if
x.startswith('eval stats: ')]
if len(test_stats) == 0:
print('note: invalid config or crash')
final_test_psnr = 0.0
Expand Down Expand Up @@ -133,6 +153,10 @@ def recursive_replace(data, variables):
data_root = path.expanduser(tasks_file['data_root']) # Required
train_root = path.expanduser(tasks_file['train_root']) # Required
base_flags = tasks_file.get('base_flags', [])

if 'eval' in tasks_file:
args.eval = tasks_file['eval']
print('Eval mode?', args.eval)
pqueue = Queue()

leaderboard_path = path.join(train_root, 'leaderboard.txt')
Expand Down
10 changes: 7 additions & 3 deletions opt/opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@
type=str,
default=
# "[[128, 128, 128], [256, 256, 256], [512, 512, 512], [768, 768, 768]]",
"[[128, 128, 128], [256, 256, 256], [512, 512, 512]]",
# "[[128, 128, 128], [256, 256, 256], [512, 512, 512]]",
"[[256, 256, 256], [512, 512, 512]]",
help='List of grid resolution (will be evaled as json);'
'resamples to the next one every upsamp_every iters, then ' +
'stays at the last one; ' +
'should be a list where each item is a list of 3 ints or an int')
group.add_argument('--upsamp_every', type=int, default=
2 * 12800,
# 2 * 12800,
3 * 12800,
help='upsample the grid every x iters')
group.add_argument('--init_iters', type=int, default=
0, #-12800,
Expand Down Expand Up @@ -152,6 +154,8 @@

group.add_argument('--tune_mode', action='store_true', default=False,
help='hypertuning mode (do not save, for speed)')
group.add_argument('--tune_nosave', action='store_true', default=False,
help='do not save any checkpoint even at the end')



Expand Down Expand Up @@ -569,6 +573,6 @@ def train_step():
if gstep_id_base >= args.n_iters:
print('* Final eval and save')
eval_step()
if not args.tune_mode:
if not args.tune_nosave:
grid.save(ckpt_path)
break
6 changes: 4 additions & 2 deletions opt/render_imgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,15 @@
n_images_gen += 1
avg_psnr /= n_images_gen
avg_ssim /= n_images_gen
print('average PSNR', avg_psnr, 'SSIM', avg_ssim)
print('AVERAGES')
print('PSNR:', avg_psnr)
print('SSIM:', avg_ssim)
with open(path.join(render_dir, 'psnr.txt'), 'w') as f:
f.write(str(avg_psnr))
with open(path.join(render_dir, 'ssim.txt'), 'w') as f:
f.write(str(avg_ssim))
if not args.no_lpips:
avg_lpips /= n_images_gen
print('average LPIPS', avg_lpips)
print('LPIPS:', avg_lpips)
with open(path.join(render_dir, 'lpips.txt'), 'w') as f:
f.write(str(avg_lpips))
12 changes: 12 additions & 0 deletions opt/tasks/eval.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"eval": true,
"data_root": "/home/sxyu/data/nerf_synthetic",
"train_root": "/home/sxyu/proj/svox2/opt/ckpt_auto/256_to_512_u3_10e",
"variables": {
"scene": ["lego", "mic", "ship", "chair", "ficus", "materials", "drums", "hotdog"]
},
"tasks": [{
"train_dir": "train_{scene}",
"data_dir": "{scene}"
}]
}

0 comments on commit f0888aa

Please sign in to comment.