Skip to content

Commit

Permalink
calculating memory metrics for cpu usage (fastai#2411)
Browse files Browse the repository at this point in the history
* calculating memory metrics for cpu usage

* small fixes
  • Loading branch information
lscarpato authored and sgugger committed Nov 20, 2019
1 parent 02d7776 commit 08abcde
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
42 changes: 42 additions & 0 deletions fastai/callbacks/cpu_mem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
" Memory profiling callbacks "

import tracemalloc, threading, torch, time
from ..utils.mem import *
from ..basic_train import *
from ..torch_core import *

class CpuPeakMemMetric(LearnerCallback):
    "Callback that reports, per epoch, the current and peak CPU memory traced by `tracemalloc`, in MBs."

    _order = -20 # Needs to run before the recorder

    # Handle of the monitor thread of the epoch in flight; `None` whenever no thread is running.
    # Declared on the class so it can safely be read before the first `peak_monitor_start`.
    _peak_monitor_thread = None

    def peak_monitor_start(self):
        "Start `tracemalloc` tracing and launch the background monitor thread for the current epoch."
        # A previous run may have left a monitor thread behind (e.g. `peak_monitor_stop` was never
        # reached): retire it first so that threads cannot pile up.
        self._join_peak_monitor_thread()
        self.peak_monitoring = True

        # start RAM tracing
        tracemalloc.start()

        # this thread stays alive as long as the current epoch of the fit loop is running
        monitor = threading.Thread(target=self.peak_monitor_func)
        monitor.daemon = True
        monitor.start()
        # only remember the thread once it has really started, so that it is always joinable
        self._peak_monitor_thread = monitor

    def peak_monitor_stop(self):
        "Stop `tracemalloc` tracing and wait for the monitor thread to exit. Safe to call repeatedly."
        tracemalloc.stop()
        self._join_peak_monitor_thread()

    def _join_peak_monitor_thread(self):
        "Clear the `peak_monitoring` flag and block until the monitor thread (if any) has finished."
        self.peak_monitoring = False
        monitor, self._peak_monitor_thread = self._peak_monitor_thread, None
        # Joining guarantees the old thread has observed the cleared flag before the flag can be
        # raised again; without it the thread could miss the 1 msec window and never terminate.
        if monitor is not None: monitor.join()

    def peak_monitor_func(self):
        "Body of the monitor thread: idle in 1 msec steps until `peak_monitoring` is cleared."
        # `tracemalloc` keeps track of the peak itself, so nothing is sampled in this loop;
        # the attribute is only initialised to its -1 placeholder.
        self.cpu_mem_used_peak = -1
        while self.peak_monitoring:
            time.sleep(0.001) # 1msec

    def on_train_begin(self, **kwargs):
        "Register the names of the two columns this callback adds to the recorder."
        self.learn.recorder.add_metric_names(['cpu used', 'cpu_peak'])

    def on_epoch_begin(self, **kwargs):
        "Start measuring at the beginning of every epoch."
        self.peak_monitor_start()

    def on_epoch_end(self, last_metrics, **kwargs):
        "Append the epoch's used and peak traced memory (in MBs) to `last_metrics`."
        # `get_traced_memory` returns (current, peak) sizes in bytes; it has to be read before
        # tracing is stopped, because stopping discards the collected traces.
        cpu_used, cpu_peak = (float(n_bytes / 2 ** 20) for n_bytes in tracemalloc.get_traced_memory())
        self.peak_monitor_stop()
        # The numbers are deltas in MBs (beginning of the epoch and the end)
        return add_metrics(last_metrics, [cpu_used, cpu_peak])

    def on_train_end(self, **kwargs):
        "Make sure tracing and the monitor thread are shut down once training is over."
        # Safety net for a fit that is cut short in the middle of an epoch, in which case
        # `on_epoch_end` never gets to stop the monitor. After a normal run this is a no-op.
        # NOTE(review): relies on the training loop calling `on_train_end` on abort too - confirm.
        self.peak_monitor_stop()
20 changes: 20 additions & 0 deletions tests/test_callbacks_cpu_mem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from fastai.callbacks.cpu_mem import *
from fastai.gen_doc.doctest import this_tests
from utils.fakes import *
from utils.text import CaptureStdout

@pytest.mark.skip("occassional random failures")
@pytest.mark.cuda
def test_peak_mem_metric():
    "Fit a fake learner with `CpuPeakMemMetric` attached and check what the training loop printed."
    learner = fake_learner()
    learner.callbacks.append(CpuPeakMemMetric(learner))
    this_tests(CpuPeakMemMetric)

    # capture everything the 3-epoch fit prints to stdout
    with CaptureStdout() as captured:
        learner.fit_one_cycle(3, max_lr=1e-2)
    printed = captured.out

    # the two metric column names must show up in the output
    column_names = ('cpu used', 'cpu_peak')
    # XXX: needs a better test to assert some numbers here (at least >0)
    # epochs 2-3 it shouldn't allocate more general or CPU RAM
    zero_rows = ('0 0',)

    for needle in column_names + zero_rows:
        assert needle in printed, f"expecting '{needle}' in \n{printed}"

0 comments on commit 08abcde

Please sign in to comment.