More comprehensive examples can be found in our Examples Repo.

import os
import engineml.torch as eml
import torch
import torch.optim as optim
# Partition your data set across replicas. This is generally a list
# of files.
data = get_data_files(data_dir='/engine/data')
# NOTE(review): the call on this line was lost when the example was
# extracted; presumably it is the Engine ML data-partitioning helper —
# confirm the exact name against the Examples Repo.
data = eml.data.distribute(data)

# Pins your PyTorch code to a single GPU while enabling fast
# GPU-to-GPU communication. Make sure you run `init_devices()`
# before any `.cuda()` calls
# NOTE(review): the `init_devices()` call itself was missing from the
# extracted example; restored here per the comment above — TODO confirm.
eml.init_devices()
model = MyModel().cuda()

# Scale the learning rate by the number of model replicas
lr = 0.1
lr = eml.optimizer.scale_learning_rate(lr)

# Wrap your optimizer in the Allreduce Optimizer
opt = optim.Adam(model.parameters(), lr=lr)
opt = eml.optimizer.distribute(opt)
# Create a handler to automatically write a checkpoint when a run is preempted
def save_checkpoint(model, opt, checkpoint_path):
    """Write the model and optimizer state dicts to `checkpoint_path`.

    Registered as the preemption handler so an interrupted run can be
    resumed from its last state.
    """
    state = {
        'model_state': model.state_dict(),
        # BUG FIX: the original referenced an undefined name `optimizer`;
        # the parameter is called `opt`.
        'optimizer_state': opt.state_dict(),
    }
    # The `torch.save(` call was lost in extraction (only its closing
    # `, checkpoint_path)` survived); without it nothing is written.
    torch.save(state, checkpoint_path)
# Set the preempted checkpoint handler.
# NOTE(review): the first argument of `os.path.join` and the filename were
# lost in extraction; presumably the run's output directory and a fixed
# checkpoint name — TODO confirm against the Examples Repo.
eml.preempted_handler(save_checkpoint, model, opt,
                      os.path.join(eml.data.output_dir(), 'checkpoint.pt'))

# Synchronize all replica weights
# NOTE(review): the sync call itself was lost in extraction; presumably
# `eml.sync_model_replicas(model)` — TODO confirm exact name.
eml.sync_model_replicas(model)

# Optionally, set the input directory for restoring from a previous
# run's checkpoints
restore_dir = eml.data.input_dir()  # NOTE(review): helper name inferred — TODO confirm
if restore_dir:
    # Filename was stripped from the original `torch.load` call; must match
    # the name used when saving below.
    checkpoint = torch.load(os.path.join(restore_dir, 'checkpoint.pt'))
    model.load_state_dict(checkpoint['model'])
    opt.load_state_dict(checkpoint['optimizer'])

# Set the output directory for saving event files (if using
# tensorboardX) and checkpoints. `eml.data.output_dir()`
# returns `None` when running locally
save_dir = eml.data.output_dir() or '/path/when/training/locally'

state = {
    'model': model.state_dict(),
    'optimizer': opt.state_dict(),
}
# Use `eml.save` instead of `torch.save` to ensure checkpoints
# are saved correctly when using Engine ML
# NOTE(review): both inline-code names above were stripped in extraction;
# `eml.save` is inferred from context — TODO confirm.
eml.save(state, os.path.join(save_dir, 'checkpoint.pt'))