..
|
1cycle_lr.png
|
DeepSpeed-vs-Megatron.png
|
bert-large-training-time.png
|
deepspeed-speedup.png
|
end-to-end-bert-training.PNG
|
layernorm_animation.gif
|
layernorm_deepspeed.gif
|
layernorm_ds.png
|
layernorm_pytorch.gif
|
layernorm_torch.png
|
loss_and_lr.png
|
lr_schedule.png
|
megatron-gpt2-perf-test.png
|
model_convergence.png
|
qkv_fusion.png
|
softmax_animation.gif
|
softmax_deepspeed.gif
|
softmax_ds.png
|
softmax_pytorch.gif
|
softmax_torch.png
|
transformer_kernel_perf.png
|
transformer_kernel_perf_seq128.PNG
|
transformer_kernel_perf_seq512.PNG
|
transformer_preln_arch.png
|
zero-full.png
|