..
|
1cycle_lr.png
|
DeepSpeed-vs-Megatron.png
|
bert-large-training-time.png
|
deepspeed-speedup.png
|
end-to-end-bert-training.PNG
|
layernorm_animation.gif
|
layernorm_deepspeed.gif
|
layernorm_ds.png
|
layernorm_pytorch.gif
|
layernorm_torch.png
|
loss_and_lr.png
|
lr_schedule.png
|
megatron-gpt2-perf-test.png
|
model_convergence.png
|
qkv_fusion.png
|
sa_backward_pass.png
|
sa_bert_base_time_result.png
|
sa_bert_large_time_result.png
|
sa_fixed_sparsity_structure.png
|
sa_forward_pass.png
|
sa_gpt2_time_result.png
|
sa_long_document_comprehension_result.png
|
sa_maximum_sequence_runnable_on_bert.png
|
sa_variable_sparsity_structure.png
|
softmax_animation.gif
|
softmax_deepspeed.gif
|
softmax_ds.png
|
softmax_pytorch.gif
|
softmax_torch.png
|
transformer_kernel_perf.png
|
transformer_kernel_perf_seq128.PNG
|
transformer_kernel_perf_seq512.PNG
|
transformer_preln_arch.png
|
variable_sparsity_pattern.png
|
webinar-aug2020.png
|
zero-full.png
|