__init__.py
|
up
|
2023-09-26 14:30:47 +08:00 |
baichuan_model.py
|
up
|
2023-09-26 14:30:47 +08:00 |
distributed.py
|
up
|
2023-09-26 14:30:47 +08:00 |
fused_bias_gelu.py
|
change megatron to ascendspeed
|
2023-06-10 21:26:01 +08:00 |
fused_layer_norm.py
|
create megatron core
|
2023-07-24 15:00:57 +08:00 |
fused_softmax.py
|
up
|
2023-09-26 14:30:47 +08:00 |
glu_activations.py
|
change megatron to ascendspeed
|
2023-06-10 21:26:01 +08:00 |
gpt_model.py
|
up
|
2023-09-26 14:30:47 +08:00 |
internlm_model.py
|
up
|
2023-09-26 14:30:47 +08:00 |
language_model.py
|
up
|
2023-09-26 14:30:47 +08:00 |
llama2_model.py
|
up
|
2023-09-26 14:30:47 +08:00 |
llama_model.py
|
up
|
2023-09-26 14:30:47 +08:00 |
lora_modules.py
|
up
|
2023-09-26 14:30:47 +08:00 |
lora_utils.py
|
up
|
2023-09-26 14:30:47 +08:00 |
manual_pipe.py
|
up
|
2023-09-26 14:30:47 +08:00 |
module.py
|
up
|
2023-09-26 14:30:47 +08:00 |
positional_embeddings.py
|
create megatron core
|
2023-07-24 15:00:57 +08:00 |
transformer.py
|
up
|
2023-09-26 14:30:47 +08:00 |
triangle_attention.py
|
up
|
2023-09-26 14:30:47 +08:00 |
utils.py
|
change megatron to ascendspeed
|
2023-06-10 21:26:01 +08:00 |