
Commit d5af436

"distributed" model loaders; initial support of exaone-4.0
1 parent: 3285c50

9 files changed (+522, -225 lines)


CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -56,10 +56,11 @@ set(core_files src/backend.cpp
     src/vision_process.cpp
     src/audio_process.cpp
     models/ernie.cpp
+    models/exaone.cpp
     models/hunyuan.cpp
     models/llama.cpp
-    models/qwen.cpp
     models/pangu.cpp
+    models/qwen.cpp
     models/smol.cpp
 )

convert.py

Lines changed: 66 additions & 0 deletions
@@ -203,6 +203,8 @@ class ModelType(Enum):
 
     SmolLM3 = 0x2700
 
+    Exaone4 = 0x2800
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker = 0x10000101
     BGE_M3 = 0x10000102
@@ -2985,6 +2987,68 @@ def get_weight_names(config):
 
         return weight_names
 
+class Exaone4Converter(BaseConverter):
+    MODEL_TYPE = ModelType.Exaone4
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        MAX_LAYERS = 128
+        assert config.num_hidden_layers < MAX_LAYERS
+        assert config.rope_scaling['rope_type'] == 'llama3'
+        assert not config.attention_bias, "attention_bias must be False"
+        assert config.head_dim == config.hidden_size // config.num_attention_heads
+
+        dump_llama_like_config(f, config, ggml_type)
+
+        config_values = [
+            config.num_key_value_heads,
+            config.sliding_window if config.sliding_window is not None else -1,
+            1 if config.tie_word_embeddings else 0,
+        ]
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
+        config_values = [
+            config.rope_theta,
+            config.rope_scaling['original_max_position_embeddings'],
+            config.rope_scaling['factor'],
+            config.rope_scaling['low_freq_factor'],
+            config.rope_scaling['high_freq_factor'],
+        ]
+        f.write(struct.pack("<fifff", *config_values))
+
+        def check_is_sliding(config, layer_idx):
+            if config.sliding_window is None:
+                return False
+            if config.layer_types is not None:
+                return config.layer_types[layer_idx] == "sliding_attention"
+            if isinstance(config.sliding_window_pattern, int):
+                return ((layer_idx + 1) % config.sliding_window_pattern) != 0
+            elif isinstance(config.sliding_window_pattern, str):
+                assert isinstance(config.sliding_window, int), (
+                    f"Sliding window must be positive integer, but got {config.sliding_window}"
+                )
+                return (
+                    layer_idx != config.num_hidden_layers - 1
+                    and config.sliding_window_pattern[layer_idx % len(config.sliding_window_pattern)] == "L"
+                )
+            else:
+                pass
+            return False
+
+        config_values = [0] * MAX_LAYERS
+        for i in range(config.num_hidden_layers):
+            if check_is_sliding(config, i):
+                config_values[i] = 1
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        weight_names = OLMo2Converter.get_weight_names(config)
+        if config.tie_word_embeddings:
+            weight_names = weight_names[:-1]
+
+        return weight_names
+
 class InstellaConverter(BaseConverter):
     MODEL_TYPE = ModelType.Instella
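
A quick way to see what the per-layer flag block ends up holding is to run the layer-type rule on its own. The sketch below mirrors the decision order of check_is_sliding from the diff above against a hypothetical config object; the "LLLG" pattern, window size, and layer count are illustrative values, not taken from this commit or from any particular EXAONE-4.0 checkpoint.

# Standalone sketch of the sliding-attention rule used in Exaone4Converter.dump_config.
# FakeConfig and all of its values are hypothetical; only the decision order mirrors the diff above.
from dataclasses import dataclass
from typing import List, Optional, Union

@dataclass
class FakeConfig:
    num_hidden_layers: int = 8
    sliding_window: Optional[int] = 4096                      # illustrative window size
    layer_types: Optional[List[str]] = None                   # not set -> fall back to the pattern
    sliding_window_pattern: Union[int, str, None] = "LLLG"    # 'L' = local/sliding, otherwise global

def is_sliding(config, layer_idx):
    # Same precedence as check_is_sliding: layer_types, then int pattern, then str pattern.
    if config.sliding_window is None:
        return False
    if config.layer_types is not None:
        return config.layer_types[layer_idx] == "sliding_attention"
    if isinstance(config.sliding_window_pattern, int):
        return ((layer_idx + 1) % config.sliding_window_pattern) != 0
    if isinstance(config.sliding_window_pattern, str):
        return (
            layer_idx != config.num_hidden_layers - 1
            and config.sliding_window_pattern[layer_idx % len(config.sliding_window_pattern)] == "L"
        )
    return False

cfg = FakeConfig()
print([int(is_sliding(cfg, i)) for i in range(cfg.num_hidden_layers)])
# -> [1, 1, 1, 0, 1, 1, 1, 0]: every fourth layer (and the last one) keeps full attention.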

@@ -7663,6 +7727,8 @@ def main():
         GraniteMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'ExaoneForCausalLM':
         ExaoneConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'Exaone4ForCausalLM':
+        Exaone4Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'TeleChat2ForCausalLM':
         TeleChat2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'HunYuanForCausalLM':
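
For reference, the Exaone4-specific block that dump_config appends after the llama-like header (whatever dump_llama_like_config emits is not part of this diff) consists of three little-endian int32s, a 20-byte "<fifff" group for the llama3-style rope scaling, and MAX_LAYERS (128) int32 per-layer sliding-attention flags. Below is a round-trip sketch with made-up values that packs the block into an in-memory buffer and unpacks it with the same format strings, rather than reading a real converted model file.

# Round-trip sketch of the Exaone4-specific config block; the numbers are invented
# for illustration and the layout is inferred from the struct.pack calls in
# Exaone4Converter.dump_config above.
import io
import struct

MAX_LAYERS = 128
buf = io.BytesIO()

# 3 x int32: num_key_value_heads, sliding_window (-1 if unset), tie_word_embeddings flag
buf.write(struct.pack("<" + "i" * 3, 8, 4096, 1))

# <fifff: rope_theta, original_max_position_embeddings, factor,
#         low_freq_factor, high_freq_factor
buf.write(struct.pack("<fifff", 1000000.0, 8192, 16.0, 1.0, 4.0))

# 128 x int32: per-layer sliding-attention flags; unused trailing slots stay 0
flags = [1, 1, 1, 0] * 8 + [0] * (MAX_LAYERS - 32)
buf.write(struct.pack("<" + "i" * MAX_LAYERS, *flags))

# Read everything back with the same format strings.
buf.seek(0)
num_kv_heads, sliding_window, tie_embeddings = struct.unpack("<3i", buf.read(12))
rope_theta, orig_max_pos, factor, low_freq, high_freq = struct.unpack("<fifff", buf.read(20))
layer_flags = struct.unpack("<" + "i" * MAX_LAYERS, buf.read(4 * MAX_LAYERS))

print(num_kv_heads, sliding_window, tie_embeddings)   # 8 4096 1
print(layer_flags[:8])                                # (1, 1, 1, 0, 1, 1, 1, 0)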
