@@ -203,6 +203,8 @@ class ModelType(Enum):
     SmolLM3 = 0x2700

+    Exaone4 = 0x2800
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker = 0x10000101
     BGE_M3 = 0x10000102
@@ -2985,6 +2987,68 @@ def get_weight_names(config):
         return weight_names

+class Exaone4Converter(BaseConverter):
+    MODEL_TYPE = ModelType.Exaone4
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        MAX_LAYERS = 128
+        assert config.num_hidden_layers < MAX_LAYERS
+        assert config.rope_scaling['rope_type'] == 'llama3'
+        assert not config.attention_bias, "attention_bias must be False"
+        assert config.head_dim == config.hidden_size // config.num_attention_heads
+
+        dump_llama_like_config(f, config, ggml_type)
+
+        config_values = [
+            config.num_key_value_heads,
+            config.sliding_window if config.sliding_window is not None else -1,
+            1 if config.tie_word_embeddings else 0,
+        ]
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
+        config_values = [
+            config.rope_theta,
+            config.rope_scaling['original_max_position_embeddings'],
+            config.rope_scaling['factor'],
+            config.rope_scaling['low_freq_factor'],
+            config.rope_scaling['high_freq_factor'],
+        ]
+        f.write(struct.pack("<fifff", *config_values))
+
+        def check_is_sliding(config, layer_idx):
+            if config.sliding_window is None:
+                return False
+            if config.layer_types is not None:
+                return config.layer_types[layer_idx] == "sliding_attention"
+            if isinstance(config.sliding_window_pattern, int):
+                return ((layer_idx + 1) % config.sliding_window_pattern) != 0
+            elif isinstance(config.sliding_window_pattern, str):
+                assert isinstance(config.sliding_window, int), (
+                    f"Sliding window must be a positive integer, but got {config.sliding_window}"
+                )
+                return (
+                    layer_idx != config.num_hidden_layers - 1
+                    and config.sliding_window_pattern[layer_idx % len(config.sliding_window_pattern)] == "L"
+                )
+            return False
+
+        config_values = [0] * MAX_LAYERS
+        for i in range(config.num_hidden_layers):
+            if check_is_sliding(config, i):
+                config_values[i] = 1
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        weight_names = OLMo2Converter.get_weight_names(config)
+        if config.tie_word_embeddings:
+            weight_names = weight_names[:-1]
+
+        return weight_names
+
 class InstellaConverter(BaseConverter):
     MODEL_TYPE = ModelType.Instella

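Aside, not part of the commit: dump_config above appends three little-endian int32 fields, a "<fifff" rope-scaling block, and MAX_LAYERS int32 sliding-attention flags after the llama-like header. A minimal read-back sketch under those assumptions; read_exaone4_extra_config is a hypothetical name, not a function in convert.py:

import struct

def read_exaone4_extra_config(f, max_layers=128):
    # Hypothetical reader for the extra fields written by Exaone4Converter.dump_config.
    # 3 x int32: num_key_value_heads, sliding_window (-1 when absent), tie_word_embeddings
    num_kv_heads, sliding_window, tie_embeddings = struct.unpack("<iii", f.read(12))
    # float + int32 + 3 x float: rope_theta and the llama3 rope-scaling parameters
    rope_theta, orig_max_pos, factor, low_freq, high_freq = struct.unpack("<fifff", f.read(20))
    # max_layers x int32 flags: 1 = sliding attention, 0 = full attention (unused slots stay 0)
    layer_flags = struct.unpack("<" + "i" * max_layers, f.read(4 * max_layers))
    return {
        "num_key_value_heads": num_kv_heads,
        "sliding_window": sliding_window,
        "tie_word_embeddings": bool(tie_embeddings),
        "rope_theta": rope_theta,
        "original_max_position_embeddings": orig_max_pos,
        "rope_factor": factor,
        "low_freq_factor": low_freq,
        "high_freq_factor": high_freq,
        "layer_is_sliding": list(layer_flags),
    }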
@@ -7663,6 +7727,8 @@ def main():
         GraniteMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'ExaoneForCausalLM':
         ExaoneConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'Exaone4ForCausalLM':
+        Exaone4Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'TeleChat2ForCausalLM':
         TeleChat2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'HunYuanForCausalLM':
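Aside, not part of the commit: with the new dispatch branch, an EXAONE 4.0 checkpoint converts like any other supported architecture. A hedged invocation sketch, assuming convert.py's usual -i/-o/-t options and a placeholder checkpoint path:

python convert.py -i /path/to/EXAONE-4.0-1.2B -o exaone4.bin -t q8_0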