Commit 50e24bb

support Hunyuan MoE

1 parent 746f67a · commit 50e24bb

12 files changed: +500 −248 lines

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -55,7 +55,8 @@ set(core_files src/backend.cpp
                src/unicode-data.cpp
                src/vision_process.cpp
                src/audio_process.cpp
-               models/qwen.cpp)
+               models/qwen.cpp
+               models/hunyuan.cpp)
 
 add_library(libchatllm SHARED EXCLUDE_FROM_ALL src/main.cpp ${core_files})
 target_link_libraries(libchatllm PRIVATE ggml)

README.md

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2025-06-30: Hunyuan-A13B
 * 2025-06-21: [I can hear](./docs/multimodal.md): Qwen2-Audio
 * 2025-06-10: SmolVLM2
 * 2025-06-07: MiniCPM4

convert.py

Lines changed: 92 additions & 1 deletion
@@ -16,7 +16,7 @@
 from pathlib import Path
 from typing import IO, Any, Iterable, List, Optional, Tuple
 import numpy as np
-import math
+import math, gc
 
 import torch
 from torch import nn
@@ -184,6 +184,7 @@ class ModelType(Enum):
     TeleChat2 = 0x1e00
 
     HunYuanDense = 0x1f00
+    HunYuanMoEV1 = 0x1f01
 
     MoonLight = 0x2000
 
@@ -605,6 +606,8 @@ def dump_state_dict(f, weight_names, model_files, ggml_type, config, state_dict_
         dump_tensor(f, name, tensor, tensor_ggml_type)
         tensor_info.append((name, tensor.shape, tensor_ggml_type.name))
 
+        gc.collect()
+
     print(tabulate(tensor_info, headers=["name", "shape", "dtype"]))
 
     if len(tensor_info) != len(weight_names):
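A note on the new `gc.collect()`: `dump_state_dict` streams tensors out one at a time, and forcing a collection between tensors keeps garbage from piling up while a multi-gigabyte checkpoint is converted. A minimal sketch of the pattern, with a hypothetical `write_tensor` helper standing in for the real dump logic:

```python
import gc

def dump_all(f, state_dict):
    # Stream tensors out one at a time, dropping each reference
    # before moving on, so peak memory stays near one tensor's size.
    for name in list(state_dict.keys()):
        tensor = state_dict.pop(name)  # drop the dict's reference
        write_tensor(f, name, tensor)  # hypothetical writer
        del tensor                     # drop the local reference
        gc.collect()                   # sweep any cyclic garbage, as the commit does
```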
@@ -6521,6 +6524,92 @@ def get_weight_names(config):
 
         return weight_names
 
+class HunYuanMoEV1Converter(BaseConverter):
+    MODEL_TYPE = ModelType.HunYuanMoEV1
+
+    @classmethod
+    def state_dict_pp(cls, config, state_dict):
+        new_dict = {}
+
+        for name in state_dict:
+            tensor: torch.Tensor = state_dict[name]
+            new_name = name
+            new_name = new_name.replace('.mlp.gate.wg.', '.mlp.gate.')
+            new_name = new_name.replace('.shared_mlp.', '.shared_expert.')
+
+            new_dict[new_name] = tensor
+
+        return new_dict
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        assert config.tie_word_embeddings, "tie_word_embeddings must be True"
+        assert config.attention_bias == False, "attention_bias must be False"
+        assert config.mlp_bias == False, "mlp_bias must be False"
+        assert not config.use_cla, "use_cla must be False"
+        assert not config.use_mla, "use_mla must be False"
+        assert config.rope_scaling['type'] == 'dynamic', "rope_scaling['type'] must be 'dynamic'"
+        assert config.use_qk_norm, "use_qk_norm must be True"
+        assert config.rope_scaling['alpha'] > 0, "rope_scaling['alpha'] must be > 0"
+        assert config.moe_layer_num_skipped == 0
+        assert config.use_mixed_mlp_moe
+        assert len(set(config.moe_intermediate_size)) == 1
+        assert len(set(config.moe_topk)) == 1
+        assert len(set(config.num_shared_expert)) == 1
+        assert config.attention_head_dim == config.hidden_size / config.num_attention_heads
+
+        head_dim = config.attention_head_dim
+        config.rope_theta = config.rope_theta * config.rope_scaling['alpha'] ** (head_dim / (head_dim - 2))
+
+        dump_llama_like_config(f, config, ggml_type)
+
+        config_values = [
+            config.num_key_value_heads,
+            config.num_experts,
+
+            list(set(config.moe_intermediate_size))[0],
+            list(set(config.moe_topk))[0],
+            list(set(config.num_shared_expert))[0],
+        ]
+        f.write(struct.pack("<" + "i" * len(config_values), *config_values))
+
+        config_values = [
+            config.rope_theta,
+        ]
+        f.write(struct.pack("<" + "f" * len(config_values), *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        weight_names = ["model.embed_tokens.weight"]
+        for i in range(config.num_hidden_layers):
+            for j in range(config.num_experts):
+                weight_names += [
+                    f"model.layers.{i}.mlp.experts.{j}.down_proj.weight",
+                    f"model.layers.{i}.mlp.experts.{j}.gate_proj.weight",
+                    f"model.layers.{i}.mlp.experts.{j}.up_proj.weight",
+                ]
+
+            weight_names += [
+                f"model.layers.{i}.mlp.gate.weight",
+                f"model.layers.{i}.mlp.shared_expert.down_proj.weight",
+                f"model.layers.{i}.mlp.shared_expert.gate_proj.weight",
+                f"model.layers.{i}.mlp.shared_expert.up_proj.weight",
+                f"model.layers.{i}.input_layernorm.weight",
+                f"model.layers.{i}.post_attention_layernorm.weight",
+                f"model.layers.{i}.self_attn.k_proj.weight",
+                f"model.layers.{i}.self_attn.o_proj.weight",
+                f"model.layers.{i}.self_attn.q_proj.weight",
+                f"model.layers.{i}.self_attn.v_proj.weight",
+                f"model.layers.{i}.self_attn.key_layernorm.weight",
+                f"model.layers.{i}.self_attn.query_layernorm.weight",
+            ]
+
+        weight_names += [
+            "model.norm.weight",
+        ]
+
+        return weight_names
+
 class SolarConverter(BaseConverter):
     MODEL_TYPE = ModelType.SolarPro
 
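The `rope_theta` rewrite in `dump_config` is the NTK-aware RoPE base adjustment, base′ = base · alpha^(d/(d−2)) with d the head dimension: the dynamic scaling factor is folded into the base once at conversion time so the runtime can apply plain RoPE. A quick sanity check with illustrative numbers (not taken from any particular checkpoint):

```python
# NTK-aware base adjustment, as in HunYuanMoEV1Converter.dump_config.
# The concrete values below are hypothetical, for illustration only.
rope_theta = 10000.0  # original RoPE base
alpha = 1000.0        # rope_scaling['alpha']
head_dim = 128        # attention_head_dim

scaled = rope_theta * alpha ** (head_dim / (head_dim - 2))
print(f"{scaled:.3e}")  # ~1.116e+07 is the base stored in the converted file
```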
@@ -7380,6 +7469,8 @@ def main():
             (isinstance(config.num_experts, list) and max(config.num_experts) > 1)):
             raise Exception('HunYuanForCausalLM: only dense model is supported')
         HunYuanDenseConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'HunYuanMoEV1ForCausalLM':
+        HunYuanMoEV1Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'InstellaForCausalLM':
         InstellaConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'DeciLMForCausalLM':
docs/models.md

Lines changed: 1 addition & 0 deletions
@@ -78,6 +78,7 @@
 
 * HunYuan (`HunYuanForCausalLM`)
     * [x] Dense: [Instruct-7B](https://huggingface.co/tencent/Hunyuan-7B-Instruct)
+    * [x] MoE: [A13B-Instruct](https://huggingface.co/tencent/Hunyuan-A13B-Instruct/tree/202c9758065873e0ac7c80211e6275593f165442)
 
 * Instella (`InstellaForCausalLM`)
     * [x] [Instruct-3B](https://huggingface.co/amd/Instella-3B-Instruct)

models/grok.cpp

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ const int NUM_EXPERTS = 8;
 const int EXPERTS_PER_TOK = 2;
 
 // make it easy to test with different number of experts.
-#define EFFECTIVE_EXPERTS_PER_TOK EXPERTS_PER_TOK
+const int EFFECTIVE_EXPERTS_PER_TOK = EXPERTS_PER_TOK;
 
 class GrokBaseAttention : public BaseAttention
 {
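Replacing the macro with a typed `const int` is behavior-preserving: the constant gains a real type and scope and stays visible to the debugger, while the stated purpose, testing with a different number of experts, still only requires editing one initializer.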
