@@ -15925,19 +15925,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
             else new_type = GGML_TYPE_Q4_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ||
+                 ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
             else new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
-                 ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ||
-                 ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
-            if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
+            if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
+            else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_Q6_K;
         }
         else if (new_type != GGML_TYPE_Q8_0) {
@@ -15970,10 +15971,13 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
             new_type = GGML_TYPE_IQ2_S;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
             if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_S;
             else new_type = GGML_TYPE_IQ3_XXS;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
+            new_type = GGML_TYPE_IQ3_XXS;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
             if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ3_XXS;
             else new_type = GGML_TYPE_IQ3_S;
@@ -16018,10 +16022,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
                  difquant_half_tensors(qs.i_attention_wv, qs.n_attention_wv)) new_type = GGML_TYPE_Q6_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && qs.i_attention_wv < 4) new_type = GGML_TYPE_Q5_K;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
+                 ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
             new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
@@ -16035,7 +16037,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M ||
                  ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL) {
             if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
-            else new_type = GGML_TYPE_IQ4_XS ;
+            else new_type = GGML_TYPE_Q4_K ;
         }
         else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) &&
                  (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -16072,11 +16074,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                  ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
             new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
-            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_XS;
-            else new_type = GGML_TYPE_IQ2_XXS;
-        }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
             if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ2_S;
             else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_IQ2_XXS;
             else new_type = GGML_TYPE_IQ1_M;
@@ -16204,21 +16202,24 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             // same quantization as before imatrix stuff, and b) Q4_1/Q5_1 do go crazy on ffn_down without an imatrix.
             new_type = ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q5_1;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (i_layer < n_layer/8 )) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ) {
-            if (difquant_half_tensors (i_layer, n_layer)) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (difquant_init_end_tensors( i_layer, n_layer) )) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
+            if (difquant_three_eights_tensors (i_layer, n_layer)) new_type = GGML_TYPE_IQ2_XXS;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_half_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
+            if (difquant_three_eights_tensors(i_layer, n_layer)) new_type = GGML_TYPE_IQ2_XXS;
+        }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
-            if (i_layer < n_layer/8 ) new_type = GGML_TYPE_IQ2_S;
+            if (difquant_three_eights_tensors( i_layer, n_layer) ) new_type = GGML_TYPE_IQ2_S;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_half_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_six_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
             new_type = difquant_init_end_tensors(i_layer, n_layer) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
-            new_type = difquant_half_tensors (i_layer, n_layer) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
+            new_type = difquant_six_eights_tensors (i_layer, n_layer) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
             new_type = difquant_init_tensors(i_layer, n_layer) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
@@ -16254,24 +16255,37 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     } else if (name.find("attn_output.weight") != std::string::npos) {
         if (arch != LLM_ARCH_FALCON) {
             if (qs.model.hparams.n_expert >= 4) {
-                if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
+                if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
+                    new_type = GGML_TYPE_Q4_K;
+                }
+                else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
                     ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ||
                     ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXXL ||
                     ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_NL ||
                     ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S ||
                     ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR ||
-                    ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) new_type = GGML_TYPE_Q5_K;
-            }
-            else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
-                     ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
-                     ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S) {
-                if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q4_K;
-                else {
-                    if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
-                        ftype == LLAMA_FTYPE_MOSTLY_IQ1_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL) new_type = GGML_TYPE_IQ2_XXS;
-                    else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) new_type = GGML_TYPE_IQ2_S;
-                    else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ4_XS) {
+                    new_type = GGML_TYPE_Q5_K;
+                }
+                else if (ftype == LLAMA_FTYPE_MOSTLY_Q5_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) new_type = GGML_TYPE_Q5_K;
+                else new_type = GGML_TYPE_Q8_0;
+            }
+            else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) {
+                if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) new_type = GGML_TYPE_IQ2_XS;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS ||
+                         ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) new_type = GGML_TYPE_IQ2_S;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
             }
+            else if (qs.model.hparams.n_gqa() >= 2) {
+                if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
+                    ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) new_type = GGML_TYPE_IQ2_XXS;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) new_type = GGML_TYPE_IQ2_XS;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) new_type = GGML_TYPE_IQ2_S;
+                else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
         } else {
             if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) new_type = GGML_TYPE_Q3_K;
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ) new_type = GGML_TYPE_Q4_K;
@@ -16323,10 +16337,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ1_M;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_half_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_six_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_init_end_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS ;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_half_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_three_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S ;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_six_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && (difquant_init_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_S;
@@ -16343,10 +16357,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ1_M;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_half_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL && (difquant_six_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS && (difquant_three_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_init_end_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_XS ;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_half_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS && (difquant_three_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S ;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S && (difquant_six_eights_tensors (i_layer, n_layer))) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M && (difquant_init_end_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL && (difquant_six_eights_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && (difquant_init_tensors(i_layer, n_layer))) new_type = GGML_TYPE_IQ3_S;
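
Note on the `difquant_*` predicates referenced throughout this diff: they gate a tensor's bump to the next quant type on the tensor's position in the layer stack, and their definitions are not part of these hunks. The sketch below only illustrates the apparent naming convention (`three_eights` selecting roughly 3/8 of the layers, `six_eights` roughly 6/8); the bodies are assumptions, and the fork's real predicates may select different slices (for instance the front and back of the stack rather than a simple prefix).

```cpp
// Hypothetical sketch of the difquant_* layer-fraction predicates; the
// bodies below are assumptions based on the names, not the fork's code.
#include <cstdio>

static bool difquant_three_eights_tensors(int i_layer, int n_layer) {
    return i_layer < 3 * n_layer / 8;   // assumed: first ~3/8 of the layers
}

static bool difquant_six_eights_tensors(int i_layer, int n_layer) {
    return i_layer < 6 * n_layer / 8;   // assumed: first ~6/8 of the layers
}

int main() {
    const int n_layer = 32;             // e.g. a Llama-2-7B-sized stack
    int hits_38 = 0, hits_68 = 0;
    for (int i = 0; i < n_layer; ++i) {
        hits_38 += difquant_three_eights_tensors(i, n_layer);
        hits_68 += difquant_six_eights_tensors(i, n_layer);
    }
    // With these assumed bodies: 12 of 32 layers pass the 3/8 gate,
    // 24 of 32 pass the 6/8 gate.
    printf("3/8 gate: %d/%d layers, 6/8 gate: %d/%d layers\n",
           hits_38, n_layer, hits_68, n_layer);
    return 0;
}
```

Under that reading, the switches from `difquant_half_tensors` to `difquant_six_eights_tensors` (the IQ1_XL, IQ2_S, and IQ2_XL branches) widen the slice of layers that receives the stronger type, while replacing the ad-hoc `i_layer < n_layer/8` tests with `difquant_three_eights_tensors`/`difquant_init_end_tensors` routes everything through the shared predicates.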
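For readers tracing the reworked `attn_output.weight` hunk: the new control flow is a three-tier ladder keyed on expert count and the GQA ratio. Below is a distilled mirror of that ladder, with the thresholds copied from the hunk and all scaffolding (enum, function name) invented purely for illustration; it is not the committed code.

```cpp
// Distilled, hypothetical mirror of the new attn_output.weight gating.
// Thresholds (n_expert >= 4; n_gqa >= 4 || n_expert >= 2; n_gqa >= 2)
// come from the diff above; the enum and classify() are invented.
#include <cstdint>
#include <cstdio>

enum class AttnOutputTier {
    ExpertHeavy,  // n_expert >= 4: low-bit ftypes promoted to Q4_K/Q5_K/Q8_0
    HighGqaOrMoe, // n_gqa >= 4 or n_expert >= 2: IQ2_XS / IQ2_S / IQ3_XXS
    MidGqa,       // n_gqa >= 2: IQ2_XXS / IQ2_XS / IQ2_S / IQ3_XXS
    Baseline      // everything else: per-ftype defaults further down
};

static AttnOutputTier classify(uint32_t n_expert, uint32_t n_gqa) {
    if (n_expert >= 4)               return AttnOutputTier::ExpertHeavy;
    if (n_gqa >= 4 || n_expert >= 2) return AttnOutputTier::HighGqaOrMoe;
    if (n_gqa >= 2)                  return AttnOutputTier::MidGqa;
    return AttnOutputTier::Baseline;
}

int main() {
    // e.g. a dense GQA model with 32 heads and 8 KV heads (n_gqa == 4)
    printf("tier = %d\n", static_cast<int>(classify(/*n_expert=*/0, /*n_gqa=*/4)));
    return 0;
}
```

The apparent intent is that dense, low-GQA models keep the cheaper IQ2-class types on attn_output, while MoE and high-GQA models spend more bits on it.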