From 4614ce3c3d928134d8bbcf7e270be92abc3ca80f Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Thu, 9 Apr 2026 23:45:44 +0800
Subject: [PATCH 1/7] Fix the multi-LoRA issue in Twinkle.

---
 src/mcore_bridge/bridge/gpt_bridge.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index 7039a91..4871c39 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -267,7 +267,7 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
         new_state_dict = {}
         for k, v in hf_state_dict.items():
             if self._peft_format:
-                if '.lora_A.' in k or '.lora_B.' in k or '.modules_to_save.' in k:
+                if ('.lora_A.' in k or '.lora_B.' in k or '.modules_to_save.' in k) and f'{self._adapter_name}.' in k:
                     k = k.replace(f'{self._adapter_name}.', '')
                 new_state_dict[k] = v
             else:
@@ -1703,7 +1703,7 @@ def export_weights(
         self.config = mg_models[0].config
         with torch.no_grad():
             for k, v in self._convert(mg_models, {}, hf_prefix, False, tqdm_desc=tqdm_desc):
-                if converter:
+                if converter and v is not None:
                     kv = converter(k, v)
                     if kv is None:
                         continue

From 609ebd492416df208434b54fefc01428f4ea4d3f Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Thu, 9 Apr 2026 23:48:16 +0800
Subject: [PATCH 2/7] lint pass

---
 src/mcore_bridge/bridge/gpt_bridge.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index 4871c39..2a4ad4b 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -267,7 +267,8 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
         new_state_dict = {}
         for k, v in hf_state_dict.items():
             if self._peft_format:
-                if ('.lora_A.' in k or '.lora_B.' in k or '.modules_to_save.' in k) and f'{self._adapter_name}.' in k:
+                if ('.lora_A.' in k or '.lora_B.' in k
+                        or '.modules_to_save.' in k) and f'{self._adapter_name}.' in k:
                     k = k.replace(f'{self._adapter_name}.', '')
                 new_state_dict[k] = v
             else:

From b32bd8edcd839e147e436af6c23c799bc60f8e04 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Apr 2026 10:37:01 +0800
Subject: [PATCH 3/7] fix

---
 src/mcore_bridge/bridge/gpt_bridge.py | 38 ++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index 2a4ad4b..9de5c35 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -1325,24 +1325,26 @@ def _set_linear_attn_state(self, mg_attn, hf_state_dict, hf_prefix: str, layer_i
             hf_state_dict['in_proj_b.weight_scale_inv'] = scale_inv[qkv_block + z_block:-a_block].clone()
             hf_state_dict['in_proj_a.weight_scale_inv'] = scale_inv[-a_block:].clone()
             del in_proj_weight
-        if to_mcore:
-            conv1d = hf_state_dict['conv1d.weight'].load()
-            q_c, k_c, v_c = torch.split(conv1d, [key_dim, key_dim, value_dim], dim=0)
-            conv1d = torch.cat([
-                *(x.reshape(num_key_heads, -1, *conv1d.shape[-2:]) for x in [q_c, k_c, v_c]),
-            ], dim=1).reshape((-1, *conv1d.shape[-2:]))
-            self._set_weight(mg_attn.conv1d.weight, conv1d, 'conv1d.weight')
-        else:
-            conv1d, _ = self._get_weight(None if mg_attn is None else mg_attn.conv1d.weight, 'conv1d.weight')
-            if conv1d is not None:
-                conv1d = conv1d.reshape(num_key_heads, -1, *conv1d.shape[-2:])
-                q_c, k_c, v_c = torch.split(
-                    conv1d, [key_dim // num_key_heads, key_dim // num_key_heads, value_dim // num_key_heads], dim=1)
-                q_c = q_c.reshape(-1, *q_c.shape[-2:])
-                k_c = k_c.reshape(-1, *k_c.shape[-2:])
-                v_c = v_c.reshape(-1, *v_c.shape[-2:])
-                conv1d = torch.concat([q_c, k_c, v_c], dim=0)
-                hf_state_dict['conv1d.weight'] = conv1d
+        if not self._peft_format:
+            if to_mcore:
+                conv1d = hf_state_dict['conv1d.weight'].load()
+                q_c, k_c, v_c = torch.split(conv1d, [key_dim, key_dim, value_dim], dim=0)
+                conv1d = torch.cat([
+                    *(x.reshape(num_key_heads, -1, *conv1d.shape[-2:]) for x in [q_c, k_c, v_c]),
+                ],
+                                   dim=1).reshape((-1, *conv1d.shape[-2:]))
+                self._set_weight(mg_attn.conv1d.weight, conv1d, 'conv1d.weight')
+            else:
+                conv1d, _ = self._get_weight(None if mg_attn is None else mg_attn.conv1d.weight, 'conv1d.weight')
+                if conv1d is not None:
+                    conv1d = conv1d.reshape(num_key_heads, -1, *conv1d.shape[-2:])
+                    q_c, k_c, v_c = torch.split(
+                        conv1d, [key_dim // num_key_heads, key_dim // num_key_heads, value_dim // num_key_heads], dim=1)
+                    q_c = q_c.reshape(-1, *q_c.shape[-2:])
+                    k_c = k_c.reshape(-1, *k_c.shape[-2:])
+                    v_c = v_c.reshape(-1, *v_c.shape[-2:])
+                    conv1d = torch.concat([q_c, k_c, v_c], dim=0)
+                    hf_state_dict['conv1d.weight'] = conv1d
         self._set_state_dict(mg_attn, 'dt_bias', hf_state_dict, 'dt_bias', to_mcore)
         self._set_state_dict(mg_attn, 'A_log', hf_state_dict, 'A_log', to_mcore)
         self._set_state_dict(mg_attn, 'out_norm.weight', hf_state_dict, 'norm.weight', to_mcore)

From 3cc57d2affd758327759c28bf75c4e95be14e6a6 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Apr 2026 10:50:19 +0800
Subject: [PATCH 4/7] update

---
 src/mcore_bridge/bridge/gpt_bridge.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index 9de5c35..e6905b7 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -268,14 +268,14 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
         for k, v in hf_state_dict.items():
             if self._peft_format:
                 if ('.lora_A.' in k or '.lora_B.' in k
-                        or '.modules_to_save.' in k) and f'{self._adapter_name}.' in k:
-                    k = k.replace(f'{self._adapter_name}.', '')
+                        or '.modules_to_save.' in k) and f'.{self._adapter_name}.' in k:
+                    k = k.replace(f'.{self._adapter_name}.', '.')
                 new_state_dict[k] = v
             else:
-                if '.lora_A.' in k or '.lora_B.' in k or 'original_module.' in k:
+                if '.lora_A.' in k or '.lora_B.' in k or '.original_module.' in k:
                     continue
-                k = k.replace('base_layer.', '')
-                k = k.replace(f'modules_to_save.{self._adapter_name}.', '')
+                k = k.replace('.base_layer.', '.')
+                k = k.replace(f'.modules_to_save.{self._adapter_name}.', '.')
                 new_state_dict[k] = v
         hf_state_dict = new_state_dict
         if self.pp_size > 1:

From a48d08b805745eda67e241a7ef46b9b7405a7892 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Apr 2026 11:00:06 +0800
Subject: [PATCH 5/7] update

---
 src/mcore_bridge/bridge/gpt_bridge.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index e6905b7..dfe0726 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -274,6 +274,8 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
             else:
                 if '.lora_A.' in k or '.lora_B.' in k or '.original_module.' in k:
                     continue
+                if '.modules_to_save.' in k and f'.{self._adapter_name}.' not in k:
+                    continue
                 k = k.replace('.base_layer.', '.')
                 k = k.replace(f'.modules_to_save.{self._adapter_name}.', '.')
                 new_state_dict[k] = v

From 83e09172b22e0700d7b23e577fd2fe7b7e4d68d1 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Apr 2026 11:24:11 +0800
Subject: [PATCH 6/7] update

---
 src/mcore_bridge/bridge/gpt_bridge.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index dfe0726..bea2c1a 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -267,17 +267,19 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
         new_state_dict = {}
         for k, v in hf_state_dict.items():
             if self._peft_format:
-                if ('.lora_A.' in k or '.lora_B.' in k
-                        or '.modules_to_save.' in k) and f'.{self._adapter_name}.' in k:
+                # Without adding a leading '.' here (e.g., '.lora_A.'),
+                # we avoid the case where mg_module itself is a linear layer (such as proj1).
+                if ('lora_A.' in k or 'lora_B.' in k
+                        or 'modules_to_save.' in k) and f'.{self._adapter_name}.' in k:
                     k = k.replace(f'.{self._adapter_name}.', '.')
                 new_state_dict[k] = v
             else:
-                if '.lora_A.' in k or '.lora_B.' in k or '.original_module.' in k:
+                if 'lora_A.' in k or 'lora_B.' in k or 'original_module.' in k:
                     continue
-                if '.modules_to_save.' in k and f'.{self._adapter_name}.' not in k:
+                if 'modules_to_save.' in k and f'.{self._adapter_name}.' not in k:
                     continue
-                k = k.replace('.base_layer.', '.')
-                k = k.replace(f'.modules_to_save.{self._adapter_name}.', '.')
+                k = k.replace('base_layer.', '')
+                k = k.replace(f'modules_to_save.{self._adapter_name}.', '')
                 new_state_dict[k] = v
         hf_state_dict = new_state_dict
         if self.pp_size > 1:

From d3e4e01d7e7a549afd845c11d092e28840f47ea9 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Fri, 10 Apr 2026 11:38:55 +0800
Subject: [PATCH 7/7] update

---
 src/mcore_bridge/bridge/gpt_bridge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index bea2c1a..e48fb08 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -276,7 +276,7 @@ def _set_module(self, mg_module, hf_state_dict, hf_prefix: str, to_mcore: bool):
             else:
                 if 'lora_A.' in k or 'lora_B.' in k or 'original_module.' in k:
                     continue
-                if 'modules_to_save.' in k and f'.{self._adapter_name}.' not in k:
+                if 'modules_to_save.' in k and f'modules_to_save.{self._adapter_name}.' not in k:
                     continue
                 k = k.replace('base_layer.', '')
                 k = k.replace(f'modules_to_save.{self._adapter_name}.', '')