HIEHEU committed on
Commit
41e35d1
·
verified ·
1 Parent(s): 226e13c

Update py/abstractive.py

Browse files
Files changed (1) hide show
  1. py/abstractive.py +11 -11
py/abstractive.py CHANGED
@@ -40,7 +40,7 @@ def clean_text_inference(text: str) -> str:
40
  return text
41
 
42
  # -------------------------------------------------------------------
43
- # PHOBERT ENCODER (PyTorch) - ĐÃ SỬA CHỮA
44
  # -------------------------------------------------------------------
45
  class PhoBERTEncoderTorch:
46
  def __init__(self):
@@ -52,14 +52,15 @@ class PhoBERTEncoderTorch:
52
 
53
  def encode(self, input_ids, attention_mask):
54
  with torch.no_grad():
55
- # [FIX 1] Ép kiểu input thành Long (int64) để tránh lỗi embedding trên CPU
56
  ids = torch.tensor(input_ids, dtype=torch.long).to(self.device)
57
  mask = torch.tensor(attention_mask, dtype=torch.long).to(self.device)
58
 
59
  outputs = self.model(ids, attention_mask=mask)
60
 
61
- # [FIX 2] Output chuyển về numpy ép kiểu float32
62
- # Quan trọng: TensorFlow mặc định float32, nếu để float64 sẽ gây lỗi tính toán sai
 
63
  return outputs.last_hidden_state.detach().cpu().numpy().astype(np.float32)
64
 
65
  # -------------------------------------------------------------------
@@ -94,6 +95,7 @@ if TF_AVAILABLE:
94
  self.drop3 = tf.keras.layers.Dropout(rate)
95
 
96
  def call(self, x, enc_output, training=None):
 
97
  attn1 = self.att1(x, x, use_causal_mask=True)
98
  out1 = self.ln1(x + self.drop1(attn1, training=training))
99
  attn2 = self.att2(out1, enc_output)
@@ -102,7 +104,7 @@ if TF_AVAILABLE:
102
  return self.ln3(out2 + self.drop3(ffn_out, training=training))
103
 
104
  # -------------------------------------------------------------------
105
- # BUILD DECODER MODEL (Đã chỉnh sửa để khớp với file export)
106
  # -------------------------------------------------------------------
107
  def build_inference_model():
108
  # 1. Inputs
@@ -110,8 +112,7 @@ def build_inference_model():
110
  enc_raw_input = tf.keras.Input(shape=(None, 768), name='enc_raw_input')
111
  dec_inputs_inf = tf.keras.Input(shape=(None,), dtype=tf.int32, name='dec_inputs_inf')
112
 
113
- # 2. Projection Layer (Quan trọng: Phải đặt tên đúng để load weights)
114
- # Layer này chuyển 768 -> 512
115
  enc_out = tf.keras.layers.Dense(CONFIG["EMBED_DIM"], activation="linear", name="encoder_projection")(enc_raw_input)
116
  enc_out = tf.keras.layers.Dropout(CONFIG["DROPOUT"], name="encoder_dropout")(enc_out)
117
 
@@ -133,7 +134,6 @@ def build_inference_model():
133
  # 4. Decoder Blocks
134
  dec_out = dec_emb_inf
135
  for i in range(CONFIG["NUM_LAYERS"]):
136
- # Tên block phải khớp: decoder_block_0, decoder_block_1...
137
  block = TransformerDecoderBlock(
138
  CONFIG["EMBED_DIM"],
139
  CONFIG["NUM_HEADS"],
@@ -178,7 +178,7 @@ class AbstractiveSummarizer:
178
 
179
  print(f"📥 Loading weights from {weights_path}...")
180
  try:
181
- # Load weights. Quan trọng: skip_mismatch=False để đảm bảo mọi thứ khớp 100%
182
  self.decoder_model.load_weights(weights_path)
183
  print("✅ Weights loaded successfully!")
184
  except Exception as e:
@@ -236,10 +236,10 @@ class AbstractiveSummarizer:
236
  return_tensors='np'
237
  )
238
 
239
- # 3. Encode bằng PyTorch (nhận về vector 768 chiều, float32)
240
  enc_out = self.phobert.encode(inp['input_ids'], inp['attention_mask'])
241
 
242
- # 4. Generate Summary bằng TF Model (có tích hợp Projection 768->512)
243
  seq = self.beam_search(enc_out, k=k)
244
 
245
  # 5. Decode kết quả
 
40
  return text
41
 
42
  # -------------------------------------------------------------------
43
+ # PHOBERT ENCODER (PyTorch) - [ĐÃ SỬA ĐỂ KHỚP DATA TYPE]
44
  # -------------------------------------------------------------------
45
  class PhoBERTEncoderTorch:
46
  def __init__(self):
 
52
 
53
  def encode(self, input_ids, attention_mask):
54
  with torch.no_grad():
55
+ # [SỬA 1] Ép kiểu input thành Long (int64) để tránh lỗi với PyTorch CPU
56
  ids = torch.tensor(input_ids, dtype=torch.long).to(self.device)
57
  mask = torch.tensor(attention_mask, dtype=torch.long).to(self.device)
58
 
59
  outputs = self.model(ids, attention_mask=mask)
60
 
61
+ # [SỬA 2] Quan trọng nhất: Chuyển output về float32
62
+ # PyTorch CPU thường trả về float64, nhưng TensorFlow Keras 3 cần float32.
63
+ # Nếu không ép kiểu này, model sẽ tính toán ra rác.
64
  return outputs.last_hidden_state.detach().cpu().numpy().astype(np.float32)
65
 
66
  # -------------------------------------------------------------------
 
95
  self.drop3 = tf.keras.layers.Dropout(rate)
96
 
97
  def call(self, x, enc_output, training=None):
98
+ # Code này dùng use_causal_mask=True -> Bắt buộc dùng Keras 3 (TensorFlow > 2.16)
99
  attn1 = self.att1(x, x, use_causal_mask=True)
100
  out1 = self.ln1(x + self.drop1(attn1, training=training))
101
  attn2 = self.att2(out1, enc_output)
 
104
  return self.ln3(out2 + self.drop3(ffn_out, training=training))
105
 
106
  # -------------------------------------------------------------------
107
+ # BUILD DECODER MODEL
108
  # -------------------------------------------------------------------
109
  def build_inference_model():
110
  # 1. Inputs
 
112
  enc_raw_input = tf.keras.Input(shape=(None, 768), name='enc_raw_input')
113
  dec_inputs_inf = tf.keras.Input(shape=(None,), dtype=tf.int32, name='dec_inputs_inf')
114
 
115
+ # 2. Projection Layer
 
116
  enc_out = tf.keras.layers.Dense(CONFIG["EMBED_DIM"], activation="linear", name="encoder_projection")(enc_raw_input)
117
  enc_out = tf.keras.layers.Dropout(CONFIG["DROPOUT"], name="encoder_dropout")(enc_out)
118
 
 
134
  # 4. Decoder Blocks
135
  dec_out = dec_emb_inf
136
  for i in range(CONFIG["NUM_LAYERS"]):
 
137
  block = TransformerDecoderBlock(
138
  CONFIG["EMBED_DIM"],
139
  CONFIG["NUM_HEADS"],
 
178
 
179
  print(f"📥 Loading weights from {weights_path}...")
180
  try:
181
+ # Load weights.
182
  self.decoder_model.load_weights(weights_path)
183
  print("✅ Weights loaded successfully!")
184
  except Exception as e:
 
236
  return_tensors='np'
237
  )
238
 
239
+ # 3. Encode bằng PyTorch (nhận về vector 768 chiều, đã ép float32)
240
  enc_out = self.phobert.encode(inp['input_ids'], inp['attention_mask'])
241
 
242
+ # 4. Generate Summary bằng TF Model
243
  seq = self.beam_search(enc_out, k=k)
244
 
245
  # 5. Decode kết quả