Update py/abstractive.py
Browse files- py/abstractive.py +11 -11
py/abstractive.py
CHANGED
|
@@ -40,7 +40,7 @@ def clean_text_inference(text: str) -> str:
|
|
| 40 |
return text
|
| 41 |
|
| 42 |
# -------------------------------------------------------------------
|
| 43 |
-
# PHOBERT ENCODER (PyTorch) - ĐÃ SỬA
|
| 44 |
# -------------------------------------------------------------------
|
| 45 |
class PhoBERTEncoderTorch:
|
| 46 |
def __init__(self):
|
|
@@ -52,14 +52,15 @@ class PhoBERTEncoderTorch:
|
|
| 52 |
|
| 53 |
def encode(self, input_ids, attention_mask):
|
| 54 |
with torch.no_grad():
|
| 55 |
-
# [
|
| 56 |
ids = torch.tensor(input_ids, dtype=torch.long).to(self.device)
|
| 57 |
mask = torch.tensor(attention_mask, dtype=torch.long).to(self.device)
|
| 58 |
|
| 59 |
outputs = self.model(ids, attention_mask=mask)
|
| 60 |
|
| 61 |
-
# [
|
| 62 |
-
#
|
|
|
|
| 63 |
return outputs.last_hidden_state.detach().cpu().numpy().astype(np.float32)
|
| 64 |
|
| 65 |
# -------------------------------------------------------------------
|
|
@@ -94,6 +95,7 @@ if TF_AVAILABLE:
|
|
| 94 |
self.drop3 = tf.keras.layers.Dropout(rate)
|
| 95 |
|
| 96 |
def call(self, x, enc_output, training=None):
|
|
|
|
| 97 |
attn1 = self.att1(x, x, use_causal_mask=True)
|
| 98 |
out1 = self.ln1(x + self.drop1(attn1, training=training))
|
| 99 |
attn2 = self.att2(out1, enc_output)
|
|
@@ -102,7 +104,7 @@ if TF_AVAILABLE:
|
|
| 102 |
return self.ln3(out2 + self.drop3(ffn_out, training=training))
|
| 103 |
|
| 104 |
# -------------------------------------------------------------------
|
| 105 |
-
# BUILD DECODER MODEL
|
| 106 |
# -------------------------------------------------------------------
|
| 107 |
def build_inference_model():
|
| 108 |
# 1. Inputs
|
|
@@ -110,8 +112,7 @@ def build_inference_model():
|
|
| 110 |
enc_raw_input = tf.keras.Input(shape=(None, 768), name='enc_raw_input')
|
| 111 |
dec_inputs_inf = tf.keras.Input(shape=(None,), dtype=tf.int32, name='dec_inputs_inf')
|
| 112 |
|
| 113 |
-
# 2. Projection Layer
|
| 114 |
-
# Layer này chuyển 768 -> 512
|
| 115 |
enc_out = tf.keras.layers.Dense(CONFIG["EMBED_DIM"], activation="linear", name="encoder_projection")(enc_raw_input)
|
| 116 |
enc_out = tf.keras.layers.Dropout(CONFIG["DROPOUT"], name="encoder_dropout")(enc_out)
|
| 117 |
|
|
@@ -133,7 +134,6 @@ def build_inference_model():
|
|
| 133 |
# 4. Decoder Blocks
|
| 134 |
dec_out = dec_emb_inf
|
| 135 |
for i in range(CONFIG["NUM_LAYERS"]):
|
| 136 |
-
# Tên block phải khớp: decoder_block_0, decoder_block_1...
|
| 137 |
block = TransformerDecoderBlock(
|
| 138 |
CONFIG["EMBED_DIM"],
|
| 139 |
CONFIG["NUM_HEADS"],
|
|
@@ -178,7 +178,7 @@ class AbstractiveSummarizer:
|
|
| 178 |
|
| 179 |
print(f"📥 Loading weights from {weights_path}...")
|
| 180 |
try:
|
| 181 |
-
# Load weights.
|
| 182 |
self.decoder_model.load_weights(weights_path)
|
| 183 |
print("✅ Weights loaded successfully!")
|
| 184 |
except Exception as e:
|
|
@@ -236,10 +236,10 @@ class AbstractiveSummarizer:
|
|
| 236 |
return_tensors='np'
|
| 237 |
)
|
| 238 |
|
| 239 |
-
# 3. Encode bằng PyTorch (nhận về vector 768 chiều, float32)
|
| 240 |
enc_out = self.phobert.encode(inp['input_ids'], inp['attention_mask'])
|
| 241 |
|
| 242 |
-
# 4. Generate Summary bằng TF Model
|
| 243 |
seq = self.beam_search(enc_out, k=k)
|
| 244 |
|
| 245 |
# 5. Decode kết quả
|
|
|
|
| 40 |
return text
|
| 41 |
|
| 42 |
# -------------------------------------------------------------------
|
| 43 |
+
# PHOBERT ENCODER (PyTorch) - [ĐÃ SỬA ĐỂ KHỚP DATA TYPE]
|
| 44 |
# -------------------------------------------------------------------
|
| 45 |
class PhoBERTEncoderTorch:
|
| 46 |
def __init__(self):
|
|
|
|
| 52 |
|
| 53 |
def encode(self, input_ids, attention_mask):
|
| 54 |
with torch.no_grad():
|
| 55 |
+
# [SỬA 1] Ép kiểu input thành Long (int64) để tránh lỗi với PyTorch CPU
|
| 56 |
ids = torch.tensor(input_ids, dtype=torch.long).to(self.device)
|
| 57 |
mask = torch.tensor(attention_mask, dtype=torch.long).to(self.device)
|
| 58 |
|
| 59 |
outputs = self.model(ids, attention_mask=mask)
|
| 60 |
|
| 61 |
+
# [SỬA 2] Quan trọng nhất: Chuyển output về float32
|
| 62 |
+
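# PyTorch models run in float32 by default (not float64); the explicit cast below only guards against a model loaded in another precision (e.g. float16), because the TensorFlow/Keras decoder input expects float32.
|
| 63 |
+
# When the array is already float32, the cast leaves the values unchanged.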
|
| 64 |
return outputs.last_hidden_state.detach().cpu().numpy().astype(np.float32)
|
| 65 |
|
| 66 |
# -------------------------------------------------------------------
|
|
|
|
| 95 |
self.drop3 = tf.keras.layers.Dropout(rate)
|
| 96 |
|
| 97 |
def call(self, x, enc_output, training=None):
|
| 98 |
+
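# use_causal_mask=True is a call-time argument of MultiHeadAttention available since TensorFlow 2.10; it is also supported by Keras 3, which is the default Keras from TensorFlow 2.16 onward.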
|
| 99 |
attn1 = self.att1(x, x, use_causal_mask=True)
|
| 100 |
out1 = self.ln1(x + self.drop1(attn1, training=training))
|
| 101 |
attn2 = self.att2(out1, enc_output)
|
|
|
|
| 104 |
return self.ln3(out2 + self.drop3(ffn_out, training=training))
|
| 105 |
|
| 106 |
# -------------------------------------------------------------------
|
| 107 |
+
# BUILD DECODER MODEL
|
| 108 |
# -------------------------------------------------------------------
|
| 109 |
def build_inference_model():
|
| 110 |
# 1. Inputs
|
|
|
|
| 112 |
enc_raw_input = tf.keras.Input(shape=(None, 768), name='enc_raw_input')
|
| 113 |
dec_inputs_inf = tf.keras.Input(shape=(None,), dtype=tf.int32, name='dec_inputs_inf')
|
| 114 |
|
| 115 |
+
# 2. Projection Layer
|
|
|
|
| 116 |
enc_out = tf.keras.layers.Dense(CONFIG["EMBED_DIM"], activation="linear", name="encoder_projection")(enc_raw_input)
|
| 117 |
enc_out = tf.keras.layers.Dropout(CONFIG["DROPOUT"], name="encoder_dropout")(enc_out)
|
| 118 |
|
|
|
|
| 134 |
# 4. Decoder Blocks
|
| 135 |
dec_out = dec_emb_inf
|
| 136 |
for i in range(CONFIG["NUM_LAYERS"]):
|
|
|
|
| 137 |
block = TransformerDecoderBlock(
|
| 138 |
CONFIG["EMBED_DIM"],
|
| 139 |
CONFIG["NUM_HEADS"],
|
|
|
|
| 178 |
|
| 179 |
print(f"📥 Loading weights from {weights_path}...")
|
| 180 |
try:
|
| 181 |
+
# Load weights.
|
| 182 |
self.decoder_model.load_weights(weights_path)
|
| 183 |
print("✅ Weights loaded successfully!")
|
| 184 |
except Exception as e:
|
|
|
|
| 236 |
return_tensors='np'
|
| 237 |
)
|
| 238 |
|
| 239 |
+
# 3. Encode bằng PyTorch (nhận về vector 768 chiều, đã ép float32)
|
| 240 |
enc_out = self.phobert.encode(inp['input_ids'], inp['attention_mask'])
|
| 241 |
|
| 242 |
+
# 4. Generate Summary bằng TF Model
|
| 243 |
seq = self.beam_search(enc_out, k=k)
|
| 244 |
|
| 245 |
# 5. Decode kết quả
|