update: add metadata export for intent classification

This commit is contained in:
alikia2x (寒寒) 2024-09-26 22:57:27 +08:00
parent 853d158c41
commit bf2c9a393a
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
2 changed files with 9 additions and 2 deletions

View File

@@ -88,7 +88,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_name)
data = load_data("data.json")
class_to_idx, _ = create_class_mappings(data)
class_to_idx, idx_to_class = create_class_mappings(data)
embedding_map = torch.load("token_id_to_reduced_embedding.pt")
dataset = preprocess_data(data, embedding_map, tokenizer, class_to_idx)
train_data, _ = train_test_split(dataset, test_size=0.2)
@@ -143,6 +143,12 @@ def main():
},
opset_version=11,
)
meta = {
"idx_to_class": idx_to_class,
"threshold": 0
}
with open('NLU_meta.json', 'w') as f:
json.dump(meta, f)
if __name__ == "__main__":

View File

@@ -2,6 +2,7 @@ from openai import OpenAI
import argparse
import os
from dotenv import load_dotenv
from tqdm import tqdm
def translate_text(text, client, model_name, temp):
messages = [
@@ -37,7 +38,7 @@ with open(input_file, "r") as f:
src_lines = f.readlines()
for line in src_lines:
for line in tqdm(src_lines):
result = translate_text(line, client, model, temp)
with open(output_file, 'a') as f:
f.write(result + '\n')