From 7021687e10c8b5ae1f56c1b6d76d362b92a9486b Mon Sep 17 00:00:00 2001
From: alikia2x
Date: Sun, 15 Sep 2024 23:54:37 +0800
Subject: [PATCH] add: postprocess

---
Note: this patch was edited by hand after it was generated; the blob id on
the `index` line of translate/postprocess.py is carried over from the
original revision and no longer matches the file content below.

 .gitignore               |  1 +
 translate/postprocess.py | 90 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 translate/postprocess.py

diff --git a/.gitignore b/.gitignore
index 6ab4ef9..0d46073 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ __pycache__
 .env*
 translate/output
 translate/source
+translate/result
 *.db
\ No newline at end of file
diff --git a/translate/postprocess.py b/translate/postprocess.py
new file mode 100644
index 0000000..bf5f1e9
--- /dev/null
+++ b/translate/postprocess.py
@@ -0,0 +1,90 @@
+import os
+import json
+
+
+def read_converted_files(filename):
+    """Return the set of file names already recorded in the tracking file.
+
+    Args:
+        filename: Path of the tracking file, one processed name per line.
+
+    Returns:
+        A set of recorded names; empty when the tracking file is missing.
+    """
+    try:
+        with open(filename, 'r', encoding='utf-8') as file:
+            return set(file.read().splitlines())
+    except FileNotFoundError:
+        return set()
+
+
+def write_converted_file(filename, file_name):
+    """Append one processed file name to the tracking file.
+
+    Args:
+        filename: Path of the tracking file; its directory is created.
+        file_name: Name of the JSON file that has just been processed.
+    """
+    parent = os.path.dirname(filename)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(filename, 'a', encoding='utf-8') as file:
+        file.write(file_name + '\n')
+
+
+def _flatten(text):
+    """Return *text* with every line break replaced by a space."""
+    # A missing or null field becomes an empty line so that source.txt and
+    # target.txt stay aligned line by line.
+    if not text:
+        return ''
+    return text.replace('\r\n', ' ').replace('\r', ' ').replace('\n', ' ')
+
+
+def process_json_files(directory, converted_filename,
+                       result_directory='./result'):
+    """Append the segments of every unprocessed JSON file to the corpus.
+
+    Each segment contributes its 'chinese' text as one line of source.txt
+    and its 'english' text as the matching line of target.txt. Files that
+    are already listed in the tracking file are skipped.
+
+    Args:
+        directory: Directory containing the *.json files to process.
+        converted_filename: Path of the tracking file of processed names.
+        result_directory: Directory receiving source.txt and target.txt.
+    """
+    converted_files = read_converted_files(converted_filename)
+    # The result directory is git-ignored, so it is absent on a fresh clone.
+    os.makedirs(result_directory, exist_ok=True)
+    source_path = os.path.join(result_directory, 'source.txt')
+    target_path = os.path.join(result_directory, 'target.txt')
+
+    # os.listdir() returns names in arbitrary order; sort for stable output.
+    for filename in sorted(os.listdir(directory)):
+        if not filename.endswith('.json') or filename in converted_files:
+            continue
+
+        file_path = os.path.join(directory, filename)
+        with open(file_path, 'r', encoding='utf-8') as json_file:
+            segments = json.load(json_file).get('segments') or []
+
+        # Build every line before writing so a malformed segment aborts
+        # before anything is appended for this file.
+        source_lines = [_flatten(s.get('chinese')) + '\n' for s in segments]
+        target_lines = [_flatten(s.get('english')) + '\n' for s in segments]
+
+        with open(source_path, 'a', encoding='utf-8') as source_file, \
+                open(target_path, 'a', encoding='utf-8') as target_file:
+            source_file.writelines(source_lines)
+            target_file.writelines(target_lines)
+
+        # Record the file only after its segments have been written.
+        write_converted_file(converted_filename, filename)
+
+
+if __name__ == "__main__":
+    json_directory = './output'  # Replace with your JSON directory path
+    converted_filename = './result/converted.txt'
+
+    process_json_files(json_directory, converted_filename)