add: postprocess

This commit is contained in:
alikia2x (寒寒) 2024-09-15 23:54:37 +08:00
parent 4c9f411f67
commit 7021687e10
Signed by: alikia2x
GPG Key ID: 56209E0CCD8420C6
2 changed files with 43 additions and 0 deletions

1
.gitignore vendored
View File

@@ -11,4 +11,5 @@ __pycache__
.env*
translate/output
translate/source
translate/result
*.db

42
translate/postprocess.py Normal file
View File

@@ -0,0 +1,42 @@
import os
import json
def read_converted_files(filename):
    """Load the names of already-processed files from the tracking file.

    Args:
        filename: Path of the text file holding one processed file name
            per line.

    Returns:
        A set containing the lines of the file, or an empty set when the
        file does not exist.
    """
    if not os.path.exists(filename):
        return set()
    with open(filename, 'r', encoding='utf-8') as handle:
        recorded_names = handle.read().splitlines()
    return set(recorded_names)
def write_converted_file(filename, file_name):
    """Append one processed file name to the tracking file.

    The tracking file is opened in append mode, so it is created on first
    use. Its parent directory is created as well when it does not exist
    yet, instead of failing with ``FileNotFoundError``.

    Args:
        filename: Path of the tracking file.
        file_name: Name of the file that has just been processed; it is
            written on a line of its own.
    """
    parent_directory = os.path.dirname(filename)
    # os.makedirs('') raises, so skip it for a bare file name in the cwd.
    if parent_directory:
        os.makedirs(parent_directory, exist_ok=True)
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(file_name + '\n')
def process_json_files(directory, converted_filename, result_directory='./result'):
    """Export segment pairs from unprocessed JSON files to parallel text files.

    Every ``.json`` file in ``directory`` that is not yet listed in the
    tracking file is loaded. For each entry of its ``segments`` list the
    ``chinese`` text is appended to ``source.txt`` and the ``english`` text
    to ``target.txt``, one segment per line, so that line N of both files
    belongs to the same segment. The file name is then recorded in the
    tracking file so that it is skipped on later runs.

    Args:
        directory: Directory to scan for ``.json`` files.
        converted_filename: Path of the tracking file listing the names of
            files that were already processed.
        result_directory: Directory receiving ``source.txt`` and
            ``target.txt``; created if missing. Defaults to ``./result``.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
        json.JSONDecodeError: If one of the files is not valid JSON.
    """
    converted_files = read_converted_files(converted_filename)

    # Create the output directory up front; opening a file in append mode
    # does not create missing parent directories.
    os.makedirs(result_directory, exist_ok=True)
    source_path = os.path.join(result_directory, 'source.txt')
    target_path = os.path.join(result_directory, 'target.txt')

    # os.listdir returns entries in arbitrary order; sort them so that the
    # order of the exported lines is reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json') or filename in converted_files:
            continue

        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        # "or []" also covers an explicit JSON null for "segments".
        segments = data.get('segments') or []
        with open(source_path, 'a', encoding='utf-8') as source_file, \
                open(target_path, 'a', encoding='utf-8') as target_file:
            for segment in segments:
                source_file.write(_single_line(segment.get('chinese')) + '\n')
                target_file.write(_single_line(segment.get('english')) + '\n')

        # Record the file only after all of its segments were written.
        write_converted_file(converted_filename, filename)


def _single_line(text):
    """Return ``text`` with line breaks replaced by spaces ('' for None)."""
    if not text:
        return ''
    # Carriage returns must go too: a reader using universal newlines
    # would treat them as line breaks and misalign the two output files.
    return text.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
if __name__ == "__main__":
    # Replace './output' with the path to your directory of JSON files.
    process_json_files(
        directory='./output',
        converted_filename='./result/converted.txt',
    )