add: postprocess
This commit is contained in:
parent
4c9f411f67
commit
7021687e10
1
.gitignore
vendored
1
.gitignore
vendored
@ -11,4 +11,5 @@ __pycache__
|
|||||||
.env*
|
.env*
|
||||||
translate/output
|
translate/output
|
||||||
translate/source
|
translate/source
|
||||||
|
translate/result
|
||||||
*.db
|
*.db
|
42
translate/postprocess.py
Normal file
42
translate/postprocess.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
def read_converted_files(filename):
    """Load the names of already-processed files from the tracking file.

    Args:
        filename: Path to the tracking file, which holds one processed
            file name per line.

    Returns:
        A set containing every line of the tracking file, or an empty set
        when the path does not exist.
    """
    # A missing tracking file simply means nothing has been processed yet.
    if not os.path.exists(filename):
        return set()

    with open(filename, 'r', encoding='utf-8') as tracking_file:
        contents = tracking_file.read()
    return set(contents.splitlines())
|
||||||
|
|
||||||
|
def write_converted_file(filename, file_name):
    """Append a processed file name to the tracking file.

    Args:
        filename: Path to the tracking file. It is created if it does not
            exist yet, together with any missing parent directories.
        file_name: Name of the file that has just been processed; written
            as one line.
    """
    # The tracking file may live in a directory that does not exist yet
    # (e.g. on a fresh checkout); create it so that appending cannot fail
    # with FileNotFoundError.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'a', encoding='utf-8') as tracking_file:
        tracking_file.write(file_name + '\n')
|
||||||
|
|
||||||
|
def _flatten_text(value):
    """Return *value* as one line of text; ``None`` becomes ``''``."""
    if value is None:
        return ''
    text = str(value)
    # Source and target files are aligned line by line, so every newline
    # convention (not only '\n') must be flattened or the alignment breaks.
    return text.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')


def process_json_files(directory, converted_filename,
                       source_path='./result/source.txt',
                       target_path='./result/target.txt'):
    """Append the segments of every unprocessed JSON file to the corpus files.

    Each ``*.json`` file in *directory* that is not yet listed in the
    tracking file is loaded, and for every entry of its ``segments`` list
    the ``chinese`` text is appended as one line to *source_path* and the
    ``english`` text as one line to *target_path*. The file name is then
    recorded in the tracking file so it is skipped on later runs.

    Args:
        directory: Directory that is scanned (non-recursively) for
            ``.json`` files.
        converted_filename: Path to the tracking file listing the names of
            files that were already processed.
        source_path: File that receives the ``chinese`` lines.
        target_path: File that receives the ``english`` lines.

    Raises:
        OSError: If the directory or one of the files cannot be accessed.
        json.JSONDecodeError: If a JSON file is malformed.
    """
    converted_files = read_converted_files(converted_filename)

    # Make sure every output location exists before the first append.
    for output_path in (source_path, target_path, converted_filename):
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # order of the generated corpus reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json') or filename in converted_files:
            continue

        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        # Tolerate an explicit null for "segments".
        segments = data.get('segments') or []

        # Build all lines before touching the outputs so that a malformed
        # segment cannot leave a half-written, unrecorded file behind.
        source_lines = [
            _flatten_text(segment.get('chinese')) + '\n'
            for segment in segments
        ]
        target_lines = [
            _flatten_text(segment.get('english')) + '\n'
            for segment in segments
        ]

        with open(source_path, 'a', encoding='utf-8') as source_file, \
                open(target_path, 'a', encoding='utf-8') as target_file:
            source_file.writelines(source_lines)
            target_file.writelines(target_lines)

        write_converted_file(converted_filename, filename)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Replace './output' with the path of your JSON file directory.
    process_json_files(
        directory='./output',
        converted_filename='./result/converted.txt',
    )
|
Loading…
Reference in New Issue
Block a user