In [16]:
import time
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# 定义参数
model_checkpoint = "Helsinki-NLP/opus-mt-zh-en"
checkpoint_path = "./saves/step_74500_valid_bleu_30.28_model_weights.bin"  # 假设使用训练中的checkpoint

# 加载tokenizer和模型
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# 加载checkpoint
model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
model.eval()

# 将模型转移到设备
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
model = model.to(device)

  model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))


In [24]:
def infer_translation(input_text, model, tokenizer, max_length=128, num_beams=1, length_penalty=1.2):
    # 记录推理开始时间
    start_time = time.time()

    # 预处理输入文本
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        padding="max_length",
        max_length=max_length,
    ).to(device)

    # 模型生成翻译
    with torch.no_grad():
        output_tokens = model.generate(
            inputs["input_ids"],
            max_length=max_length,
            num_beams=num_beams,
            length_penalty=length_penalty,
            early_stopping=True,
            no_repeat_ngram_size=2,
            temperature = 0.3,
            top_p = 0.85,
            do_sample = False
        )

    # 解码生成的tokens为文本
    translation = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

    # 记录推理结束时间
    end_time = time.time()
    inference_time = end_time - start_time

    return translation, inference_time

def translate(input_text, model, tokenizer):
    lines = input_text.splitlines()
    
    # 存储每一行的翻译结果
    translations = []
    total_time = 0 
    
    # 对每一行进行翻译
    for line in lines:
        if line.strip() == "":
            translations.append("")
            continue
        #对于长行按句翻译
        if len(line) > 64 and '。' in line:
            sentences = line.split('。')
            translated_sentences=[]
            for sentence in sentences:
                if sentence.strip() == "":
                    continue
                translation, time_cost = infer_translation(sentence, model, tokenizer)
                translated_sentences.append(translation)
                total_time += time_cost
                #print(sentence,translation)
            translations.append(" ".join(translated_sentences))
        else:
            translation, time_cost = infer_translation(line, model, tokenizer)
            #print(line,translation)
            translations.append(translation)
            total_time += time_cost
    
    final_translation = "\n".join(translations)
    
    return final_translation, total_time


In [25]:
# 用户输入
input_text = '''自2000年左右，台湾的珍珠奶茶传入中国大陆，市场规模逐步扩大。当地不断推出新口味的奶茶、水果茶和奶盖茶等创新饮品，并提供多样化的配料选择，统称为新式茶饮。2018年起，奶茶品牌开始采用网红营销策略，使得部分城市门店顾客络绎不绝。尽管消费者有多达两千种的搭配选择，但销量最高的依旧是珍珠、红豆和布丁这三种经典配料。
面对激烈的市场竞争，茶饮品牌开始区分不同的档次，从使用红茶粉和奶精的低成本产品，到采用新鲜牛奶和现场煮制的高级奶茶，甚至高端茶叶如大红袍、龙井茶也成为一些品牌的选用。'''

# 进行推理并测量时间
translated_text, time_taken = translate(input_text, model, tokenizer)

# 输出结果
print(f"Original Text: \n{input_text}\n\n")
print(f"Translated Text: \n{translated_text}\n")
print(f"Inference Time: {time_taken:.4f} seconds")




Original Text: 
自2000年左右，台湾的珍珠奶茶传入中国大陆，市场规模逐步扩大。当地不断推出新口味的奶茶、水果茶和奶盖茶等创新饮品，并提供多样化的配料选择，统称为新式茶饮。2018年起，奶茶品牌开始采用网红营销策略，使得部分城市门店顾客络绎不绝。尽管消费者有多达两千种的搭配选择，但销量最高的依旧是珍珠、红豆和布丁这三种经典配料。
面对激烈的市场竞争，茶饮品牌开始区分不同的档次，从使用红茶粉和奶精的低成本产品，到采用新鲜牛奶和现场煮制的高级奶茶，甚至高端茶叶如大红袍、龙井茶也成为一些品牌的选用。


Translated Text: 
Since about 2000, the Pearl Milk Tea of Taiwan has been spreading into the mainland, and the market has gradually expanded The new tea, fruit tea and milk tea are introduced in the local market, and the variety of ingredients is offered, collectively known as new-style tea. Since 2018, the milk tea brand has adopted a mesh marketing strategy, which has made some city stores more and more customers. Despite the fact that consumers have as many as 2, 000 combinations, the highest sales are still the three classic ingredients: pearls and red beans and pudding.
In the face of fierce market competition, tea and tea brands began to differentiate between low-cost products using red tea powder and cream, high-grade milk 