cvsa/whisperAlignment/align2srt.py
2024-12-27 19:46:22 +08:00

6 lines
256 B
Python

import stable_whisper
def align2srt(lyrics, audio_path, output_path, *, model_name='large-v3',
              language="Chinese"):
    """Align known lyrics to an audio file and write the timing to a subtitle file.

    Loads a ``stable_whisper`` model, force-aligns ``lyrics`` against the
    audio at ``audio_path``, and saves the result to ``output_path`` through
    ``to_srt_vtt``.

    Args:
        lyrics: Text to align with the audio (passed as the text argument of
            ``model.align``).
        audio_path: Audio input handed to ``model.align``.
        output_path: Destination passed to ``result.to_srt_vtt``.
            NOTE(review): stable-ts appears to choose SRT vs. VTT from the
            file extension -- confirm against the library docs.
        model_name: Model identifier given to ``stable_whisper.load_model``.
            Defaults to ``'large-v3'``, the value previously hard-coded.
        language: Language given to ``model.align``. Defaults to
            ``"Chinese"``, the value previously hard-coded.

    Returns:
        None. The function's only product is the file written to
        ``output_path``.
    """
    # The model is loaded on every call; callers aligning many files may
    # prefer to batch their work accordingly.
    model = stable_whisper.load_model(model_name)

    # regroup=False: keep the alignment as produced, without the library's
    # regrouping step being applied to the result.
    result = model.align(audio_path, lyrics, language=language, regroup=False)

    # segment_level=False: presumably leaves only word-level cues in the
    # output -- confirm against the stable-ts `to_srt_vtt` documentation.
    result.to_srt_vtt(output_path, segment_level=False)