From 0b31288754a674e5338da7f465435a2947b620f6 Mon Sep 17 00:00:00 2001 From: alikia2x Date: Wed, 17 Sep 2025 23:07:41 +0800 Subject: [PATCH] update: complete the doc for alignment pipeline --- ml/lab/align-pipeline.md | 5 +++-- ml/lab/mmsAlignment/alignWithMMS.py | 2 +- ml/lab/mmsAlignment/splitSong.py | 2 +- ml/lab/whisperAlignment/align2srt.py | 11 ++++++++++- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ml/lab/align-pipeline.md b/ml/lab/align-pipeline.md index 887b919..37f27ba 100644 --- a/ml/lab/align-pipeline.md +++ b/ml/lab/align-pipeline.md @@ -1,3 +1,4 @@ 1. prepare `1.mp3`, `1.txt`, `1.group` in `./data` -2. `whisperAlignment/alignWithGroup` -3. `mmsAlignment +2. Run `whisperAlignment/alignWithGroup.py` +3. Run `mmsAlignment/splitSong.py` +4. Run `mmsAlignment/alignWithMMS.py` \ No newline at end of file diff --git a/ml/lab/mmsAlignment/alignWithMMS.py b/ml/lab/mmsAlignment/alignWithMMS.py index c5169c9..59dd4a4 100644 --- a/ml/lab/mmsAlignment/alignWithMMS.py +++ b/ml/lab/mmsAlignment/alignWithMMS.py @@ -164,4 +164,4 @@ def align(audio_file: str, lrc_file: str, output_ttml: str, segments_dir: str = ttml_generator.save(output_ttml) if __name__ == "__main__": - align("./data/1.flac", "./data/1.lrc", "./data/output.ttml", "./temp/lines") \ No newline at end of file + align("./data/1.mp3", "./data/1.lrc", "./data/output.ttml", "./temp/lines") \ No newline at end of file diff --git a/ml/lab/mmsAlignment/splitSong.py b/ml/lab/mmsAlignment/splitSong.py index bf0ec03..21abb8a 100644 --- a/ml/lab/mmsAlignment/splitSong.py +++ b/ml/lab/mmsAlignment/splitSong.py @@ -52,7 +52,7 @@ def split_audio_by_lrc(audio_file, lrc_data): if __name__ == "__main__": lrc_file = "./data/1.lrc" # LRC文件路径 - audio_file = "./data/1.flac" # 音频文件路径 + audio_file = "./data/1.mp3" # 音频文件路径 lrc_data = parse_lrc(lrc_file) split_audio_by_lrc(audio_file, lrc_data) \ No newline at end of file diff --git a/ml/lab/whisperAlignment/align2srt.py 
b/ml/lab/whisperAlignment/align2srt.py index 54947eb..1202dbe 100644 --- a/ml/lab/whisperAlignment/align2srt.py +++ b/ml/lab/whisperAlignment/align2srt.py @@ -3,4 +3,13 @@ import stable_whisper def align2srt(lyrics, audio_path, output_path): model = stable_whisper.load_model('large-v3') result = model.align(audio_path, lyrics, language="Chinese", regroup=False) - result.to_srt_vtt(output_path, segment_level=False) \ No newline at end of file + result.to_srt_vtt(output_path, segment_level=False) + +AUDIO_FILE = './data/1.mp3' +LYRICS_FILE = './data/1.txt' +OUTPUT_FILE = './data/1.srt' + +if __name__ == "__main__": + with open(LYRICS_FILE, 'r', encoding='utf-8') as f: + lyrics_content = f.read() + align2srt(lyrics_content, AUDIO_FILE, OUTPUT_FILE) \ No newline at end of file