From 3867f6172e2789da5ff3df275a09eedb014a8b27 Mon Sep 17 00:00:00 2001
From: alikia2x
Date: Sun, 20 Oct 2024 03:36:48 +0800
Subject: [PATCH] ref: re-organize project structure

---
 .gitignore                              | 169 ++++++++++++++++++++++++
 README.md                               |  36 +----
 UModel.py                               |   6 +-
 cafe.wav => assets/cafe.wav             | Bin
 perlin.wav => assets/perlin.wav         | Bin
 radionoise.wav => assets/radionoise.wav | Bin
 silence.wav => assets/silence.wav       | Bin
 white.wav => assets/white.wav           | Bin
 data.py                                 |   8 +-
 requirements.txt                        |   6 +
 test1.wav => test/test1.wav             | Bin
 test2.wav => test/test2.wav             | Bin
 test3.wav => test/test3.wav             | Bin
 test4.wav => test/test4.wav             | Bin
 test5.wav => test/test5.wav             | Bin
 test6.wav => test/test6.wav             | Bin
 tonu.py                                 |  10 +-
 train.py                                |   9 +-
 18 files changed, 199 insertions(+), 45 deletions(-)
 create mode 100644 .gitignore
 rename cafe.wav => assets/cafe.wav (100%)
 rename perlin.wav => assets/perlin.wav (100%)
 rename radionoise.wav => assets/radionoise.wav (100%)
 rename silence.wav => assets/silence.wav (100%)
 rename white.wav => assets/white.wav (100%)
 create mode 100644 requirements.txt
 rename test1.wav => test/test1.wav (100%)
 rename test2.wav => test/test2.wav (100%)
 rename test3.wav => test/test3.wav (100%)
 rename test4.wav => test/test4.wav (100%)
 rename test5.wav => test/test5.wav (100%)
 rename test6.wav => test/test6.wav (100%)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9dff66e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,169 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# OS-specific
+.DS_Store
+
+# Project Specific
+*.mp3
+results/
\ No newline at end of file
diff --git a/README.md b/README.md
index 83ea1fd..e7935bb 100644
--- a/README.md
+++ b/README.md
@@ -1,35 +1,9 @@
-# Speech Recognition for Uyghur using deep learning
-Training:
+# Anglash
 
-this model using CTC loss for training.
+An ASR (Automatic Speech Recognition) model for the Uyghur language.
 
-Download [pretrained model](https://github.com/gheyret/uyghur-asr-ctc/releases/download/data/results.7z) and [dataset](https://github.com/gheyret/uyghur-asr-ctc/releases/download/data/thuyg20_data.7z).
+This project is a fork of [uyghur-asr-ctc](https://github.com/gheyret/uyghur-asr-ctc).
 
-unzip results.7z and thuyg20_data.7z to the same folder where python source files located. then run:
-```
-python train.py
-```
-
-Recognition:
-
-for recognition download only pretrained model(results.7z). then run:
-
-```
-python tonu.py test1.wav
-```
-result will be:
-```
- Model loaded: results/UModel_last.pth
- Best CER: 7.21%
- Trained: 473 epochs
-The model has 26,389,282 trainable parameters
-
-======================
-Recognizing file .\test2.wav
-test2.wav -> bu öy eslide xotunining xush tebessumi oghlining omaq külküsi bilen güzel idi
-```
-
-This project using
-
-[**A free Uyghur speech database Released by CSLT@Tsinghua University & Xinjiang University**](http://www.openslr.org/22/)
+Anglash is fine-tuned on the [Common Voice](https://commonvoice.mozilla.org/) dataset, which contains 313 hours of data.
 
+The original project uses [**A free Uyghur speech database Released by CSLT@Tsinghua University & Xinjiang University**](http://www.openslr.org/22/), which contains 22.45 hours of data.
diff --git a/UModel.py b/UModel.py
index bb4f65f..dc98d43 100644
--- a/UModel.py
+++ b/UModel.py
@@ -162,12 +162,12 @@ if __name__ == "__main__":
 
     net = UModel(featurelen).to(device)
     #net.save(0)
-    text = net.predict("test1.wav",device)
+    text = net.predict("./test/test1.wav",device)
     print(text)
-    text = net.predict("test2.wav",device)
+    text = net.predict("./test/test2.wav",device)
     print(text)
 
-    melf = melfuture("test3.wav")
+    melf = melfuture("./test/test3.wav")
     melf.unsqueeze_(0)
 
     conv0 = nn.Conv1d(featurelen,256,11,2, 5, 1)
diff --git a/cafe.wav b/assets/cafe.wav
similarity index 100%
rename from cafe.wav
rename to assets/cafe.wav
diff --git a/perlin.wav b/assets/perlin.wav
similarity index 100%
rename from perlin.wav
rename to assets/perlin.wav
diff --git a/radionoise.wav b/assets/radionoise.wav
similarity index 100%
rename from radionoise.wav
rename to assets/radionoise.wav
diff --git a/silence.wav b/assets/silence.wav
similarity index 100%
rename from silence.wav
rename to assets/silence.wav
diff --git a/white.wav b/assets/white.wav
similarity index 100%
rename from white.wav
rename to assets/white.wav
diff --git a/data.py b/data.py
index 80a653b..d357c84 100644
--- a/data.py
+++ b/data.py
@@ -17,10 +17,10 @@ window_len = fft_len
 window = "hann"
 hop_len = 200
 
-white_noise,_=librosa.load('white.wav',sr=sample_rate, duration=15.0)
-perlin_noise,_=librosa.load('perlin.wav',sr=sample_rate, duration=15.0)
-cafe_noise, _ = librosa.load('cafe.wav',sr=sample_rate, duration=15.0)
-radio_noise, _ = librosa.load('radionoise.wav',sr=sample_rate, duration=15.0)
+white_noise,_=librosa.load('./assets/white.wav',sr=sample_rate, duration=15.0)
+perlin_noise,_=librosa.load('./assets/perlin.wav',sr=sample_rate, duration=15.0)
+cafe_noise, _ = librosa.load('./assets/cafe.wav',sr=sample_rate, duration=15.0)
+radio_noise, _ = librosa.load('./assets/radionoise.wav',sr=sample_rate, duration=15.0)
 
 def addnoise(audio):
     rnd = random.random()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e68c8d9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+librosa==0.9.2
+numpy==1.24.4
+scikit_learn==1.3.2
+torch==2.2.2
+tqdm==4.66.1
+umsc==0.3.0
diff --git a/test1.wav b/test/test1.wav
similarity index 100%
rename from test1.wav
rename to test/test1.wav
diff --git a/test2.wav b/test/test2.wav
similarity index 100%
rename from test2.wav
rename to test/test2.wav
diff --git a/test3.wav b/test/test3.wav
similarity index 100%
rename from test3.wav
rename to test/test3.wav
diff --git a/test4.wav b/test/test4.wav
similarity index 100%
rename from test4.wav
rename to test/test4.wav
diff --git a/test5.wav b/test/test5.wav
similarity index 100%
rename from test5.wav
rename to test/test5.wav
diff --git a/test6.wav b/test/test6.wav
similarity index 100%
rename from test6.wav
rename to test/test6.wav
diff --git a/tonu.py b/tonu.py
index 22712bf..087c71e 100644
--- a/tonu.py
+++ b/tonu.py
@@ -1,7 +1,11 @@
 import sys
-import os
 from data import featurelen
 from UModel import UModel
+from umsc import UgMultiScriptConverter
+source_script = 'UAS'
+target_script = 'ULS'
+converter = UgMultiScriptConverter(source_script, target_script)
+
 
 if __name__ == '__main__':
     model = UModel(featurelen)
@@ -12,6 +16,6 @@ if __name__ == '__main__':
         device = 'cpu'
     model.to(device)
     audiofile = sys.argv[1]
-    print(f"\n======================\nRecognizing file {audiofile}")
     txt = model.predict(audiofile,device)
-    print("%s -> %s" %(os.path.basename(audiofile),txt))
+    script = converter(txt)
+    print(script)
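The tonu.py change above swaps the per-file log line for a script-conversion step: the transcript returned by model.predict() is converted from Uyghur Arabic script (UAS) to Uyghur Latin script (ULS) before printing. Below is a minimal sketch of that step in isolation, assuming the umsc==0.3.0 package pinned in requirements.txt exposes UgMultiScriptConverter as a callable object exactly as the diff uses it; the to_latin helper and the commented usage line are hypothetical illustrations, not part of the patch.

```
from umsc import UgMultiScriptConverter

# 'UAS' = Uyghur Arabic script, 'ULS' = Uyghur Latin script, as in tonu.py above.
converter = UgMultiScriptConverter('UAS', 'ULS')

def to_latin(transcript: str) -> str:
    # The converter object is called directly on a string, mirroring
    # `script = converter(txt)` in the diff above.
    return converter(transcript)

# Hypothetical usage with a transcript produced by UModel.predict():
# print(to_latin(model.predict('./test/test1.wav', device)))
```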
diff --git a/train.py b/train.py
index f033607..801ad65 100644
--- a/train.py
+++ b/train.py
@@ -128,7 +128,7 @@ def train(model, train_loader):
 
 
 if __name__ == "__main__":
-    device = "cuda"
+    device = "mps"
 
     os.makedirs('./results',exist_ok=True)
 
@@ -167,9 +167,10 @@ if __name__ == "__main__":
         torch.cuda.empty_cache()
         model.eval()
         msg = ""
-        for afile in testfile:
-            text = model.predict(afile,device)
-            text = f"{afile}-->{text}\n"
+        for file in testfile:
+            file = "./test/" + file
+            text = model.predict(file,device)
+            text = f"{file}-->{text}\n"
             print(text,end="")
             msg += text
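The train.py hunk above hardcodes device = "mps" (Apple's Metal backend) in place of "cuda", while the unconditional torch.cuda.empty_cache() call remains in the surrounding context lines. The sketch below is a hedged alternative, not part of the patch: it picks whichever backend is available at runtime using standard PyTorch checks (available in the torch==2.2.2 pin from requirements.txt) and only clears the cache of the backend actually in use.

```
import torch

# Suggested sketch (not in the diff): choose the best available backend at
# runtime instead of hardcoding "mps".
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

def empty_cache(device: str) -> None:
    # Guarded replacement for the unconditional torch.cuda.empty_cache() call.
    if device == "cuda":
        torch.cuda.empty_cache()
    elif device == "mps":
        torch.mps.empty_cache()
```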