diff options
-rw-r--r-- | poetry.lock | 113 | ||||
-rw-r--r-- | pyproject.toml | 26 | ||||
-rw-r--r-- | readme.md | 23 | ||||
-rw-r--r-- | requirements.txt | 12 | ||||
-rw-r--r-- | swr2_asr/train.py | 16 |
5 files changed, 112 insertions, 78 deletions
diff --git a/poetry.lock b/poetry.lock index 788c2eb..9d91798 100644 --- a/poetry.lock +++ b/poetry.lock @@ -20,6 +20,21 @@ wrapt = [ ] [[package]] +name = "AudioLoader" +version = "0.1.4" +description = "A collection of PyTorch audio datasets for speech and music applications" +optional = false +python-versions = ">=3.6" +files = [] +develop = false + +[package.source] +type = "git" +url = "https://github.com/KinWaiCheuk/AudioLoader.git" +reference = "HEAD" +resolved_reference = "c79acea2db7323fab22a0041211208899d0371e2" + +[[package]] name = "black" version = "23.7.0" description = "The uncompromising code formatter." @@ -77,13 +92,13 @@ files = [ [[package]] name = "click" -version = "8.1.6" +version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"}, - {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"}, + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, ] [package.dependencies] @@ -629,31 +644,19 @@ files = [ [[package]] name = "torch" -version = "2.0.1" +version = "2.0.1+cpu" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.8.0" files = [ - {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"}, - {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"}, - {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"}, - {file = "torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:567f84d657edc5582d716900543e6e62353dbe275e61cdc36eda4929e46df9e7"}, - {file = "torch-2.0.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:787b5a78aa7917465e9b96399b883920c88a08f4eb63b5a5d2d1a16e27d2f89b"}, - {file = "torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e617b1d0abaf6ced02dbb9486803abfef0d581609b09641b34fa315c9c40766d"}, - {file = "torch-2.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b6019b1de4978e96daa21d6a3ebb41e88a0b474898fe251fd96189587408873e"}, - {file = "torch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:dbd68cbd1cd9da32fe5d294dd3411509b3d841baecb780b38b3b7b06c7754434"}, - {file = "torch-2.0.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:ef654427d91600129864644e35deea761fb1fe131710180b952a6f2e2207075e"}, - {file = "torch-2.0.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:25aa43ca80dcdf32f13da04c503ec7afdf8e77e3a0183dd85cd3e53b2842e527"}, - {file = "torch-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ef3ea3d25441d3957348f7e99c7824d33798258a2bf5f0f0277cbcadad2e20d"}, - {file = "torch-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0882243755ff28895e8e6dc6bc26ebcf5aa0911ed81b2a12f241fc4b09075b13"}, - {file = "torch-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:f66aa6b9580a22b04d0af54fcd042f52406a8479e2b6a550e3d9f95963e168c8"}, - {file = "torch-2.0.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:1adb60d369f2650cac8e9a95b1d5758e25d526a34808f7448d0bd599e4ae9072"}, - {file = "torch-2.0.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:1bcffc16b89e296826b33b98db5166f990e3b72654a2b90673e817b16c50e32b"}, - {file = "torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e10e1597f2175365285db1b24019eb6f04d53dcd626c735fc502f1e8b6be9875"}, - {file = "torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490"}, - {file = "torch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8742bdc62946c93f75ff92da00e3803216c6cce9b132fbca69664ca38cfb3e18"}, - {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"}, - {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"}, + {file = "torch-2.0.1+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:fec257249ba014c68629a1994b0c6e7356e20e1afc77a87b9941a40e5095285d"}, + {file = "torch-2.0.1+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:ca88b499973c4c027e32c4960bf20911d7e984bd0c55cda181dc643559f3d93f"}, + {file = "torch-2.0.1+cpu-cp311-cp311-linux_x86_64.whl", hash = "sha256:274d4acf486ef50ce1066ffe9d500beabb32bde69db93e3b71d0892dd148956c"}, + {file = "torch-2.0.1+cpu-cp311-cp311-win_amd64.whl", hash = "sha256:e2603310bdff4b099c4c41ae132192fc0d6b00932ae2621d52d87218291864be"}, + {file = "torch-2.0.1+cpu-cp38-cp38-linux_x86_64.whl", hash = "sha256:8046f49deae5a3d219b9f6059a1f478ae321f232e660249355a8bf6dcaa810c1"}, + {file = "torch-2.0.1+cpu-cp38-cp38-win_amd64.whl", hash = "sha256:2ac4382ff090035f9045b18afe5763e2865dd35f2d661c02e51f658d95c8065a"}, + {file = "torch-2.0.1+cpu-cp39-cp39-linux_x86_64.whl", hash = "sha256:73482a223d577407c45685fde9d2a74ba42f0d8d9f6e1e95c08071dc55c47d7b"}, + {file = "torch-2.0.1+cpu-cp39-cp39-win_amd64.whl", hash = "sha256:f263f8e908288427ae81441fef540377f61e339a27632b1bbe33cf78292fdaea"}, ] [package.dependencies] @@ -666,38 +669,56 @@ typing-extensions = "*" [package.extras] opt-einsum = ["opt-einsum (>=3.3)"] +[package.source] +type = "legacy" +url = "https://download.pytorch.org/whl/cpu" +reference = "pytorch-cpu" + [[package]] name = "torchaudio" -version = "2.0.2" +version = "2.0.2+cpu" description = "An audio package for PyTorch" optional = false python-versions = "*" files = [ - {file = "torchaudio-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80c64dabb9d8c33bc6f2a8e0c7ebe17ea87f5028931c0d6a2f73b9e16b5272d0"}, - {file = "torchaudio-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1619673500fe08ae96b71952f03ecfc74e7d0843cd9882193d0642a82724f537"}, - {file = "torchaudio-2.0.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4dbc7dd84be522f6d1159d33f5cf2fe08656ac5e8402ac2aa07cb626fed4b700"}, - {file = "torchaudio-2.0.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:dadf237b4fd155a3d213bdfeffeded47f5a553d383817500438b44f24fa53851"}, - {file = "torchaudio-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2f975083ba66f0837ea4b55fb8d81f31a63a4a27f8628e54c1e16ebda1842931"}, - {file = "torchaudio-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a6cba80d9ab3a2ec1317cdc5cbc0654a189a26e3d8b28ef9f83336159fd5e5e9"}, - {file = "torchaudio-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15327132b28f34963baa6fe1813030a634d2581aa9ca120f730c1e8fabdc1102"}, - {file = "torchaudio-2.0.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b44ec89d4274856f58d55bce4f90e4294ee26ec3020dc39b3081d541d7fd6184"}, - {file = "torchaudio-2.0.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b22aceaa1ec5a3310cc15642d19dd00d53a7ce399b9096ad1dea0b24e5097af3"}, - {file = "torchaudio-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b37ed5ea4846ce334e7a4a93ca798683088b49e9a4943ed04f4fc4ba1ddc3c4"}, - {file = "torchaudio-2.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a8283dd61c579ea5b14d6773bbc0bf84573b12b37f05f02bb4b2425d77767284"}, - {file = "torchaudio-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c9be830608f93c906770eb7b4880962f8fef9bd5275ac5b48c850f3cc4bc32"}, - {file = "torchaudio-2.0.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:286061f9999905a6b96107c7ac751a4016985a8e2087250ab8328845e4128952"}, - {file = "torchaudio-2.0.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:ee08ec303050405998e74a0a3649aee4d16408c2eb4bb1f8c7a726318b1ce1b7"}, - {file = "torchaudio-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:ad43c0ffaa6771bfa05669bb1d8c3c179c859e92efd985683a78d6d313ecefb6"}, - {file = "torchaudio-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1138a1c39da2445a1caca20ddce1e77c9657e92263eb34376024f517f5284d4b"}, - {file = "torchaudio-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de5d94cb8305c00268dfbc576ca7e445f40891e024a9e5e28c63ad9f851e541a"}, - {file = "torchaudio-2.0.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6c617d5978a6b8a29d6675dcd18196e6dc541daac44b7b2b6eeb31aed82f3203"}, - {file = "torchaudio-2.0.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:a74f33da0b3c53b770f583a02cabd59196f089fb77a65eb39cd5d811b5a21d63"}, - {file = "torchaudio-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:66df470da29964d7c1e8106a42f5f5c9bc09e824083675ce3aba054d68b4af54"}, + {file = "torchaudio-2.0.2+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:80eccef075f9e356f7a4ad27c57614c726906a30617424e2d86a08de179feeff"}, + {file = "torchaudio-2.0.2+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:514995f6418d193caf1a2e06c912f603959ec6d8557b48ef52a9ceb8e5d180d1"}, + {file = "torchaudio-2.0.2+cpu-cp311-cp311-linux_x86_64.whl", hash = "sha256:eb589737c30139c946b4c4f3a73307a6286c5402b6e5273dd5cb8768d0e73b3f"}, + {file = "torchaudio-2.0.2+cpu-cp311-cp311-win_amd64.whl", hash = "sha256:a93c8e9ff28959b2e8b9a1347b34c5f94f33f617bc96fe17011b0d8cb5fb377f"}, + {file = "torchaudio-2.0.2+cpu-cp38-cp38-linux_x86_64.whl", hash = "sha256:5196d7ed34863deaa28434ad02a043b01cfbd0c42b58d61ca56825577aeca74e"}, + {file = "torchaudio-2.0.2+cpu-cp38-cp38-win_amd64.whl", hash = "sha256:b29d34bb39b8e613c562d8c04bd4108be6ae20221ee043e29b81bbef95f3f5bf"}, + {file = "torchaudio-2.0.2+cpu-cp39-cp39-linux_x86_64.whl", hash = "sha256:30fc6926b51892cda933e98616fcdbc1f53084680210809cbc03c301d585ed9c"}, + {file = "torchaudio-2.0.2+cpu-cp39-cp39-win_amd64.whl", hash = "sha256:aa7425184157a0356fe08deb74a517f5a0e1f27722f362a5ac3089904bf17001"}, ] [package.dependencies] torch = "2.0.1" +[package.source] +type = "legacy" +url = "https://download.pytorch.org/whl/cpu" +reference = "pytorch-cpu" + +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typing-extensions" version = "4.7.1" @@ -816,4 +837,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c6893a8dbe2f65dbe93da4d0db72878f099cd03fdb6dc623f053b57a8e02e9ba" +content-hash = "e7b0344d7d2f66cddf80ac9fdbc63b839297b7443c043f960805738d54a79d43" diff --git a/pyproject.toml b/pyproject.toml index 928874a..9c09a99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,13 +11,10 @@ packages = [{include = "swr2_asr"}] python = "^3.10" numpy = "^1.25.2" click = "^8.1.6" - -[tool.poetry.group.cpu] -optional = true - -[tool.poetry.group.cpu.dependencies] -torch = "=2.0.1" -torchaudio = "=2.0.2" +audioloader = {git = "https://github.com/KinWaiCheuk/AudioLoader.git"} +tqdm = "^4.66.1" +torch = {version = "^2.0.1+cpu", source = "pytorch-cpu"} +torchaudio = {version = "^2.0.2+cpu", source = "pytorch-cpu"} [tool.poetry.group.dev.dependencies] black = "^23.7.0" @@ -26,18 +23,23 @@ mypy = "^1.5.1" pre-commit = "^3.3.3" [[tool.poetry.source]] -name = "pytorch" -url = "https://download.pytorch.org/whl/cu111/" -priority = "supplemental" +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +priority = "explicit" + [[tool.poetry.source]] -name = "PyPI" -priority = "primary" +name = "pytorch-gpu" +url = "https://download.pytorch.org/whl/cu118" +priority = "explicit" [[tool.mypy.overrides]] module = "torchaudio.*" ignore_missing_imports = true +[tool.poetry.scripts] +train = "swr2_asr.train:main" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" @@ -4,26 +4,9 @@ Automatic speech recognition model for the seminar spoken word recogniton 2 (SWR2) in the summer term 2023. # Installation - -## Installing poetry - - // installing poetry with the provided install script - curl -sSL https://install.python-poetry.org | python3 - - - // adding poetry to your path - // Linux - echo export PATH="$HOME/.local/bin:$PATH" >> ~/.bashrc && source ~/.bashrc - - // Mac - echo export PATH="$HOME/.local/bin:$PATH" >> ~/.zshrc && source ~/.zshrc - -## Running on cpu or m1 gpu - - poetry lock && poetry install --with cpu - -## Running on nvidia gpu - - poetry lock && poetry install --with gpu +``` +pip install -r requirements.txt +``` # Usage diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bf22df2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +AudioLoader==0.1.4 +filelock==3.12.2 +Jinja2==3.1.2 +MarkupSafe==2.1.3 +mpmath==1.3.0 +networkx==3.1 +numpy==1.25.2 +sympy==1.12 +torch==2.0.1 +torchaudio==2.0.2 +tqdm==4.66.1 +typing_extensions==4.7.1 diff --git a/swr2_asr/train.py b/swr2_asr/train.py index e69de29..cfdef6b 100644 --- a/swr2_asr/train.py +++ b/swr2_asr/train.py @@ -0,0 +1,16 @@ +# import dataset +# define model +# define loss +# define optimizer +# train +from AudioLoader.speech.mls import MultilingualLibriSpeech + + +def main(): + dataset = MultilingualLibriSpeech("./data", "mls_german_opus", "dev", download=True) + + print(dataset[1]) + + +if __name__ == "__main__": + main() |