Committed by GitHub
Replace torchaudio with soundfile in python-api-examples (#765)
正在显示 3 个修改的文件，包含 27 行增加和 9 行删除
| @@ -65,7 +65,7 @@ from typing import Dict, List, Tuple | @@ -65,7 +65,7 @@ from typing import Dict, List, Tuple | ||
| 65 | 65 | ||
| 66 | import numpy as np | 66 | import numpy as np |
| 67 | import sherpa_onnx | 67 | import sherpa_onnx |
| 68 | -import torchaudio | 68 | +import soundfile as sf |
| 69 | 69 | ||
| 70 | try: | 70 | try: |
| 71 | import sounddevice as sd | 71 | import sounddevice as sd |
| @@ -357,8 +357,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]: | @@ -357,8 +357,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]: | ||
| 357 | 357 | ||
| 358 | 358 | ||
| 359 | def load_audio(filename: str) -> Tuple[np.ndarray, int]: | 359 | def load_audio(filename: str) -> Tuple[np.ndarray, int]: |
| 360 | - samples, sample_rate = torchaudio.load(filename) | ||
| 361 | - return samples[0].contiguous().numpy(), sample_rate | 360 | + data, sample_rate = sf.read( |
| 361 | + filename, | ||
| 362 | + always_2d=True, | ||
| 363 | + dtype="float32", | ||
| 364 | + ) | ||
| 365 | + data = data[:, 0] # use only the first channel | ||
| 366 | + samples = np.ascontiguousarray(data) | ||
| 367 | + return samples, sample_rate | ||
| 362 | 368 | ||
| 363 | 369 | ||
| 364 | def compute_speaker_embedding( | 370 | def compute_speaker_embedding( |
| @@ -60,7 +60,7 @@ from typing import Dict, List, Tuple | @@ -60,7 +60,7 @@ from typing import Dict, List, Tuple | ||
| 60 | 60 | ||
| 61 | import numpy as np | 61 | import numpy as np |
| 62 | import sherpa_onnx | 62 | import sherpa_onnx |
| 63 | -import torchaudio | 63 | +import soundfile as sf |
| 64 | 64 | ||
| 65 | try: | 65 | try: |
| 66 | import sounddevice as sd | 66 | import sounddevice as sd |
| @@ -160,8 +160,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]: | @@ -160,8 +160,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]: | ||
| 160 | 160 | ||
| 161 | 161 | ||
| 162 | def load_audio(filename: str) -> Tuple[np.ndarray, int]: | 162 | def load_audio(filename: str) -> Tuple[np.ndarray, int]: |
| 163 | - samples, sample_rate = torchaudio.load(filename) | ||
| 164 | - return samples[0].contiguous().numpy(), sample_rate | 163 | + data, sample_rate = sf.read( |
| 164 | + filename, | ||
| 165 | + always_2d=True, | ||
| 166 | + dtype="float32", | ||
| 167 | + ) | ||
| 168 | + data = data[:, 0] # use only the first channel | ||
| 169 | + samples = np.ascontiguousarray(data) | ||
| 170 | + return samples, sample_rate | ||
| 165 | 171 | ||
| 166 | 172 | ||
| 167 | def compute_speaker_embedding( | 173 | def compute_speaker_embedding( |
| @@ -52,7 +52,7 @@ from typing import Dict, List, Tuple | @@ -52,7 +52,7 @@ from typing import Dict, List, Tuple | ||
| 52 | 52 | ||
| 53 | import numpy as np | 53 | import numpy as np |
| 54 | import sherpa_onnx | 54 | import sherpa_onnx |
| 55 | -import torchaudio | 55 | +import soundfile as sf |
| 56 | 56 | ||
| 57 | try: | 57 | try: |
| 58 | import sounddevice as sd | 58 | import sounddevice as sd |
| @@ -145,8 +145,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]: | @@ -145,8 +145,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]: | ||
| 145 | 145 | ||
| 146 | 146 | ||
| 147 | def load_audio(filename: str) -> Tuple[np.ndarray, int]: | 147 | def load_audio(filename: str) -> Tuple[np.ndarray, int]: |
| 148 | - samples, sample_rate = torchaudio.load(filename) | ||
| 149 | - return samples[0].contiguous().numpy(), sample_rate | 148 | + data, sample_rate = sf.read( |
| 149 | + filename, | ||
| 150 | + always_2d=True, | ||
| 151 | + dtype="float32", | ||
| 152 | + ) | ||
| 153 | + data = data[:, 0] # use only the first channel | ||
| 154 | + samples = np.ascontiguousarray(data) | ||
| 155 | + return samples, sample_rate | ||
| 150 | 156 | ||
| 151 | 157 | ||
| 152 | def compute_speaker_embedding( | 158 | def compute_speaker_embedding( |
-
请 注册 或 登录 后发表评论