gtf35
Committed by GitHub

Replace torchaudio with soundfile in python-api-examples (#765)

... ... @@ -65,7 +65,7 @@ from typing import Dict, List, Tuple
import numpy as np
import sherpa_onnx
import torchaudio
import soundfile as sf
try:
import sounddevice as sd
... ... @@ -357,8 +357,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Read an audio file and return its first channel and sample rate.

    Args:
      filename:
        Path to the audio file to read.

    Returns:
      A tuple ``(samples, sample_rate)``. ``samples`` is a 1-D contiguous
      float32 array containing only the first channel of the file;
      ``sample_rate`` is the sample rate reported by ``sf.read``.
    """
    data, sample_rate = sf.read(
        filename,
        always_2d=True,  # keep a channel axis even for mono input
        dtype="float32",
    )
    # Selecting one column of a 2-D array may yield a strided view, so copy
    # it into contiguous memory before returning it.
    samples = np.ascontiguousarray(data[:, 0])  # use only the first channel
    return samples, sample_rate
def compute_speaker_embedding(
... ...
... ... @@ -60,7 +60,7 @@ from typing import Dict, List, Tuple
import numpy as np
import sherpa_onnx
import torchaudio
import soundfile as sf
try:
import sounddevice as sd
... ... @@ -160,8 +160,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Load the first channel of an audio file as float32 samples.

    Args:
      filename:
        Path to the audio file to read.

    Returns:
      A tuple ``(samples, sample_rate)``. ``samples`` is a 1-D contiguous
      float32 array containing only the first channel of the file;
      ``sample_rate`` is the sample rate reported by ``sf.read``.
    """
    data, sample_rate = sf.read(
        filename,
        always_2d=True,  # keep a channel axis even for mono input
        dtype="float32",
    )
    first_channel = data[:, 0]  # use only the first channel
    # The column slice may be a strided view; make it contiguous.
    samples = np.ascontiguousarray(first_channel)
    return samples, sample_rate
def compute_speaker_embedding(
... ...
... ... @@ -52,7 +52,7 @@ from typing import Dict, List, Tuple
import numpy as np
import sherpa_onnx
import torchaudio
import soundfile as sf
try:
import sounddevice as sd
... ... @@ -145,8 +145,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Decode an audio file and return its first channel with the sample rate.

    Args:
      filename:
        Path to the audio file to read.

    Returns:
      A tuple ``(samples, sample_rate)``. ``samples`` is a 1-D contiguous
      float32 array containing only the first channel of the file;
      ``sample_rate`` is the sample rate reported by ``sf.read``.
    """
    data, sample_rate = sf.read(
        filename,
        always_2d=True,  # keep a channel axis even for mono input
        dtype="float32",
    )
    # Keep only the first channel and copy it into contiguous memory, since
    # a column slice of a 2-D array may be a strided view.
    samples = np.ascontiguousarray(data[:, 0])
    return samples, sample_rate
def compute_speaker_embedding(
... ...