import os
import platform
#The API is not snake case which triggers linter errors
#pylint: disable=invalid-name
if platform.system().lower() == "windows":
dslib_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'lib')
# On Windows, we can't rely on RPATH being set to $ORIGIN/lib/ or on
# @loader_path/lib
if hasattr(os, 'add_dll_directory'):
# Starting with Python 3.8 this properly handles the problem
os.add_dll_directory(dslib_path)
else:
# Before Pythin 3.8 we need to change the PATH to include the proper
# directory for the dynamic linker
os.environ['PATH'] = dslib_path + ';' + os.environ['PATH']
import deepspeech
# rename for backwards compatibility
from deepspeech.impl import PrintVersions as printVersions
from deepspeech.impl import FreeStream as freeStream
[docs]class Model(object):
"""
Class holding a DeepSpeech model
:param aModelPath: Path to model file to load
:type aModelPath: str
:param aBeamWidth: Decoder beam width
:type aBeamWidth: int
"""
def __init__(self, *args, **kwargs):
# make sure the attribute is there if CreateModel fails
self._impl = None
status, impl = deepspeech.impl.CreateModel(*args, **kwargs)
if status != 0:
raise RuntimeError("CreateModel failed with error code {}".format(status))
self._impl = impl
def __del__(self):
if self._impl:
deepspeech.impl.FreeModel(self._impl)
self._impl = None
[docs] def sampleRate(self):
"""
Return the sample rate expected by the model.
:return: Sample rate.
:type: int
"""
return deepspeech.impl.GetModelSampleRate(self._impl)
[docs] def enableDecoderWithLM(self, *args, **kwargs):
"""
Enable decoding using beam scoring with a KenLM language model.
:param aLMPath: The path to the language model binary file.
:type aLMPath: str
:param aTriePath: The path to the trie file build from the same vocabulary as the language model binary.
:type aTriePath: str
:param aLMAlpha: The alpha hyperparameter of the CTC decoder. Language Model weight.
:type aLMAlpha: float
:param aLMBeta: The beta hyperparameter of the CTC decoder. Word insertion weight.
:type aLMBeta: float
:return: Zero on success, non-zero on failure (invalid arguments).
:type: int
"""
return deepspeech.impl.EnableDecoderWithLM(self._impl, *args, **kwargs)
[docs] def stt(self, *args, **kwargs):
"""
Use the DeepSpeech model to perform Speech-To-Text.
:param aBuffer: A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).
:type aBuffer: int array
:param aBufferSize: The number of samples in the audio signal.
:type aBufferSize: int
:return: The STT result.
:type: str
"""
return deepspeech.impl.SpeechToText(self._impl, *args, **kwargs)
[docs] def createStream(self):
"""
Create a new streaming inference state. The streaming state returned
by this function can then be passed to :func:`feedAudioContent()` and :func:`finishStream()`.
:return: Object holding the stream
:throws: RuntimeError on error
"""
status, ctx = deepspeech.impl.CreateStream(self._impl)
if status != 0:
raise RuntimeError("CreateStream failed with error code {}".format(status))
return ctx
# pylint: disable=no-self-use
[docs] def feedAudioContent(self, *args, **kwargs):
"""
Feed audio samples to an ongoing streaming inference.
:param aSctx: A streaming state pointer returned by :func:`createStream()`.
:type aSctx: object
:param aBuffer: An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).
:type aBuffer: int array
:param aBufferSize: The number of samples in @p aBuffer.
:type aBufferSize: int
"""
deepspeech.impl.FeedAudioContent(*args, **kwargs)
# pylint: disable=no-self-use
# pylint: disable=no-self-use
[docs] def finishStream(self, *args, **kwargs):
"""
Signal the end of an audio signal to an ongoing streaming
inference, returns the STT result over the whole audio signal.
:param aSctx: A streaming state pointer returned by :func:`createStream()`.
:type aSctx: object
:return: The STT result.
:type: str
"""
return deepspeech.impl.FinishStream(*args, **kwargs)
# pylint: disable=no-self-use
# This is only for documentation purpose
# Metadata and MetadataItem should be in sync with native_client/deepspeech.h