diff --git a/src/cpp/include/STT.hpp b/src/cpp/include/STT.hpp index f3dcd9d401f1ac3ca0d6a33a083fbb973b720091..ad5b311c7cf8bad8b0a2a8112fbd6afbca8ebdb5 100644 --- a/src/cpp/include/STT.hpp +++ b/src/cpp/include/STT.hpp @@ -32,12 +32,12 @@ public: * @param noContext whether to disable reusing context between segments * @param singleSegment whether to transcribe the entire audio in a single segment */ - void InitParams(const bool printRealtime, const bool printProgress, const bool timeStamps, + void InitParams(const bool printRealTime, const bool printProgress, const bool timeStamps, const bool printSpecial, const bool translate, const char *language, const int numThreads, const int offsetMs, const bool noContext, const bool singleSegment) { - stt.InitParams(printRealtime, printProgress, timeStamps, printSpecial, translate, + stt.InitParams(printRealTime, printProgress, timeStamps, printSpecial, translate, language, numThreads, offsetMs, noContext, singleSegment); } @@ -70,6 +70,7 @@ public: * @param contextPtr stt context pointer * @param audioData audio data to transcribe * @param audioDataLength length of the Audio data supplied + * @return String containing the transcribed text. */ template<typename P> std::string FullTranscribe(P* contextPtr, float* audioData, int audioDataLength) diff --git a/src/cpp/whisper_cpp/include/WhisperImpl.hpp b/src/cpp/whisper_cpp/include/WhisperImpl.hpp index 1c97004cf5aa7e60b082ea985e9837f9125f07d2..b4f06eea07c1238bb50ab24678fc9e2d15afcdc9 100644 --- a/src/cpp/whisper_cpp/include/WhisperImpl.hpp +++ b/src/cpp/whisper_cpp/include/WhisperImpl.hpp @@ -52,7 +52,7 @@ public: * Initializes the Whisper parameters with the specified settings. 
* @param printRealTime whether to print partial decoding results in real-time * @param printProgress whether to print progress information - * @param timeStamps whether to include timestamps in the transcription + * @param printTimestamps whether to include timestamps in the transcription * @param printSpecial whether to include special tokens (e.g., markers) in the output * @param translate whether to translate the transcription to English * @param language the language code for transcription (e.g., "en", "fr", etc.) @@ -61,14 +61,14 @@ public: * @param noContext whether to disable reusing context between segments * @param singleSegment whether to transcribe the entire audio in a single segment */ - void InitParams(const bool printRealtime, const bool printProgress, const bool printTimestamps, + void InitParams(const bool printRealTime, const bool printProgress, const bool printTimestamps, const bool printSpecial, const bool translate, const char *language, const int numThreads, const int offsetMs, const bool noContext, const bool singleSegment) { this->strLang = std::string(language); this->whisperParams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); - this->whisperParams.print_realtime = printRealtime; + this->whisperParams.print_realtime = printRealTime; this->whisperParams.print_progress = printProgress; this->whisperParams.print_timestamps = printTimestamps; this->whisperParams.print_special = printSpecial; @@ -108,6 +108,7 @@ public: * @param contextPtr whisper_context pointer * @param audioDataPtr pointer to audio data to transcribe * @param audioDataLength length of the audio data array + * @return String containing the transcribed text */ std::string FullTranscribe(whisper_context* contextPtr, const float* audioDataPtr, const int audioDataLength) diff --git a/src/java/com/arm/stt/WhisperConfig.java b/src/java/com/arm/stt/WhisperConfig.java index a038f4cea3056a352b4b0a5ecde474a689394d1b..74bd20a9b5b8ea7a33132062cbbd4b99f9a264bd 100644 --- 
a/src/java/com/arm/stt/WhisperConfig.java +++ b/src/java/com/arm/stt/WhisperConfig.java @@ -6,6 +6,10 @@ package com.arm.stt; +/** + * @class WhisperConfig + * @brief Config for setting options for Whisper + */ public class WhisperConfig { private boolean printRealTime; @@ -19,6 +23,19 @@ public class WhisperConfig { private boolean noContext; private boolean singleSegment; + /** + * Initializes the Whisper config with the specified settings. + * @param printRealTime whether to print partial decoding results in real-time + * @param printProgress whether to print progress information + * @param timeStamps whether to include timestamps in the transcription + * @param printSpecial whether to include special tokens (e.g., markers) in the output + * @param translate whether to translate the transcription to English + * @param language the language code for transcription (e.g., "en", "fr", etc.) + * @param numThreads the number of CPU threads to use for transcription + * @param offsetMs an initial time offset (in milliseconds) for the transcription + * @param noContext whether to disable reusing context between segments + * @param singleSegment whether to transcribe the entire audio in a single segment + */ public WhisperConfig(boolean printRealTime, boolean printProgress, boolean timeStamps, boolean printSpecial, boolean translate, String language, int numThreads, int offsetMs, boolean noContext, boolean singleSegment)