diff --git a/src/cpp/config/LlmConfig.hpp b/src/cpp/config/LlmConfig.hpp index 477d3faae774ff2b934c7d26b758da0029ea0fcb..66713abd356bf93ddc3e1e71c7253066813bdc07 100644 --- a/src/cpp/config/LlmConfig.hpp +++ b/src/cpp/config/LlmConfig.hpp @@ -8,6 +8,10 @@ #include +/** + * @class LlmConfig + * @brief Config class for the Large Language Model settings. + */ class LlmConfig { private: std::string m_modelTag{}; @@ -17,6 +21,14 @@ private: int m_batchSize{}; public: + /** + * LlmConfig + * @param modelTag Model tag for the LLM model + * @param modelPath Path to the model + * @param llmPrefix LLM prefix to use + * @param numThreads Number of threads to use + * @param batchSize Batch size to use + */ LlmConfig(const std::string& modelTag, const std::string& modelPath, const std::string& llmPrefix, @@ -32,14 +44,14 @@ public: std::string GetModelTag() const; /** - * Returns the path to the model file. - * @return modelPath + * Returns the path to the model file. + * @return modelPath */ std::string GetModelPath() const; /** - * Returns the LLM prompt prefix string. - * @return llmPrefix + * Returns the LLM prompt prefix string. + * @return llmPrefix */ std::string GetLlmPrefix() const; @@ -50,14 +62,14 @@ public: int GetNumThreads() const; /** - * Returns the batch size used for querying. - * @return batch size + * Returns the batch size used for querying. + * @return batch size */ int GetBatchSize() const; /** - * Sets the model tag (The name to appear in conversation with the LLM).. - * @param modelIdentifier is the tag name added at the end of each user question to make model + * Sets the model tag (The name to appear in conversation with the LLM). 
+ * @param modelIdentifier is the tag name added at the end of each user question to make model * respond appropriately */ void SetModelTag(const std::string& modelIdentifier); @@ -76,15 +88,15 @@ public: void SetLlmPrefix(const std::string& llmInitialPrompt); /** - Sets the number of threads to use for LLM model inference - @param threads number of threads used inference of model - */ + * Sets the number of threads to use for LLM model inference. + * @param threads number of threads used for inference of the model + */ void SetNumThreads(int threads); /** - Sets the batch size for inference. Throws std::invalid_argument if the value is not positive. - @param batchSz chunk-size of each batch used to split query-encoding - */ + * Sets the batch size for inference. Throws std::invalid_argument if the value is not positive. + * @param batchSz chunk-size of each batch used to split query-encoding + */ void SetBatchSize(int batchSz); }; diff --git a/src/cpp/config/README.md b/src/cpp/config/intro.md similarity index 100% rename from src/cpp/config/README.md rename to src/cpp/config/intro.md diff --git a/src/cpp/frameworks/README.md b/src/cpp/frameworks/intro.md similarity index 99% rename from src/cpp/frameworks/README.md rename to src/cpp/frameworks/intro.md index d676725a2d07976d046eca9956d2c47f9a0b0c0e..54557b8ef44a513da86499736c688a96b2f384e6 100644 --- a/src/cpp/frameworks/README.md +++ b/src/cpp/frameworks/intro.md @@ -3,5 +3,6 @@ SPDX-License-Identifier: Apache-2.0 --> + Frameworks directory contains different backends we can choose from to provide implementation logic for our interface. diff --git a/src/cpp/interface/Llm.hpp b/src/cpp/interface/Llm.hpp index 03cd4f4c96c18b047946eb41ab0669c3cb491227..5355551d3f51f3a4056568ee1828a281533f9041 100644 --- a/src/cpp/interface/Llm.hpp +++ b/src/cpp/interface/Llm.hpp @@ -10,6 +10,10 @@ #include "LlmConfig.hpp" #include +/** + * @class LLM + * @brief Interface class for interacting with a Large Language Model. 
+ */ class LLM { private: class LLMImpl; diff --git a/src/java/com/arm/Llm.java b/src/java/com/arm/Llm.java index 889f8794cef3c6bd5e65dab559b60f7376aab639..5c87432e04256189685320b97ff1202eae2a2e57 100644 --- a/src/java/com/arm/Llm.java +++ b/src/java/com/arm/Llm.java @@ -11,6 +11,9 @@ import java.util.concurrent.Flow; import java.util.concurrent.SubmissionPublisher; import java.util.concurrent.atomic.AtomicBoolean; +/** + * Llm class that extends the SubmissionPublisher + */ public class Llm extends SubmissionPublisher { static @@ -37,21 +40,21 @@ public class Llm extends SubmissionPublisher private int numThreads = 4; private int batchSize = 256; - // Native method declarations /** - Method to create LlmConfig cpp instance from params. - @param modelTag name used to refer the Model - @param modelPath path to load model from - @param llmPrefix Initial-prompt to load into llm before query - @param numThreads Number of threads for inference - @param batchSize batch size used to chunk queries - */ + * Method to create LlmConfig cpp instance from params. 
+ * @param modelTag name used to refer to the model + * @param modelPath path to load model from + * @param llmPrefix Initial-prompt to load into llm before query + * @param numThreads Number of threads for inference + * @param batchSize batch size used to chunk queries + * @return pointer to llm config + */ public native long createLlmConfig(String modelTag, String modelPath, String llmPrefix, int numThreads, int batchSize); /** * Method for loading LLM model - @param pathToModel file path for loading model - @return pointer to loaded model + * @param LlmConfig pointer to the native LlmConfig to load the model from + * @return pointer to loaded model */ public native long loadModel(long LlmConfig); @@ -73,10 +76,10 @@ public class Llm extends SubmissionPublisher */ public native float getDecodeRate(); - /** + /** * Private method for resetting conversation history - */ - public native void resetContext(); + */ + public native void resetContext(); /** * Method for resetting timing information @@ -90,16 +93,17 @@ public class Llm extends SubmissionPublisher private native void encode(String text); /** - * Method to get Next Token once encoding is done. - * This Method needs to be called in a loop while monitoring for Stop-Words. - * @return next Token as String - */ + * Method to get Next Token once encoding is done. + * This Method needs to be called in a loop while monitoring for Stop-Words. 
+ * @return next Token as String + */ private native String getNextToken(); /** * Method to get chat Progress in percentage + * @return chat progress as int */ - public native int getChatProgress(); + public native int getChatProgress(); /** * Method to decode answers one by one, once prefill stage is completed @@ -108,6 +112,7 @@ public class Llm extends SubmissionPublisher * @param nEvalPrompts number of generated tokens for benchmarking * @param nMaxSeq sequence number * @param nRep number of repetitions + * @return string containing results of the benchModel */ public native String benchModel( int nPrompts, @@ -117,8 +122,8 @@ public class Llm extends SubmissionPublisher ); /** - *Method to separate Initialization from constructor - *@param llmConfig type configuration file to load model + * Method to separate Initialization from constructor + * @param llmConfig type configuration file to load model */ public void llmInit(LlmConfig llmConfig) { @@ -131,6 +136,11 @@ public class Llm extends SubmissionPublisher this.llmPrefix,this.numThreads,this.batchSize); this.llmPtr = loadModel(configPtr); } + + /** + * Method to set subscriber + * @param subscriber set from llama + */ public void setSubscriber(Flow.Subscriber subscriber) { System.out.println("subscribed set from llama"); @@ -141,7 +151,7 @@ public class Llm extends SubmissionPublisher * Method to get response of a query asynchronously * @param Query the prompt asked */ - public void sendAsync(String Query) + public void sendAsync(String Query) { String query = ""; AtomicBoolean stop = new AtomicBoolean(false); @@ -195,13 +205,13 @@ public class Llm extends SubmissionPublisher return response; } - /** - * Method to find any stop-Words or partial stop-Word present in current token - * @param str current token decoded - * @return boolean for detection of stop word - */ - private boolean inspectWord(String str) - { + /** + * Method to find any stop-Words or partial stop-Word present in current token + * @param 
str current token decoded + * @return boolean for detection of stop word + */ + private boolean inspectWord(String str) + { boolean stopWordTriggered = false; String evaluationString = this.cachedToken + str; // if stopWord is in evaluationString break loop. @@ -237,24 +247,26 @@ public class Llm extends SubmissionPublisher } this.cachedToken = emitToken.isEmpty() ? evaluationString : ""; return stopWordTriggered; - } + } /** - * Sets the LLM prefix used for query processing. - * @param llmPrefix initial prompt for llm - */ + * Sets the LLM prefix used for query processing. + * @param llmPrefix initial prompt for llm + */ public void setLlmPrefix(String llmPrefix) { this.llmPrefix = llmPrefix; } /** - * Sets the LLM ModelTag - */ + * Sets the LLM ModelTag + * @param newTag tag to set for the model + */ public void setLlmModelTag(String newTag) { this.modelTag = newTag; } + /** * Method to free model from memory */ diff --git a/src/java/com/arm/LlmConfig.java b/src/java/com/arm/LlmConfig.java index f70c11eec3656d629791fd351360f99b53e24659..169b350d847d2245215e66f87763237647f7ccbc 100644 --- a/src/java/com/arm/LlmConfig.java +++ b/src/java/com/arm/LlmConfig.java @@ -8,6 +8,9 @@ package com.arm; import java.util.List; +/** + * LlmConfig class for adding the settings for the Large Language Model. 
+ */ public class LlmConfig { private String modelTag; @@ -16,23 +19,59 @@ public class LlmConfig private String llmPrefix; private List stopWords; private int numThreads; - // minimal constructor without userTag and numThreads + + /** + * Minimal constructor without userTag and numThreads + * + * @param modelTag tag for the model + * @param stopWords stop words to use + * @param modelPath path to the model + * @param llmPrefix llm prefix to use + */ public LlmConfig(String modelTag, List stopWords, String modelPath, String llmPrefix) { this(modelTag, stopWords, modelPath, llmPrefix, "", 4); } - // minimal constructor without numThreads + + /** + * Minimal constructor without numThreads + * + * @param modelTag tag for the model + * @param stopWords stop words to use + * @param modelPath path to the model + * @param llmPrefix llm prefix to use + * @param userTag user tag to use + */ public LlmConfig(String modelTag, List stopWords, String modelPath, String llmPrefix, String userTag) { // Use 4 threads by default this(modelTag, stopWords, modelPath, llmPrefix, userTag, 4); } - // minimal constructor without userTag + + /** + * Minimal constructor without userTag + * + * @param modelTag tag for the model + * @param stopWords stop words to use + * @param modelPath path to the model + * @param llmPrefix llm prefix to use + * @param numThreads number of threads to use + */ public LlmConfig(String modelTag, List stopWords, String modelPath, String llmPrefix,int numThreads) { this(modelTag, stopWords, modelPath, llmPrefix, "", numThreads); } - // main constructor + + /** + * Main constructor + * + * @param modelTag tag for the model + * @param stopWords stop words to use + * @param modelPath path to the model + * @param llmPrefix llm prefix to use + * @param userTag user tag to use + * @param numThreads number of threads to use + */ public LlmConfig(String modelTag, List stopWords, String modelPath, String llmPrefix, String userTag, int numThreads) { @@ -62,6 +101,7 @@ 
public class LlmConfig { return this.userTag; } + /** * Gets the list of stop words. * @@ -93,9 +133,9 @@ public class LlmConfig } /** - * Gets the number of Threads used - * @return The number of Threads LLM uses. - */ + * Gets the number of Threads used + * @return The number of Threads LLM uses. + */ public int getNumThreads() { return this.numThreads; @@ -155,6 +195,7 @@ public class LlmConfig * Sets the number of Threads. * @param numThreads count of threads to use for LLM. */ + public void setNumThreads(int numThreads) { this.numThreads = numThreads;