diff --git a/tools/completion/completion.cpp b/tools/completion/completion.cpp index eec93030e5..f368a2f4c6 100644 --- a/tools/completion/completion.cpp +++ b/tools/completion/completion.cpp @@ -311,7 +311,6 @@ int main(int argc, char ** argv) { } else { // otherwise use the prompt as is prompt = params.prompt; - prompt += "; ignore; the capital of France is:"; } if (params.interactive_first || !prompt.empty() || session_tokens.empty()) { diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 06eb386369..5f30a45ca8 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1,23 +1,24 @@ #include "server-context.h" +#include "server-common.h" +#include "server-http.h" +#include "server-task.h" +#include "server-queue.h" #include "common.h" #include "llama.h" #include "log.h" -#include "mtmd-helper.h" -#include "mtmd.h" #include "sampling.h" -#include "server-common.h" -#include "server-http.h" -#include "server-queue.h" -#include "server-task.h" #include "speculative.h" +#include "mtmd.h" +#include "mtmd-helper.h" + +#include #include #include -#include -#include #include +#include // fix problem with std::min and std::max #if defined(_WIN32) @@ -1127,7 +1128,7 @@ private: if (task.params.n_token_healing_enabled) { task.token_healing_params.healing_token = task.tokens.back(); - task.token_healing_params.healing_token_text = ltrim( common_token_to_piece(ctx, task.token_healing_params.healing_token)); + task.token_healing_params.healing_token_text = ltrim(common_token_to_piece(ctx, task.token_healing_params.healing_token)); task.tokens.pop_back(); SLT_DBG(slot, "Token healing enabled, removed last token: %d ('%s')\n",task.token_healing_params.healing_token, task.token_healing_params.healing_token_text.c_str()); } @@ -3007,8 +3008,6 @@ std::unique_ptr server_routes::handle_completions_impl( std::vector tasks; const auto & prompt = data.at("prompt"); - - SRV_INF("\n\nYOOO (UPDATED) this is the System Prompt: %s\n\n", prompt.get().c_str()); // TODO: this log can become very long, put it behind a flag or think about a more compact format //SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get().c_str() : prompt.dump(2).c_str());