diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 7c785819693e..211f21e3be0a 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=29c8fbe4e05fd23c44950d0958299e25fbeabc5c
+LLAMA_VERSION?=df36bce667bf14f8e538645547754386f9516326
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
 
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 872c9edb71c0..b0a27f5feee3 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -53,9 +53,9 @@ static void start_llama_server(server_context& ctx_server) {
     LOG_INF("%s: model loaded\n", __func__);
 
     // print sample chat example to make it clear which template is used
-    LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
-        common_chat_templates_source(ctx_server.chat_templates.get()),
-        common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str());
+    // LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
+    //     common_chat_templates_source(ctx_server.chat_templates.get()),
+    //     common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str(), ctx_server.params_base.default_template_kwargs);
 
     // Reset the chat templates
     // TODO: We should make this configurable by respecting the option that is already present in LocalAI for vLLM