backend/cpp/llama-cpp/Makefile (1 addition, 1 deletion)
@@ -1,5 +1,5 @@
 
-LLAMA_VERSION?=29c8fbe4e05fd23c44950d0958299e25fbeabc5c
+LLAMA_VERSION?=df36bce667bf14f8e538645547754386f9516326
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
backend/cpp/llama-cpp/grpc-server.cpp (3 additions, 3 deletions)
@@ -53,9 +53,9 @@ static void start_llama_server(server_context& ctx_server) {
LOG_INF("%s: model loaded\n", __func__);

// print sample chat example to make it clear which template is used
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
common_chat_templates_source(ctx_server.chat_templates.get()),
common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str());
// LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
// common_chat_templates_source(ctx_server.chat_templates.get()),
// common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str(), ctx_server.params_base.default_template_kwargs);

// Reset the chat templates
// TODO: We should make this configurable by respecting the option that is already present in LocalAI for vLLM
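
Note: this PR disables the log line rather than updating it for the newly pinned llama.cpp revision. The commented-out variant hints that the upstream helper now also involves the default template kwargs, but as written it would pass ctx_server.params_base.default_template_kwargs as a stray LOG_INF vararg with no matching format specifier. A minimal sketch of how the call might be restored, assuming the upstream signature at the pinned commit takes the kwargs map as a third parameter (verify against the pinned llama.cpp revision before relying on this):

// Hedged sketch, not part of this PR: assumes common_chat_format_example()
// at the pinned llama.cpp revision accepts the default template kwargs as
// its third parameter (an assumption, not confirmed by this diff).
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
    common_chat_templates_source(ctx_server.chat_templates.get()),
    common_chat_format_example(ctx_server.chat_templates.get(),
                               ctx_server.params_base.use_jinja,
                               ctx_server.params_base.default_template_kwargs).c_str());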