From 76d5b6798044fde6b50915a905c00ad8844523f4 Mon Sep 17 00:00:00 2001
From: Salvatore Rossitto <srossitto79@gmail.com>
Date: Thu, 12 Mar 2026 11:58:14 +0100
Subject: [PATCH] add llama_opt_set_reward_weights; add shuffle parameter to llama_opt_epoch

---
 include/llama.h     | 9 ++++++++-
 src/llama-context.h | 4 +++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index 0bd10294cb..0bf8ead384 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1556,6 +1556,12 @@ extern "C" {
 
     LLAMA_API void llama_opt_init(struct llama_context * lctx, struct llama_model * model, struct llama_opt_params lopt_params);
 
+    // Set optional per-window reward weights used by llama_opt_epoch.
+    // weights: array of n_weights floats, one per dataset window (indexed by idata), already normalized to [0,1].
+    // Pass weights = NULL and n_weights = 0 to disable weighting (equivalent to all-ones, i.e. standard SFT).
+    // The caller keeps ownership: the pointer must remain valid for the duration of all llama_opt_epoch calls.
+    LLAMA_API void llama_opt_set_reward_weights(const float * weights, int64_t n_weights);
+
     LLAMA_API void llama_opt_epoch(
             struct llama_context    * lctx,
             ggml_opt_dataset_t        dataset,
@@ -1563,7 +1569,8 @@ extern "C" {
             ggml_opt_result_t         result_eval,
             int64_t                   idata_split,
             ggml_opt_epoch_callback   callback_train,
-            ggml_opt_epoch_callback   callback_eval);
+            ggml_opt_epoch_callback   callback_eval,
+            bool                      shuffle);
 
 #ifdef __cplusplus
 }
diff --git a/src/llama-context.h b/src/llama-context.h
index e0d0085c1c..21d0ba6299 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -187,7 +187,8 @@ struct llama_context {
             ggml_opt_result_t       result_eval,
             int64_t                 idata_split,
             ggml_opt_epoch_callback callback_train,
-            ggml_opt_epoch_callback callback_eval);
+            ggml_opt_epoch_callback callback_eval,
+            bool                    shuffle);
 
     void opt_epoch_iter(
             ggml_opt_dataset_t               dataset,
@@ -195,6 +196,7 @@ struct llama_context {
             const std::vector<llama_token> & tokens,
             const std::vector<llama_token> & labels_sparse,
             llama_batch                    & batch,
+            float                            reward_scale,
             ggml_opt_epoch_callback          callback,
             bool                             train,
             int64_t                          idata_in_loop,