diff --git a/common/arg.cpp b/common/arg.cpp
index 4210633398..eac7454768 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1512,7 +1512,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_sparam());
     add_opt(common_arg(
         {"--power-law-target-range"}, "N",
-        string_format("Power Law sampler adaptive range +/- from target (default: %.2f, 0.0 = no adaptation)", (double)params.sampling.power_law_target_range),
+        string_format("Power Law sampler adaptive target range (target±range) (default: %.2f, 0.0 = fixed target)", (double)params.sampling.power_law_target_range),
         [](common_params & params, const std::string & value) {
             params.sampling.power_law_target_range = std::stof(value);
         }
diff --git a/include/llama.h b/include/llama.h
index 1aace655d0..95df1058cc 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1289,12 +1289,19 @@ extern "C" {
                           const char ** seq_breakers,
                               size_t    num_breakers);
 
-    /// @details power law sampler, reshapes probability distribution to target specific probability ranges
-    /// ref: https://github.com/MrJackSpade/llama.cpp
-    /// ref: https://github.com/ggml-org/llama.cpp/pull/17927
+    /// @details power-law sampler - reshapes probability distribution to target specific probability ranges
+    ///
+    /// this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID
+    /// rather than just transforming logits. therefore it must always be the last sampler in the
+    /// sampler chain.
+    ///
+    /// it is recommended to only perform minimal truncation before this sampler.
+    ///
+    /// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation)
+    /// ref: https://github.com/ggml-org/llama.cpp/pull/17927     (llama.cpp PR)
     LLAMA_API struct llama_sampler * llama_sampler_init_power_law(
                                float    target,       // target probability (0.0 to 1.0)
-                               float    target_range, // adaptive target range (+/- range from target)
+                               float    target_range, // adaptive target range, +/- from target (target ± target_range)
                              int32_t    window_size,  // rolling history window size for target adaptation
                             uint32_t    seed);        // RNG seed
 
diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index e2c229cd9f..0b591d60a8 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -2314,8 +2314,15 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa
 }
 
 // power-law
-// ref: https://github.com/MrJackSpade/llama.cpp/tree/master
-// ref: https://github.com/ggml-org/llama.cpp/pull/17927
+//
+// this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID
+// rather than just transforming logits. therefore it must always be the last sampler in the
+// sampler chain.
+//
+// it is recommended to only perform minimal truncation before this sampler.
+//
+// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation)
+// ref: https://github.com/ggml-org/llama.cpp/pull/17927     (llama.cpp PR)
 
 struct llama_sampler_power_law {
     const float    target;