// Copyright 2024 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_ #define THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_ #include #include #include #include #include "gtest/gtest.h" #include "compression/weights_raw.h" namespace gcpp { template void Complexify(const std::array& x, std::array, kLen>& c_x) { for (size_t i = 0; i < kLen; ++i) { c_x[i] = std::complex(x[i], 0.0); } } template void Complexify(const Layer& w, Layer, TConfig>& c_w) { Complexify(w.pre_attention_norm_scale, c_w.pre_attention_norm_scale); Complexify(w.attn_vec_einsum_w, c_w.attn_vec_einsum_w); Complexify(w.qkv_einsum_w, c_w.qkv_einsum_w); Complexify(w.pre_ffw_norm_scale, c_w.pre_ffw_norm_scale); Complexify(w.gating_einsum_w, c_w.gating_einsum_w); Complexify(w.linear_w, c_w.linear_w); } template void Complexify(const Weights& w, Weights, TConfig>& c_w) { static constexpr size_t kLayers = TConfig::kLayers; Complexify(w.embedder_input_embedding, c_w.embedder_input_embedding); Complexify(w.final_norm_scale, c_w.final_norm_scale); for (size_t i = 0; i < kLayers; ++i) { Complexify(*w.GetLayer(i), *c_w.GetLayer(i)); } } template void TestNear(const std::array& actual, const std::array& expected, double max_abs_err, double max_rel_err, int line) { double sum0 = 0; double sum1 = 0; double sum01 = 0; for (size_t i = 0; i < N; ++i) { sum0 += actual[i] * actual[i]; sum1 += expected[i] * expected[i]; sum01 += actual[i] * expected[i]; ASSERT_NEAR(actual[i], expected[i], std::max(max_abs_err, std::abs(expected[i]) * max_rel_err)) << "line: " << line << " dim=" << N << " i=" << i; } if (sum0 > 1e-40) { double norm_dot = sum01 / std::sqrt(sum0) / std::sqrt(sum1); ASSERT_NEAR(norm_dot, 1.0, 1e-7) << "line: " << line << " sum0: " << sum0 << " sum1: " << sum1 << " sum01: " << sum01; } } // Compute gradient with the finite difference method in the complex plane. // If f : R->R is the tested function and F : C->C is its extension on the // complex plane so that F is complex differentiable in x, then // // F(x + ih) = F(x) + ih F'(x) + O(h^2) F''(x) // // which means that // // F'(x) ~= Imag(F(x + ih)) / h // // This method is more numerically stable than the real-valued finite difference // method since we don't need to subtract floating point numbers that are near // to each other. template void TestGradient(const std::array& grad, std::array, N>& x, FUNC func, U step, T max_abs_err, T max_rel_err, int line) { std::array exp_grad; const U inv_step = 1.0 / step; for (size_t i = 0; i < N; ++i) { const U x0 = std::real(x[i]); const std::complex x1 = std::complex(x0, step); x[i] = x1; const std::complex f1 = func(); exp_grad [i] = std::imag(f1) * inv_step; x[i] = x0; } TestNear(grad, exp_grad, max_abs_err, max_rel_err, line); } template void TestGradient(const std::array& grad, std::array, N>& x, FUNC func, float max_abs_err, float max_rel_error, int line) { TestGradient(grad, x, func, 1e-30f, max_abs_err, max_rel_error, line); } template void TestGradient(const std::array& grad, std::array, N>& x, FUNC func, float max_abs_err, float max_rel_error, int line) { TestGradient(grad, x, func, 1e-50, max_abs_err, max_rel_error, line); } template void TestGradient(const std::array& grad, std::array, N>& x, FUNC func, double max_abs_err, double max_rel_error, int line) { TestGradient(grad, x, func, 1e-50, max_abs_err, max_rel_error, line); } template void TestGradient(const Layer& grad, Layer, TConfig>& c_weights, FUNC func, T max_err) { TestGradient(grad.pre_attention_norm_scale, c_weights.pre_attention_norm_scale, func, max_err, max_err, __LINE__); TestGradient(grad.attn_vec_einsum_w, c_weights.attn_vec_einsum_w, func, max_err, max_err, __LINE__); TestGradient(grad.qkv_einsum_w, c_weights.qkv_einsum_w, func, max_err, max_err, __LINE__); TestGradient(grad.pre_ffw_norm_scale, c_weights.pre_ffw_norm_scale, func, max_err, max_err, __LINE__); TestGradient(grad.gating_einsum_w, c_weights.gating_einsum_w, func, max_err, max_err, __LINE__); TestGradient(grad.linear_w, c_weights.linear_w, func, max_err, max_err, __LINE__); } template void TestGradient(const Weights& grad, Weights, TConfig>& c_weights, FUNC func, T max_err) { TestGradient(grad.embedder_input_embedding, c_weights.embedder_input_embedding, func, 2 * max_err, max_err, __LINE__); TestGradient(grad.final_norm_scale, c_weights.final_norm_scale, func, max_err, max_err, __LINE__); for (int i = 0; i < TConfig::kLayers; ++i) { TestGradient(*grad.GetLayer(i), *c_weights.GetLayer(i), func, max_err); } } } // namespace gcpp #endif // THIRD_PARTY_GEMMA_CPP_GEMMA_TEST_UTIL_H_