mirror of https://github.com/google/gemma.cpp.git
Speed up builds by skipping rarely used targets
Centralize previous code into GEMMA_DISABLED_TARGETS.
PiperOrigin-RevId: 772433723
This commit is contained in:
parent
9a02d6be68
commit
cd80d8b24d
|
|
@ -367,6 +367,7 @@ cc_test(
|
|||
":test_util",
|
||||
":threading_context",
|
||||
"@googletest//:gtest_main", # buildcleaner: keep
|
||||
"//compression:types",
|
||||
"@highway//:hwy",
|
||||
"@highway//:hwy_test_util",
|
||||
"@highway//:nanobenchmark", #buildcleaner: keep
|
||||
|
|
@ -430,7 +431,7 @@ cc_test(
|
|||
],
|
||||
deps = [
|
||||
":basics",
|
||||
":ops",
|
||||
":matmul",
|
||||
":threading_context",
|
||||
"@googletest//:gtest_main", # buildcleaner: keep
|
||||
"//compression:compress",
|
||||
|
|
@ -500,10 +501,10 @@ cc_library(
|
|||
":matmul",
|
||||
":model_store",
|
||||
":ops",
|
||||
":tokenizer",
|
||||
":threading",
|
||||
":threading_context",
|
||||
":weights",
|
||||
"//compression:types",
|
||||
"//io:blob_store",
|
||||
"//io",
|
||||
"//paligemma:image",
|
||||
|
|
@ -521,6 +522,7 @@ cc_library(
|
|||
deps = [
|
||||
":gemma_lib",
|
||||
":ops",
|
||||
"//compression:types",
|
||||
"@highway//:hwy",
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// SFP uses ConcatEven/Odd which are not supported; skip SVE for faster tests.
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS (HWY_SCALAR | HWY_SVE)
|
||||
#endif
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include "compression/compress.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// SFP uses ConcatEven/Odd which are not supported; skip SVE for faster tests.
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS (HWY_SCALAR | HWY_SVE)
|
||||
#endif
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
|
@ -27,7 +27,6 @@
|
|||
#include <random>
|
||||
|
||||
#include "compression/distortion.h"
|
||||
#include "compression/types.h"
|
||||
#include "util/test_util.h"
|
||||
#include "hwy/aligned_allocator.h"
|
||||
#include "hwy/base.h"
|
||||
|
|
|
|||
|
|
@ -15,6 +15,11 @@
|
|||
|
||||
#include "compression/python/compression_clif_aux.h"
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// We use ConcatEven/Odd which are not supported. Use HWY_EMU128 instead.
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS HWY_SCALAR
|
||||
#endif
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
|
@ -25,7 +25,6 @@
|
|||
#include <set>
|
||||
|
||||
#include "compression/distortion.h"
|
||||
#include "compression/types.h"
|
||||
#include "util/test_util.h"
|
||||
#include "hwy/aligned_allocator.h"
|
||||
#include "hwy/base.h"
|
||||
|
|
|
|||
|
|
@ -29,6 +29,30 @@
|
|||
|
||||
namespace gcpp {
|
||||
|
||||
// EMU128 must not be disabled because we disable SCALAR.
|
||||
#define HWY_BROKEN_EMU128 0
|
||||
|
||||
// Allow user override of disabled targets.
|
||||
#ifndef GEMMA_DISABLED_TARGETS
|
||||
|
||||
// All platforms: exclude SCALAR because we use ReorderWidenMulAccumulate.
|
||||
|
||||
#if HWY_ARCH_ARM_V7
|
||||
// No NEON because we require double-precision support.
|
||||
// Define GEMMA_DISABLED_TARGETS (not HWY_DISABLED_TARGETS) so that this branch
// matches the other architectures and respects the enclosing
// `#ifndef GEMMA_DISABLED_TARGETS` user-override guard.
#define GEMMA_DISABLED_TARGETS (HWY_SCALAR | HWY_ALL_NEON)
|
||||
#elif HWY_ARCH_ARM_A64
|
||||
// We do not yet use AES (e.g. for random generation), hence NEON is the same
|
||||
// as NEON_WITHOUT_AES. Also skip SVE because SVE2_128 and SVE_256 cover most.
|
||||
#define GEMMA_DISABLED_TARGETS (HWY_SCALAR | HWY_NEON | HWY_SVE)
|
||||
#elif HWY_ARCH_X86
|
||||
// Skip anything older than Haswell (2013); also use Zen4 for recent CPUs,
|
||||
// because we do not use anything added by SPR (e.g. FP16) nor AVX 10.2.
|
||||
#define GEMMA_DISABLED_TARGETS \
|
||||
(HWY_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | HWY_AVX3_SPR | HWY_AVX10_2)
|
||||
#endif // HWY_ARCH_*
|
||||
|
||||
#endif // GEMMA_DISABLED_TARGETS
|
||||
|
||||
// Only used in experiments, hence disable in default builds.
|
||||
#ifndef GEMMA_ENABLE_NUQ
|
||||
#define GEMMA_ENABLE_NUQ 0
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// Compiles this file for multiple architectures via "foreach_target.h", to
|
||||
// which we pass the filename via macro 'argument'.
|
||||
// clang-format off
|
||||
|
|
|
|||
|
|
@ -18,6 +18,11 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include "gemma/activations.h"
|
||||
#include "gemma/gemma.h"
|
||||
#include "gemma/weights.h"
|
||||
|
|
|
|||
|
|
@ -18,6 +18,11 @@
|
|||
|
||||
#include "gemma/gemma.h"
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// Compiles this file for multiple architectures via "foreach_target.h", to
|
||||
// which we pass the filename via macro 'argument'.
|
||||
// clang-format off
|
||||
|
|
|
|||
|
|
@ -16,6 +16,11 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include "gemma/activations.h"
|
||||
#include "gemma/gemma.h"
|
||||
#include "gemma/gemma_args.h"
|
||||
|
|
|
|||
|
|
@ -19,6 +19,11 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include "gemma/activations.h"
|
||||
#include "gemma/gemma.h"
|
||||
#include "gemma/gemma_args.h"
|
||||
|
|
|
|||
|
|
@ -16,24 +16,17 @@
|
|||
// Benchmark of large MatMul instances for which the MatMulSlow would be too
|
||||
// slow. This lacks a reference and is only useful for performance measurement.
|
||||
|
||||
#include "hwy/base.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
// Exclude HWY_SCALAR due to 2x bf16 -> f32, and Armv7 NEON because we require
|
||||
// double-precision support.
|
||||
#if HWY_ARCH_ARM_V7
|
||||
#define HWY_DISABLED_TARGETS (HWY_SCALAR | HWY_NEON)
|
||||
#else
|
||||
#define HWY_DISABLED_TARGETS HWY_SCALAR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "compression/types.h"
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include "ops/matmul.h"
|
||||
#include "util/basics.h"
|
||||
#include "util/threading_context.h"
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
// Exclude HWY_SCALAR due to 2x bf16 -> f32.
|
||||
#define HWY_DISABLED_TARGETS HWY_SCALAR
|
||||
#endif
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
|
@ -27,7 +27,6 @@
|
|||
#include <random>
|
||||
|
||||
#include "compression/compress.h"
|
||||
#include "compression/types.h"
|
||||
#include "util/allocator.h"
|
||||
#include "util/test_util.h"
|
||||
#include "util/threading_context.h"
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
// Exclude HWY_SCALAR due to 2x bf16 -> f32.
|
||||
#define HWY_DISABLED_TARGETS HWY_SCALAR
|
||||
#endif
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// Compiles this file for multiple architectures via "foreach_target.h", to
|
||||
// which we pass the filename via macro 'argument'.
|
||||
// clang-format off
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// Compiles this file for multiple architectures via "foreach_target.h", to
|
||||
// which we pass the filename via macro 'argument'.
|
||||
// clang-format off
|
||||
|
|
|
|||
|
|
@ -13,6 +13,13 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h" // GEMMA_ENABLE_NUQ
|
||||
#if GEMMA_ENABLE_NUQ
|
||||
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// Compiles this file for multiple architectures via "foreach_target.h", to
|
||||
// which we pass the filename via macro 'argument'.
|
||||
// clang-format off
|
||||
|
|
@ -22,3 +29,5 @@
|
|||
#include "hwy/foreach_target.h" // IWYU pragma: keep
|
||||
#define GEMMA_MATMUL_TB NuqStream
|
||||
#include "ops/matmul_static-inl.h"
|
||||
|
||||
#endif // GEMMA_ENABLE_NUQ
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "compression/types.h" // GEMMA_DISABLED_TARGETS
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// Compiles this file for multiple architectures via "foreach_target.h", to
|
||||
// which we pass the filename via macro 'argument'.
|
||||
// clang-format off
|
||||
|
|
|
|||
|
|
@ -15,16 +15,11 @@
|
|||
|
||||
// End to end test of MatMul, comparing against a reference implementation.
|
||||
|
||||
#include "hwy/detect_compiler_arch.h" // IWYU pragma: keep
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
// Exclude HWY_SCALAR due to 2x bf16 -> f32, and Armv7 NEON because we require
|
||||
// double-precision support.
|
||||
#if HWY_ARCH_ARM_V7
|
||||
#define HWY_DISABLED_TARGETS (HWY_SCALAR | HWY_NEON)
|
||||
#else
|
||||
#define HWY_DISABLED_TARGETS (HWY_SCALAR)
|
||||
#endif // HWY_ARCH_ARM_V7
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
// matmul_static is not built as a test, hence does not define MatMulStatic for
|
||||
// worse-than-baseline targets (to speed up builds), so we skip them here, too.
|
||||
#ifndef HWY_SKIP_NON_BEST_BASELINE
|
||||
|
|
@ -34,7 +29,6 @@
|
|||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "compression/types.h"
|
||||
#include "ops/matmul.h"
|
||||
#include "util/basics.h"
|
||||
#include "util/mat.h"
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// OrderedDemote2To is not supported by HWY_SCALAR.
|
||||
#include "compression/types.h"
|
||||
#ifndef HWY_DISABLED_TARGETS
|
||||
#define HWY_DISABLED_TARGETS HWY_SCALAR
|
||||
#endif
|
||||
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
||||
#endif // HWY_DISABLED_TARGETS
|
||||
|
||||
#include "ops/ops.h"
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue