From cf347dfe350aa64f6f2d51df677379fce35284e4 Mon Sep 17 00:00:00 2001
From: pculliton
Date: Fri, 17 May 2024 16:06:03 -0400
Subject: [PATCH] Adds Kaggle testing to CI workflow

Using a restricted Kaggle account, this code:
- Adds an Ubuntu 20.04 build (required for glibc compat with Kaggle infra)
- Uploads the ubuntu-20.04 build and supporting library to a Kaggle dataset
  using a fork of `push-kaggle-dataset`
- Creates a new version of a Kaggle notebook that loads artifacts from the
  Kaggle Model Hub, along with the newly updated dataset, and validates a
  2b-it-sfp model.
- Runs the notebook and throws an error if the process does not complete,
  raises an exception, or produces an invalid response.

Todo: add tests / capabilities to the smoke tests used by the notebook.
---
 .github/workflows/build.yml | 71 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2635ec5..bb7783a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -17,10 +17,12 @@ jobs:
       fail-fast: false
       matrix:
         # When adding another, also add to copybara's github_check_runs.
-        os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
+        os: ['ubuntu-latest', 'macos-latest', 'windows-latest', 'ubuntu-20.04']
         build_type: ['Release']
         preset: ['make', 'windows']
         exclude:
+          - os: ubuntu-20.04
+            preset: windows
           - os: ubuntu-latest
             preset: windows
           - os: macos-latest
@@ -60,6 +62,73 @@ jobs:
             ${{ github.workspace }}/build/gemma
             ${{ github.workspace }}/build/libgemma.a
 
+      # Kaggle smoke test; the three steps below run only for ubuntu-20.04.
+      # NOTE(review): the pculliton/* actions are handed the Kaggle credentials
+      # and are pinned by tag only; consider pinning them to a commit SHA.
+      - if: matrix.os == 'ubuntu-20.04'
+        name: Upload build artifacts to Kaggle
+        uses: pculliton/push-kaggle-dataset@v1.0.0
+        env:
+          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
+          KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
+        with:
+          id: "phillipculliton/gemma-build-artifacts"
+          files: |
+            build/gemma
+            build/_deps/sentencepiece-build/src/libsentencepiece.so.0
+
+      - if: matrix.os == 'ubuntu-20.04'
+        name: Create code for new test notebook version
+        run: |
+          # The quoted 'EOF' delimiter makes the shell write the Python source
+          # verbatim, with no parameter or command substitution.
+          cat > runner.py << 'EOF'
+          import subprocess
+
+          PROMPT = "Write an email to the moon."
+
+          # Copy the binary into /kaggle/working and make it executable;
+          # check=True stops the script with a clear error if either fails.
+          subprocess.run(["cp", "/kaggle/input/gemma-build-artifacts/gemma", "/kaggle/working"], check=True)
+          subprocess.run(["chmod", "700", "/kaggle/working/gemma"], check=True)
+          # Left unchecked, as before. NOTE(review): confirm the dataset keeps
+          # this sub-path before making a failed copy fatal.
+          subprocess.run(["cp", "/kaggle/input/gemma-build-artifacts/_deps/sentencepiece-build/src/libsentencepiece.so.0", "/kaggle/working"])
+          # Decode the reply as UTF-8 and replace undecodable bytes; a strict
+          # ASCII decode raises UnicodeDecodeError on any non-ASCII output.
+          output = subprocess.run(
+              [
+                  "/kaggle/working/gemma",
+                  "--tokenizer", "/kaggle/input/gemma/gemmacpp/2b-it-sfp/4/tokenizer.spm",
+                  "--compressed_weights", "/kaggle/input/gemma/gemmacpp/2b-it-sfp/4/2b-it-sfp.sbs",
+                  "--model", "2b-it",
+                  "--verbosity", "0",
+                  "--max_generated_tokens", "128",
+              ],
+              stdout=subprocess.PIPE,
+              input=PROMPT,
+              encoding="utf-8",
+              errors="replace",
+          ).stdout
+          response = output.lower()
+          # The reply must mention the moon without merely repeating the prompt.
+          assert PROMPT.lower() not in response, f"prompt echoed back: {output!r}"
+          assert "moon" in response, f"unexpected response: {output!r}"
+          EOF
+
+      - if: matrix.os == 'ubuntu-20.04'
+        name: Run kaggle test notebook
+        uses: pculliton/kaggle-action@v1.0.28
+        with:
+          username: ${{ secrets.KAGGLE_USERNAME }}
+          key: ${{ secrets.KAGGLE_KEY }}
+          title: GemmaCPP-CI-2
+          code_file: runner.py
+          dataset_sources: "phillipculliton/gemma-build-artifacts"
+          model_sources: "google/gemma/gemmaCpp/2b-it-sfp/4"
+          enable_gpu: False
+          kernel_type: script
+
   bazel:
     runs-on: ubuntu-latest
     steps: