# Slow test suite, run on pushes to main that modify Python sources.
# Two jobs (single-GPU and multi-GPU) each run once per Docker image in
# their matrix, then append report output to the job's step summary.
name: Slow tests (on push)

on:
  push:
    branches: [main]
    paths:
      # Run only when python files are modified
      - "trl/**.py"
      - "examples/**.py"

# Workflow-wide environment shared by every job and step below.
env:
  RUN_SLOW: "yes"
  IS_GITHUB_CI: "1"
  SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

jobs:
  run_all_tests_single_gpu:
    strategy:
      # Let the other image's run finish even if one image fails.
      fail-fast: false
      matrix:
        docker-image-name:
          - "huggingface/trl-latest-gpu:latest"
          - "huggingface/trl-source-gpu:latest"
    runs-on:
      group: aws-g4dn-2xlarge
    env:
      # Expose only the first GPU to the tests.
      CUDA_VISIBLE_DEVICES: "0"
      TEST_TYPE: "single_gpu_${{ matrix.docker-image-name }}"
    container:
      image: ${{ matrix.docker-image-name }}
      options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
    defaults:
      run:
        # Explicit bash: steps run with `-e -o pipefail`, so any failing
        # command aborts the step.
        shell: bash
    steps:
      - uses: actions/checkout@v4

      - name: Pip install
        # --no-deps: only the package itself is installed here; presumably the
        # image already ships the dependencies - confirm against the image.
        run: |
          source activate trl
          pip install -e ".[test,vlm]" --no-deps
          pip install pytest-reportlog parameterized

      - name: Run slow SFT tests on single GPU
        # always(): run even if a previous step failed.
        if: always()
        run: |
          source activate trl
          make slow_tests

      - name: Generate Report
        if: always()
        # NOTE(review): this step does not `source activate trl`, so pip and
        # python resolve to whatever is on PATH in the container - confirm
        # that is intended.
        run: |
          pip install slack_sdk tabulate
          python scripts/log_reports.py >> "$GITHUB_STEP_SUMMARY"

  run_all_tests_multi_gpu:
    strategy:
      # Let the other image's run finish even if one image fails.
      fail-fast: false
      matrix:
        docker-image-name:
          - "huggingface/trl-latest-gpu:latest"
          - "huggingface/trl-source-gpu:latest"
    # NOTE(review): this job exposes CUDA devices 0 and 1 but uses the same
    # runner group as the single-GPU job - confirm the group provides two GPUs.
    runs-on:
      group: aws-g4dn-2xlarge
    env:
      CUDA_VISIBLE_DEVICES: "0,1"
      TEST_TYPE: "multi_gpu_${{ matrix.docker-image-name }}"
    container:
      image: ${{ matrix.docker-image-name }}
      options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
    defaults:
      run:
        # Explicit bash: steps run with `-e -o pipefail`, so any failing
        # command aborts the step.
        shell: bash
    steps:
      - uses: actions/checkout@v4

      - name: Pip install
        # --no-deps: only the package itself is installed here; presumably the
        # image already ships the dependencies - confirm against the image.
        run: |
          source activate trl
          pip install -e ".[test,vlm]" --no-deps
          pip install pytest-reportlog parameterized

      - name: Run slow SFT tests on Multi GPU
        # always(): run even if a previous step failed.
        if: always()
        run: |
          source activate trl
          make slow_tests

      - name: Run end-to-end examples tests on multi GPU
        if: always()
        run: |
          source activate trl
          pip install deepspeed
          make test_examples

      - name: Generate Reports
        if: always()
        # NOTE(review): this step does not `source activate trl`, so pip and
        # python resolve to whatever is on PATH in the container - confirm
        # that is intended.
        run: |
          pip install slack_sdk tabulate
          python scripts/log_reports.py >> "$GITHUB_STEP_SUMMARY"
          python scripts/log_example_reports.py --text_file_name temp_results_sft_tests.txt >> "$GITHUB_STEP_SUMMARY"
          python scripts/log_example_reports.py --text_file_name temp_results_dpo_tests.txt >> "$GITHUB_STEP_SUMMARY"
          # -f: cleanup must not fail the step when no .txt file is present
          # (an unmatched glob would otherwise make rm exit non-zero under -e).
          rm -f *.txt