# .github/workflows/test_reblockGVCF.yml

name: Test ReblockGVCF

# Controls when the workflow will run
on:
  # Temporary push trigger for feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING
  # push:
  #   branches:
  #     - kp_GHA_Terra_auth_PD-2682
  pull_request:
    branches: [ "develop", "staging", "master" ]
    # Only run if files in these paths changed:
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/**'
      - 'tasks/broad/GermlineVariantDiscovery.wdl'
      - 'tasks/broad/Qc.wdl'
      - 'tasks/broad/Utilities.wdl'
      - 'verification/VerifyGvcf.wdl'
      - 'verification/VerifyTasks.wdl'
      - 'verification/test-wdls/TestReblockGVCF.wdl'
      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
      - '.github/workflows/test_reblockGVCF.yml'

  # Allows you to run this workflow manually from the Actions tab.
  # Boolean inputs still arrive as the strings "true"/"false" in
  # github.event.inputs, which is what the steps below read.
  workflow_dispatch:
    inputs:
      useCallCache:
        description: 'Use call cache (default: true)'
        required: false
        type: boolean
        default: true
      updateTruth:
        description: 'Update truth files (default: false)'
        required: false
        type: boolean
        default: false
      # A choice input keeps typos out of the test-input directory and truth
      # path, both of which are derived from this value.
      testType:
        description: 'Specify the type of test (Plumbing or Scientific)'
        required: true
        type: choice
        options:
          - Plumbing
          - Scientific
      truthBranch:
        description: 'Specify the branch for truth files (default: master)'
        required: false
        type: string
        default: "master"


env:
  # Pipeline under test: names used for the method configuration, the
  # Dockstore lookup and the test-input directory.
  PROJECT_NAME: "WARP"
  PIPELINE_NAME: "TestReblockGVCF"
  DOCKSTORE_PIPELINE_NAME: "ReblockGVCF"
  PIPELINE_DIR: "pipelines/broad/dna_seq/germline/joint_genotyping/reblocking"

  # Workspace (name and namespace) the submissions are sent to.
  TESTING_WORKSPACE: "WARP Tests"
  WORKSPACE_NAMESPACE: "warp-pipelines"

  # Name of the repository this workflow is running in.
  REPOSITORY_NAME: "${{ github.event.repository.name }}"

  # Service account: base64-encoded key (from repository secrets) and the
  # account identity passed to the firecloud_api scripts as --user.
  SA_JSON_B64: "${{ secrets.PDT_TESTER_SA_B64 }}"
  USER: "pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com"


jobs:
  run_pipeline:
    runs-on: ubuntu-latest
    # Token scopes granted to this job: read-only access to repository
    # contents, plus permission to request an id-token.
    permissions:
      contents: read
      id-token: write

    steps:
      # actions/checkout MUST come before auth action.
      # v4 replaces v3, which runs on the deprecated Node 16 action runtime.
      - uses: actions/checkout@v4
        with:
          # Check out the ref that triggered this run.
          ref: ${{ github.ref }}

      # v5 replaces v4, which runs on the deprecated Node 16 action runtime.
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.11'

      # Installs the Python packages listed for the firecloud_api scripts
      # that later steps invoke.
      - name: Install dependencies
        run: |
          cd scripts/firecloud_api/
          pip install --requirement requirements.txt

      # Exports BRANCH_NAME for later steps: the PR head branch when the run
      # comes from a pull request, otherwise the ref that triggered the run.
      - name: Set Branch Name
        id: set_branch
        env:
          # The head ref is chosen by the PR author. It is passed through the
          # environment rather than expanded into the script text, so a
          # crafted branch name cannot inject shell commands.
          HEAD_REF: ${{ github.head_ref }}
        run: |
          if [ -z "$HEAD_REF" ]; then
            # GITHUB_REF_NAME keeps the full short ref (e.g. "feature/foo"),
            # matching what the PR path below produces for such branches.
            echo "Branch name is missing, using $GITHUB_REF_NAME"
            echo "BRANCH_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
          else
            echo "Branch name from PR: $HEAD_REF"
            echo "BRANCH_NAME=$HEAD_REF" >> "$GITHUB_ENV"
          fi

      # Exports GITHUB_COMMIT_HASH: github.sha for manual runs, the PR head
      # commit for pull requests. Any other event type fails the step.
      - name: Determine Github Commit Hash
        id: determine_github_commit_hash
        run: |
          case "${{ github.event_name }}" in
            workflow_dispatch)
              echo "Using github.sha for manually triggered workflow."
              echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV
              ;;
            pull_request)
              echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
              echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
              ;;
            *)
              echo "Unsupported event type: ${{ github.event_name }}"
              exit 1
              ;;
          esac

      # Asks Dockstore which commit it holds for this pipeline and branch and
      # exports it as DOCKSTORE_COMMIT_HASH for the comparison step.
      - name: Fetch Dockstore Workflow Commit Hash
        run: |
          # Wait 5.5 minutes for Dockstore to update
          sleep 330

          # Arguments are positional, so each one is quoted: an empty value
          # (e.g. a missing token) must stay in its own slot rather than
          # shifting the arguments that follow it.
          DOCKSTORE_COMMIT_HASH_FROM_FETCH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \
            "$DOCKSTORE_TOKEN" \
            "$DOCKSTORE_PIPELINE_NAME" \
            "$BRANCH_NAME")

          # Export the commit hash as an environment variable
          echo "DOCKSTORE_COMMIT_HASH=$DOCKSTORE_COMMIT_HASH_FROM_FETCH" >> "$GITHUB_ENV"
          echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH_FROM_FETCH"
        env:
          ## TODO NEED TO ADD DOCKSTORE_TOKEN FOR SERVICE ACCOUNT ##
          DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
          DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
          BRANCH_NAME: ${{ env.BRANCH_NAME }}

      # Fails the job unless the commit recorded by Dockstore is the same
      # commit this run was triggered for.
      - name: Compare Dockstore and Commit Hashes
        id: compare_hashes
        env:
          DOCKSTORE_COMMIT_HASH: ${{ env.DOCKSTORE_COMMIT_HASH }}
          GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }}
        run: |
          echo "Comparing hashes..."
          echo "Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH"
          echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH"

          if [ "$DOCKSTORE_COMMIT_HASH" = "$GITHUB_COMMIT_HASH" ]; then
            echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash."
          else
            echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash!"
            echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH"
            exit 1
          fi

      # Exports testType for later steps. Pull requests into master run the
      # Scientific tests, other pull requests run Plumbing, and manual runs
      # use the value supplied on dispatch.
      - name: Set Test Type
        id: set_test_type
        env:
          # Context values reach the script through the environment instead
          # of being expanded into the script text, so an input containing
          # quotes or shell syntax is treated as data, not code.
          EVENT_NAME: ${{ github.event_name }}
          BASE_REF: ${{ github.base_ref }}
          REQUESTED_TEST_TYPE: ${{ github.event.inputs.testType }}
        run: |
          if [ "$EVENT_NAME" == "pull_request" ]; then
            # For PRs, set based on target branch
            if [ "$BASE_REF" == "master" ]; then
              SELECTED_TEST_TYPE="Scientific"
            else
              SELECTED_TEST_TYPE="Plumbing"
            fi
          else
            # For workflow_dispatch, use provided test type
            SELECTED_TEST_TYPE="$REQUESTED_TEST_TYPE"
          fi

          echo "testType=$SELECTED_TEST_TYPE" >> "$GITHUB_ENV"
          echo "testType=$SELECTED_TEST_TYPE"


      # Creates a method configuration for this pipeline and branch in the
      # testing workspace and exports its name as METHOD_CONFIG_NAME.
      - name: Create new method configuration
        run: |
          echo "Creating new method configuration for branch: $BRANCH_NAME"

          METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \
            create_new_method_config \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --pipeline_name "$PIPELINE_NAME" \
            --branch_name "$BRANCH_NAME" \
            --sa-json-b64 "$SA_JSON_B64" \
            --user "$USER")

          # Stop here rather than exporting an empty name: later steps put
          # this value into the submission payload and the delete call.
          if [ -z "$METHOD_CONFIG_NAME" ]; then
            echo "Error: no method configuration name was returned."
            exit 1
          fi

          echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> "$GITHUB_ENV"
        env:
          PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
          TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
          WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
          USER: ${{ env.USER }}

      # For every test-input JSON of the selected test type: rewrite the
      # inputs, upload them, and submit a job. Once everything is submitted,
      # poll each submission, fetch its workflow outputs, and write one
      # combined status summary to the job summary page.
      - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
        run: |
          # UPDATE_TRUTH, USE_CALL_CACHE, TRUTH_BRANCH and TEST_TYPE come from
          # this step's env block, so user-supplied values are never expanded
          # into the script text.
          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
          MAX_RETRIES=2
          RETRY_DELAY=300  # 300 seconds = 5 minutes
          # Raw outputs of every workflow (collected, not consumed in this step)
          ALL_OUTPUTS=""
          # Submission IDs in submission order, and the "workflow | status"
          # lines recorded for each submission
          declare -a SUBMISSION_IDS
          declare -A WORKFLOW_STATUSES

          # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, anything else -> false)
          if [ "$UPDATE_TRUTH" = "true" ]; then
              UPDATE_TRUTH_BOOL=true
          else
              UPDATE_TRUTH_BOOL=false
          fi

          if [ "$USE_CALL_CACHE" = "true" ]; then
              USE_CALL_CACHE_BOOL=true
          else
              USE_CALL_CACHE_BOOL=false
          fi

          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
          echo "Running tests with test type: $TEST_TYPE"

          # Truth files live under the lower-cased test type and the truth branch
          TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
          echo "Truth path: $TRUTH_PATH"
          RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME"

          # Create the submission_data.json file which will be the same for all inputs
          SUBMISSION_DATA_FILE="submission_data.json"

          # Use a heredoc to generate the JSON file content dynamically
          cat <<EOF > "$SUBMISSION_DATA_FILE"
          {
            "methodConfigurationNamespace": "$WORKSPACE_NAMESPACE",
            "methodConfigurationName": "$METHOD_CONFIG_NAME",
            "useCallCache": $USE_CALL_CACHE_BOOL,
            "deleteIntermediateOutputFiles": false,
            "useReferenceDisks": true,
            "memoryRetryMultiplier": 1.2,
            "workflowFailureMode": "NoNewCalls",
            "userComment": "Automated submission",
            "ignoreEmptyOutputs": false
          }
          EOF

          echo "Created submission data file: $SUBMISSION_DATA_FILE"

          # 1. Submit all jobs first and store their submission IDs
          for input_file in "$INPUTS_DIR"/*.json; do
            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
                --results_path "$RESULTS_PATH" \
                --inputs_json "$input_file" \
                --update_truth "$UPDATE_TRUTH_BOOL" \
                --branch_name "$BRANCH_NAME" )
            echo "Uploading the test input file: $test_input_file"
            python3 scripts/firecloud_api/firecloud_api.py \
                upload_test_inputs \
                --workspace-namespace "$WORKSPACE_NAMESPACE" \
                --workspace-name "$TESTING_WORKSPACE" \
                --pipeline_name "$PIPELINE_NAME" \
                --test_input_file "$test_input_file" \
                --branch_name "$BRANCH_NAME" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "$USER"

            attempt=1
            while [ $attempt -le $MAX_RETRIES ]; do
              SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
               --workspace-namespace "$WORKSPACE_NAMESPACE" \
               --workspace-name "$TESTING_WORKSPACE" \
               --sa-json-b64 "$SA_JSON_B64" \
               --user "$USER" \
               --submission_data_file "$SUBMISSION_DATA_FILE")

              echo "Submission ID: $SUBMISSION_ID"

              # NOTE(review): this is a substring match, so a valid ID that
              # happens to contain "404" would also be retried - confirm what
              # submit_job prints on failure.
              if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
                  echo "Error in submission, retrying in $RETRY_DELAY seconds..."
                  attempt=$((attempt + 1))
                  if [ $attempt -gt $MAX_RETRIES ]; then
                      echo "Max retries reached. Exiting..."
                      exit 1
                  fi
                  sleep $RETRY_DELAY
                  continue
              fi

              echo "Submission successful. Submission ID: $SUBMISSION_ID"
              SUBMISSION_IDS+=("$SUBMISSION_ID")
              break
            done
          done

          echo "All jobs have been submitted. Starting to poll for statuses..."

          # 2. After all submissions are done, start polling for statuses of all jobs
          # NOTE(review): nothing in this step fails the job based on the
          # reported workflow statuses - confirm poll_job_status exits
          # non-zero when a workflow does not succeed.
          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
            attempt=1
            while [ $attempt -le $MAX_RETRIES ]; do
              echo "Polling for Submission ID: $SUBMISSION_ID"
              RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
                  --submission_id "$SUBMISSION_ID" \
                  --sa-json-b64 "$SA_JSON_B64" \
                  --user "$USER" \
                  --workspace-namespace "$WORKSPACE_NAMESPACE" \
                  --workspace-name "$TESTING_WORKSPACE")

              if [ -z "$RESPONSE" ]; then
                  echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
                  attempt=$((attempt + 1))
                  if [ $attempt -gt $MAX_RETRIES ]; then
                      echo "Max retries reached. Exiting..."
                      exit 1
                  fi
                  sleep $RETRY_DELAY
                  continue
              fi

              # RESPONSE is read as a JSON object keyed by workflow ID with a
              # status string as each value; flatten it to "id | status" lines.
              WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
              WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"

              # retrieve workflow outputs
              echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
              for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
                WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
                  --user "$USER" \
                  --sa-json-b64 "$SA_JSON_B64" \
                  --submission_id "$SUBMISSION_ID" \
                  --workspace-namespace "$WORKSPACE_NAMESPACE" \
                  --workspace-name "$TESTING_WORKSPACE" \
                  --workflow_id "$WORKFLOW_ID" \
                  --pipeline_name "$PIPELINE_NAME")
                ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
              done
              break
            done
          done

          # 3. Generate the final summary once, after every submission has
          # been polled. Writing it inside the polling loop repeated the
          # heading and all earlier entries for each submission.
          echo "## Combined Workflow Statuses" >> "$GITHUB_STEP_SUMMARY"
          for SUMMARY_SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
            # Generate the Terra URL for the submission; spaces in the
            # workspace name are percent-encoded for the link.
            SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/${TESTING_WORKSPACE// /%20}/job_history/$SUMMARY_SUBMISSION_ID"

            # Add the Submission ID as a hyperlink
            echo "[Submission ID: $SUMMARY_SUBMISSION_ID]($SUBMISSION_URL)" >> "$GITHUB_STEP_SUMMARY"

            # Add the workflows and statuses for this submission
            echo "${WORKFLOW_STATUSES[$SUMMARY_SUBMISSION_ID]}" >> "$GITHUB_STEP_SUMMARY"

            # Add a blank line for separation
            echo "" >> "$GITHUB_STEP_SUMMARY"
          done
        env:
          PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
          TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
          METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
          WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
          USER: ${{ env.USER }}
          DOCKSTORE_PIPELINE_NAME: ${{ env.DOCKSTORE_PIPELINE_NAME }}
          PIPELINE_DIR: ${{ env.PIPELINE_DIR }}
          # Manual-dispatch inputs, with the defaults used for pull requests
          UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
          USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
          TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
          # Test type chosen by the "Set Test Type" step
          TEST_TYPE: ${{ env.testType }}

      # Cleans up the method configuration created for this run. The step
      # fails unless the delete call prints "True".
      - name: Delete Method Configuration
        if: always() # Ensures it runs regardless of success or failure
        run: |
          # Because of always(), this step also runs when an earlier step
          # failed before any configuration was created. There is nothing to
          # delete then, and failing again would only hide the real error.
          if [ -z "$METHOD_CONFIG_NAME" ]; then
            echo "No method configuration was created; nothing to delete."
            exit 0
          fi

          echo "Deleting method configuration for branch: $BRANCH_NAME"
          DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --pipeline_name "$PIPELINE_NAME" \
            --branch_name "$BRANCH_NAME" \
            --sa-json-b64 "$SA_JSON_B64" \
            --user "$USER" \
            --method_config_name "$METHOD_CONFIG_NAME")
          echo "Delete response: $DELETE_RESPONSE"
          if [ "$DELETE_RESPONSE" == "True" ]; then
            echo "Method configuration deleted successfully."
          else
            echo "Error: Method configuration deletion failed."
            exit 1
          fi
        env:
          PIPELINE_NAME: ${{ env.PIPELINE_NAME }}
          BRANCH_NAME: ${{ env.BRANCH_NAME }}
          SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
          METHOD_CONFIG_NAME: ${{ env.METHOD_CONFIG_NAME }}
          WORKSPACE_NAMESPACE: ${{ env.WORKSPACE_NAMESPACE }}
          TESTING_WORKSPACE: ${{ env.TESTING_WORKSPACE }}
          USER: ${{ env.USER }}

      # Appends a success banner to the job summary when every earlier step passed.
      - name: Print Summary on Success
        if: ${{ success() }}
        run: |
          printf '%s\n' "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY

      # Appends a failure banner to the job summary when any earlier step failed.
      - name: Print Summary on Failure
        if: ${{ failure() }}
        run: |
          printf '%s\n' "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY