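# NOTE: the following four lines are residue from the GitLab web viewer, kept
# as comments so that the file parses as YAML.
# Skip to content
# Snippets Groups Projects
# .gitlab-ci.yml 5.02 KiB
# Newer Older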
# Pipeline-wide variables; each one is exported into the environment of every
# job. Number- and boolean-looking values are quoted so they are passed on as
# the literal strings written here rather than being re-typed by the YAML
# parser.
variables:
  GIT_STRATEGY: fetch
  # NOTE(review): this switches off TLS certificate verification for Git.
  # Presumably a workaround for the hosting instance's certificate - confirm
  # that it is still needed.
  GIT_SSL_NO_VERIFY: "true"
  GIT_SUBMODULE_STRATEGY: recursive
  GIT_LFS_SKIP_SMUDGE: "1"
  # Read by `docker build` in the image_build job.
  DOCKER_BUILDKIT: "1"
# Hidden template (anchor `docker_before_script`). Jobs pull it in with
# `<<: *docker_before_script` to log in to the container registry before
# their script runs.
.docker-before-script: &docker_before_script
  before_script:
    # The job token is fed through stdin instead of `-p <token>` so that it
    # does not appear in the command's argument list.
    - echo "$CI_JOB_TOKEN" | docker login -u gitlab-ci-token --password-stdin http://$CI_REGISTRY

# Hidden template (anchor `git_before_script`). Jobs pull it in with
# `<<: *git_before_script`. It configures a Git identity, clones the project
# again into /tmp using CI_PUSH_TOKEN as the credential, and checks out the
# branch the pipeline runs for. The commands depend on each other and must
# stay in this order.
.git-before-script: &git_before_script
  before_script:
    # Author identity for any commits created by the job.
    - git config --global user.name "CR (covid cron)"
    - git config --global user.email "beepbop@example.com"
    # Remove the "https://gitlab-ci-token:<token>@" prefix from the origin URL
    # so that only the part after the "@" remains in url_host.
    - url_host=`git remote get-url origin | sed -e "s/https:\/\/gitlab-ci-token:.*@//g"`
    - cd /tmp/
    # Clone with CI_PUSH_TOKEN embedded in the URL.
    # NOTE(review): presumably the job token cannot push, hence the separate
    # token - confirm.
    - git clone "https://oauth2:${CI_PUSH_TOKEN}@${url_host}"
    # The directory name created by the clone is hard-coded here.
    - cd covid-19-public-data
    # Install the LFS hooks for this clone only.
    - git lfs install --local
    - git checkout $CI_COMMIT_BRANCH
    # Make executables in ~/.local/bin available to the job's script.
    - PATH=$PATH:~/.local/bin

  - datasets-update
  - notebooks-update
  - finalize-update

# Builds the project image and pushes it twice: tagged with the first seven
# characters of the commit SHA, and as `latest`. The short SHA is written to
# build.env and published as a dotenv report so that later jobs can read
# CI_COMMIT_SHA_7.
image_build:
  <<: *docker_before_script
  image: docker:stable
  stage: build
  # Skipped for refs whose name matches this pattern.
  except:
    - /^auto-update.*$/
  artifacts:
    reports:
      dotenv: build.env

  script: |
    CI_COMMIT_SHA_7=$(echo $CI_COMMIT_SHA | cut -c1-7)

    # export the short SHA for other stages
    echo "CI_COMMIT_SHA_7=$CI_COMMIT_SHA_7" >> build.env

    # build image
    docker build --build-arg BUILDKIT_INLINE_CACHE=1 \
      --tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA_7 \
      --cache-from $CI_REGISTRY_IMAGE .

    # push with commit-sha tag and latest
    docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA_7
    docker tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA_7 $CI_REGISTRY_IMAGE:latest
    docker push $CI_REGISTRY_IMAGE:latest
# Scheduled job: creates the branch auto-update-datasets-<short sha>, refreshes
# the datasets with renku, and pushes the branch.
datasets:
  stage: datasets-update
  # `name` has to be nested under `image:`; the entrypoint override matches
  # the notebooks and finalize jobs, which run in the same image.
  image:
    name: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_SHA_7}
    entrypoint: [""]
  only:
    - schedules
  <<: *git_before_script
  script:
    - export BRANCH_NAME=auto-update-datasets-${CI_COMMIT_SHA_7}
    - git checkout -b $BRANCH_NAME
    # Johns Hopkins
    - renku dataset update covid-19_jhu-csse
    - renku dataset add covid-19_jhu-csse --force --source csse_covid_19_data/csse_covid_19_daily_reports https://github.com/CSSEGISandData/COVID-19.git

    # Open Data Zürich
    - renku dataset update openzh-covid-19
    - renku dataset update covid-19-italy
    - renku dataset update covid-19-spain

    # NYT US data
    - renku dataset update covid-19-us-nyt

    # ECDC data: downloaded to /tmp first, then added; a failing add is
    # tolerated (`|| true`) so the job continues.
    - wget -O /tmp/covid-19-ecdc.csv https://opendata.ecdc.europa.eu/covid19/casedistribution/csv
    - renku dataset add covid-19-ecdc /tmp/covid-19-ecdc.csv --overwrite || true

    # chile data has not been updated for a long time
    # - renku dataset add covid-19-chile -s data/*.csv  -s data/covid19_chile.rds https://github.com/itoledor/coronavirus.git --force || true

    # Covidtracker no longer collecting data
    # TODO: transition to federal data sources: https://covidtracking.com/analysis-updates/federal-covid-data-single-stream
    # - renku dataset add covidtracker https://ocgptweb.azurewebsites.net/CSVDownload -d covidtracker.csv --force || true
    # - renku rerun data/covidtracking/states-metadata.json data/covidtracking/states-daily.json
    - git push --set-upstream origin $BRANCH_NAME

# Scheduled job: starts from the auto-update-datasets-<short sha> branch,
# creates auto-update-notebooks-<short sha> from it, runs `renku update --all`
# and pushes the new branch.
notebooks:
  <<: *git_before_script
  stage: notebooks-update
  image:
    entrypoint: [""]
    name: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_SHA_7}
  # Shorthand for `only: refs:`; same form as the other scheduled jobs.
  only:
    - schedules
  script:
    - export BRANCH_NAME=auto-update-notebooks-${CI_COMMIT_SHA_7}
    # Base the new branch on the one pushed by the datasets job.
    - git checkout auto-update-datasets-${CI_COMMIT_SHA_7}
    - git checkout -b $BRANCH_NAME
    - renku update --all
    - git push --set-upstream origin $BRANCH_NAME

# Scheduled job that runs regardless of earlier failures (`when: always`).
# It pushes the newest available update branch under a timestamped name, opens
# a merge request into master through the GitLab API, merges it, and deletes
# the intermediate auto-update-* branches.
finalize:
  stage: finalize-update
  image:
    name: ${CI_REGISTRY_IMAGE}:${CI_COMMIT_SHA_7}
    entrypoint: [""]
  only:
    - schedules
  <<: *git_before_script
  when: always
  script: |
    set -x

    # install jq (make sure the target directory exists before downloading)
    mkdir -p ~/.local/bin
    wget -O ~/.local/bin/jq https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64
    chmod u+x ~/.local/bin/jq

    # push the updated branch to a common name
    # (falls back to the datasets branch when the notebooks branch is missing)
    export BRANCH_NAME="auto-update"_$(date -u +"%Y-%m-%d_%H-%M")
    git checkout auto-update-notebooks-${CI_COMMIT_SHA_7} || git checkout auto-update-datasets-${CI_COMMIT_SHA_7}
    git rev-parse --abbrev-ref HEAD
    git checkout -b $BRANCH_NAME
    git push origin $BRANCH_NAME

    # create MR
    BODY="{
      \"id\": ${CI_PROJECT_ID},
      \"source_branch\": \"${BRANCH_NAME}\",
      \"target_branch\": \"master\",
      \"remove_source_branch\": true,
      \"title\": \"Automatic update - ${BRANCH_NAME}\"
      }";

    # Command tracing is switched off while CI_PUSH_TOKEN is on the command
    # line; with `set -x` active the expanded token would be printed to the
    # job log.
    set +x
    MR_ID=$(curl -s -X POST "https://renkulab.io/gitlab/api/v4/projects/${CI_PROJECT_ID}/merge_requests" \
      --header "PRIVATE-TOKEN: ${CI_PUSH_TOKEN}" \
      --header "Content-Type: application/json" \
      --data "${BODY}" | jq .iid)

    # merge MR
    # jq prints "null" when the response has no .iid (MR creation failed);
    # in that case there is nothing to merge, so the call is skipped.
    if [ -n "${MR_ID}" ] && [ "${MR_ID}" != "null" ]; then
      RES=$(curl -s -X PUT "https://renkulab.io/gitlab/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${MR_ID}/merge" \
        --header "PRIVATE-TOKEN: ${CI_PUSH_TOKEN}" \
        --header "Content-Type: application/json")
      echo $RES
    else
      echo "No merge request id returned; skipping merge of ${BRANCH_NAME}" >&2
    fi
    set -x

    # clean up branches
    git push origin --delete auto-update-notebooks-${CI_COMMIT_SHA_7} || true
    git push origin --delete auto-update-datasets-${CI_COMMIT_SHA_7} || true