fleet/.github/workflows/integration.yml
Lucas Manuel Rodriguez 9a3b4cd365
Attempt to stabilize the broken integration.yml workflow (#13653)
#13547

This is an attempt to stabilize this workflow that has been broken for
4-6 months.

# Issue and proposed solution

Github runner VMs re-use UUIDs, which is not supported by Orbit (this
causes a host to be enrolled as two hosts in Fleet), thus, until that is
fixed in https://github.com/fleetdm/fleet/issues/8021 I propose we
stabilize this workflow by testing all `stable` channels only (which is
better than having the build broken all the time IMO).

Once https://github.com/fleetdm/fleet/issues/8021 is fixed we can re-add
the edge channels.
2023-09-01 12:25:17 -03:00

373 lines
15 KiB
YAML

# This workflow tests enrolling of agents on the supported platforms,
# using the latest version of fleet, fleetctl and orbit.
#
# It starts the latest release of fleet with the "fleetctl preview" command.
# It generates the installers for the latest version of Orbit with the
# "fleetctl package" command.
name: Test Fleetctl, Orbit & Preview
on:
workflow_dispatch: # Manual
schedule:
- cron: '0 2 * * *' # Nightly 2AM UTC
# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id}}
cancel-in-progress: true
defaults:
run:
# fail-fast using bash -eo pipefail. See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
shell: bash
permissions:
contents: read
jobs:
gen:
runs-on: ubuntu-latest
outputs:
subdomain: ${{ steps.gen.outputs.subdomain }}
address: ${{ steps.gen.outputs.address }}
steps:
- id: gen
run: |
UUID=$(uuidgen)
echo "subdomain=fleet-test-$UUID" >> $GITHUB_OUTPUT
echo "address=https://fleet-test-$UUID.fleetuem.com" >> $GITHUB_OUTPUT
run-server:
runs-on: ubuntu-latest
needs: gen
steps:
- name: Start tunnel
env:
CERT_PEM: ${{ secrets.CLOUDFLARE_TUNNEL_FLEETUEM_CERT_B64 }}
run: |
# Increase maximum receive buffer size to roughly 2.5 MB.
# Cloudflared uses quic-go. This buffer holds packets that have been received by the kernel,
# but not yet read by the application (quic-go in this case). Once this buffer fills up, the
# kernel will drop any new incoming packet.
# See https://github.com/quic-go/quic-go/wiki/UDP-Receive-Buffer-Size.
sudo sysctl -w net.core.rmem_max=2500000
# Install cloudflared
wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
sudo dpkg -i cloudflared-linux-amd64.deb
# Add secret
echo "$CERT_PEM" | base64 -d > cert.pem
# Start tunnel
cloudflared tunnel --origincert cert.pem --hostname ${{ needs.gen.outputs.subdomain }} --url http://localhost:1337 --name ${{ needs.gen.outputs.subdomain }} --logfile cloudflared.log &
until [[ $(cloudflared tunnel --origincert cert.pem info -o json ${{ needs.gen.outputs.subdomain }} | jq '.conns[0].conns[0].is_pending_reconnect') = false ]]; do
echo "Awaiting tunnel ready..."
sleep 5
done
# Download fleet and fleetctl binaries from last successful build on main
- name: Download binaries
uses: dawidd6/action-download-artifact@5e780fc7bbd0cac69fc73271ed86edf5dcb72d67
with:
workflow: build-binaries.yaml
branch: main
name: build
path: build
check_artifacts: true
- name: Run Fleet server
timeout-minutes: 10
run: |
chmod +x ./build/fleetctl
./build/fleetctl preview --no-hosts
./build/fleetctl config set --address ${{ needs.gen.outputs.address }}
./build/fleetctl get enroll-secret
docker compose -f ~/.fleet/preview/docker-compose.yml logs --follow fleet01 fleet02 &
# Wait for all of the hosts to be enrolled
EXPECTED=3
until [ $(./build/fleetctl get hosts --json | wc -l | tee hostcount) -ge $EXPECTED ]; do
echo -n "Waiting for hosts to enroll: "
cat hostcount | xargs echo -n
echo " / $EXPECTED"
sleep 20
done
echo "Success! $EXPECTED hosts enrolled."
- name: Show enrolled hosts
if: always()
run: |
./build/fleetctl get hosts --json
- name: Slack Notification
if: failure()
uses: slackapi/slack-github-action@e28cf165c92ffef168d23c5c9000cffc8a25e117 # v1.24.0
with:
payload: |
{
"text": "${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head.html_url }}",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "Integration test result: ${{ job.status }}\nhttps://github.com/fleetdm/fleet/actions/runs/${{ github.run_id }}\n${{ github.event.pull_request.html_url || github.event.head.html_url }}"
}
}
]
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_G_HELP_ENGINEERING_WEBHOOK_URL }}
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
- name: Cleanup tunnel
if: always()
run: cloudflared tunnel --origincert cert.pem delete --force ${{ needs.gen.outputs.subdomain }}
- name: Upload cloudflared logs
if: always()
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v2
with:
name: cloudflared.log
path: cloudflared.log
login:
runs-on: ubuntu-latest
needs: gen
outputs:
token: ${{ steps.login.outputs.token }}
steps:
# Download fleet and fleetctl binaries from last successful build on main
- name: Download binaries
uses: dawidd6/action-download-artifact@5e780fc7bbd0cac69fc73271ed86edf5dcb72d67
with:
workflow: build-binaries.yaml
branch: main
name: build
path: build
check_artifacts: true
# Login only here and share the token because otherwise we could hit rate limits.
- id: login
name: Attempt login
timeout-minutes: 5
run: |
chmod +x ./build/fleetctl
./build/fleetctl config set --address ${{ needs.gen.outputs.address }}
until ./build/fleetctl login --email admin@example.com --password preview1337#
do
echo "Retrying in 5s..."
sleep 5
done
TOKEN=$(cat ~/.fleet/config| grep token | awk '{ print $2 }')
echo "token=$TOKEN" >> $GITHUB_OUTPUT
orbit-macos:
timeout-minutes: 10
strategy:
matrix:
# To run multiple VMs that have the same UUID we need to implement
# https://github.com/fleetdm/fleet/issues/8021 (otherwise orbit and osqueryd
# in the same host are enrolled as two hosts in Fleet).
# Until then we will just test the `stable` channel in all components.
#
# Alternatively, we can bring back the `edge` channel when we decide to upgrade
# our worker to macOS 13 in the future, as they changed the virtualization
# layer for 13 and now it has random UUIDs (https://github.com/actions/runner-images/issues/7591).
orbit-channel: [ 'stable' ]
osqueryd-channel: [ 'stable' ]
desktop-channel: [ 'stable' ]
runs-on: macos-latest
needs: [gen, login]
steps:
- name: Checkout Code
uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
- name: Install dependencies
run: |
npm install -g fleetctl
fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }}
- name: Install Orbit
run: |
sudo hostname macos-orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}
SECRET_JSON=$(fleetctl get enroll_secret --json --debug)
echo $SECRET_JSON
SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret')
echo "Secret: $SECRET"
echo "Hostname: $(hostname -s)"
fleetctl package --type pkg --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug
sudo installer -pkg fleet-osquery.pkg -target /
until fleetctl get hosts | grep -iF $(hostname -s);
do
echo "Awaiting enrollment..."
sleep 10
done
- name: Collect orbit logs
if: always()
run: |
mkdir orbit-logs
sudo cp /var/log/orbit/* orbit-logs/
- name: Upload Orbit logs
if: always()
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v2
with:
name: orbit-macos-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-logs
path: |
orbit-logs
- name: Uninstall Orbit
run: |
./orbit/tools/cleanup/cleanup_macos.sh
orbit-ubuntu:
timeout-minutes: 10
strategy:
matrix:
# To run multiple VMs that have the same UUID we need to implement
# https://github.com/fleetdm/fleet/issues/8021 (otherwise orbit and osqueryd
# in the same host are enrolled as two hosts in Fleet).
# Until then we will just test the `stable` channel in all components.
orbit-channel: [ 'stable' ]
osqueryd-channel: [ 'stable' ]
desktop-channel: [ 'stable' ]
runs-on: ubuntu-latest
needs: [gen, login]
steps:
- name: Install dependencies
run: |
npm install -g fleetctl
fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }}
- name: Install Go
uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
go-version: '^1.19.12'
- name: Checkout Code
uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
- name: Build Fleetctl
run: make fleetctl
- name: Install Orbit
run: |
sudo hostname ubuntu-orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}
chmod +x ./build/fleetctl
SECRET_JSON=$(fleetctl get enroll_secret --json --debug)
echo $SECRET_JSON
SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret')
echo "Secret: $SECRET"
echo "Hostname: $(hostname -s)"
./build/fleetctl package --type deb --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug
sudo dpkg -i fleet-osquery*
until fleetctl get hosts | grep -iF $(hostname -s);
do
echo "Awaiting enrollment..."
sudo systemctl status orbit.service || true
sleep 10
done
- name: Collect orbit logs
if: always()
run: |
sudo journalctl -u orbit.service > orbit-logs
- name: Upload Orbit logs
if: always()
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v2
with:
name: orbit-ubuntu-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-logs
path: |
orbit-logs
- name: Uninstall Orbit
run: |
sudo apt remove fleet-osquery -y
orbit-windows-build:
timeout-minutes: 10
strategy:
matrix:
# To run multiple VMs that have the same UUID we need to implement
# https://github.com/fleetdm/fleet/issues/8021 (otherwise orbit and osqueryd
# in the same host are enrolled as two hosts in Fleet).
# Until then we will just test the `stable` channel in all components.
orbit-channel: [ 'stable' ]
osqueryd-channel: [ 'stable' ]
desktop-channel: [ 'stable' ]
runs-on: ubuntu-latest
needs: [gen, login]
steps:
- name: Install dependencies
run: |
docker pull fleetdm/wix:latest &
npm install -g fleetctl
fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }}
- name: Build Orbit
run: |
SECRET_JSON=$(fleetctl get enroll_secret --json --debug)
echo $SECRET_JSON
SECRET=$(echo $SECRET_JSON | jq -r '.spec.secrets[0].secret')
echo "Secret: $SECRET"
echo "Hostname: $(hostname -s)"
fleetctl package --type msi --fleet-url=${{ needs.gen.outputs.address }} --enroll-secret=$SECRET --orbit-channel=${{ matrix.orbit-channel }} --osqueryd-channel=${{ matrix.osqueryd-channel }} --desktop-channel=${{ matrix.desktop-channel }} --fleet-desktop --debug
mv fleet-osquery.msi orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}-desktop-${{ matrix.desktop-channel }}.msi
- name: Upload MSI
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v2
with:
name: orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}-desktop-${{ matrix.desktop-channel }}.msi
path: orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}-desktop-${{ matrix.desktop-channel }}.msi
orbit-windows:
timeout-minutes: 10
strategy:
matrix:
# To run multiple VMs that have the same UUID we need to implement
# https://github.com/fleetdm/fleet/issues/8021 (otherwise orbit and osqueryd
# in the same host are enrolled as two hosts in Fleet).
# Until then we will just test the `stable` channel in all components.
orbit-channel: [ 'stable' ]
osqueryd-channel: [ 'stable' ]
desktop-channel: [ 'stable' ]
needs: [gen, login, orbit-windows-build]
runs-on: windows-latest
steps:
- name: Install dependencies
shell: bash
run: |
npm install -g fleetctl
fleetctl config set --address ${{ needs.gen.outputs.address }} --token ${{ needs.login.outputs.token }} --tls-skip-verify
- name: Download MSI
id: download
uses: actions/download-artifact@fb598a63ae348fa914e94cd0ff38f362e927b741 # v2
with:
name: orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}-desktop-${{ matrix.desktop-channel }}.msi
- name: Install Orbit
shell: cmd
run: |
msiexec /i ${{steps.download.outputs.download-path}}\orbit-${{ matrix.orbit-channel }}-osqueryd-${{ matrix.osqueryd-channel }}-desktop-${{ matrix.desktop-channel }}.msi /quiet /passive /lv log.txt
sleep 30
# We can't very accurately check the install on these Windows hosts since the hostnames tend to
# overlap and we can't control the hostnames. Instead we just return and have the run-server job
# wait until the expected number of hosts enroll.
- name: Upload orbit install log
if: always()
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v2
with:
name: msiexec-install-log
path: log.txt
- name: Upload Orbit logs
if: always()
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v2
with:
name: orbit-windows-${{ matrix.orbit-channel }}-${{ matrix.osqueryd-channel }}-${{ matrix.desktop-channel }}-logs
path: C:\Windows\system32\config\systemprofile\AppData\Local\FleetDM\Orbit\Logs\orbit-osquery.log