Improve APM in Loadtesting (#7061)

* Initial cloudwatch stub for loadtesting/apm

* Fix duplicate policy attachment name

* elasticsearch pull keys from ssh secret

* Set aws region for run-ansible

* Alternate way to specify region in run-ansible

* Fix elasticsearch_ansible ssh file modes

* Cloudwatch agent config elasticsearch

* Fix ansible indents

* Set platform to linux/amd64 for loadtesting docker image

* Use /dev/sdb on elasticsearch/apm

* fixup

* elasticsearch volume mounted

* elasticapm increase shards and size

* Increase elasticapm instance size

* Document how to get the IP for APM ec2 instance
This commit is contained in:
Robert Fairburn 2022-08-10 12:33:49 -05:00 committed by GitHub
parent e2194be61c
commit 1f1ee964ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 122 additions and 8 deletions

View File

@ -44,6 +44,7 @@ resource "docker_registry_image" "loadtest" {
build {
context = "${path.cwd}/docker/"
dockerfile = "loadtest.Dockerfile"
platform = "linux/amd64"
build_args = {
TAG = var.tag
}

View File

@ -32,6 +32,7 @@ With the variable `loadtest_containers` you can specify how many containers of 5
There are a few main places of interest to monitor the load and resource usage:
* The Application Performance Monitoring (APM) dashboard: access it on your Fleet load-testing URL on port `:5601` and path `/app/apm`, e.g. `https://loadtest.fleetdm.com:5601/app/apm`.
* The APM dashboard can also be accessed via private IP over the VPN. Use the following one-liner to get the URL: `aws ec2 describe-instances --region=us-east-2 | jq -r '.Reservations[].Instances[] | select(.State.Name == "running") | select(.Tags[] | select(.Key == "ansible_playbook_file") | .Value == "elasticsearch.yml") | "http://" + .PrivateIpAddress + ":5601/app/apm"'`. This connects directly to the EC2 instance and doesn't use the load balancer.
* To monitor mysql database load, go to AWS RDS, select "Performance Insights" and the database instance to monitor (you may want to turn off auto-refresh).
* To monitor Redis load, go to Amazon ElastiCache, select the redis cluster to monitor, and go to "Metrics".

View File

@ -159,6 +159,11 @@ resource "aws_iam_role_policy_attachment" "role_attachment_es" {
role = aws_iam_role.elasticstack.name
}
resource "aws_iam_role_policy_attachment" "role_attachment_cloudwatch" {
policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
role = aws_iam_role.elasticstack.name
}
resource "aws_iam_policy" "elasticstack" {
name = "fleet-es-iam-policy"
policy = data.aws_iam_policy_document.elasticstack.json
@ -182,8 +187,8 @@ data "aws_ami" "amazonlinux" {
resource "aws_launch_template" "elasticstack" {
name_prefix = "${local.prefix}-elasticstack"
image_id = data.aws_ami.amazonlinux.image_id
instance_type = "t3.large"
key_name = "zwinnerman"
instance_type = "m6a.xlarge"
key_name = "robert"
vpc_security_group_ids = [aws_security_group.elasticsearch.id]
metadata_options {

View File

@ -7,3 +7,5 @@
- role: elasticsearch
- role: elasticapm
- role: kibana
- role: cloudwatch
- role: ssh

View File

@ -0,0 +1,12 @@
---
# Install the AWS CloudWatch agent, deploy its metrics configuration, and
# ensure the agent service is enabled and running.
# NOTE(review): a template change here will not restart an already-running
# agent; consider adding a handler that restarts amazon-cloudwatch-agent
# when the config file changes — confirm desired behavior.
- name: install amazon-cloudwatch-agent
  ansible.builtin.package:
    name: amazon-cloudwatch-agent
    state: present
- name: deploy cloudwatch agent configuration
  ansible.builtin.template:
    src: amazon-cloudwatch-agent.json
    dest: /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
    # Quoted octal mode avoids YAML integer parsing (ansible-lint risky-octal).
    mode: '0644'
    owner: root
    group: root
- name: enable and start cloudwatch agent
  ansible.builtin.systemd:
    name: amazon-cloudwatch-agent.service
    enabled: true
    state: started

View File

@ -0,0 +1,71 @@
{
"agent": {
"metrics_collection_interval": 60,
"run_as_user": "cwagent"
},
"metrics": {
"append_dimensions": {
"AutoScalingGroupName": "${aws:AutoScalingGroupName}",
"ImageId": "${aws:ImageId}",
"InstanceId": "${aws:InstanceId}",
"InstanceType": "${aws:InstanceType}"
},
"metrics_collected": {
"cpu": {
"measurement": [
"cpu_usage_idle",
"cpu_usage_iowait",
"cpu_usage_user",
"cpu_usage_system"
],
"metrics_collection_interval": 60,
"resources": [
"*"
],
"totalcpu": false
},
"disk": {
"measurement": [
"used_percent",
"inodes_free"
],
"metrics_collection_interval": 60,
"resources": [
"*"
]
},
"diskio": {
"measurement": [
"io_time",
"write_bytes",
"read_bytes",
"writes",
"reads"
],
"metrics_collection_interval": 60,
"resources": [
"*"
]
},
"mem": {
"measurement": [
"mem_used_percent"
],
"metrics_collection_interval": 60
},
"netstat": {
"measurement": [
"tcp_established",
"tcp_time_wait"
],
"metrics_collection_interval": 60
},
"swap": {
"measurement": [
"swap_used_percent"
],
"metrics_collection_interval": 60
}
}
}
}

View File

@ -1,3 +1,25 @@
---
# Prepare a dedicated EBS volume for docker's volume storage, then install
# and start docker itself.
# NOTE(review): /dev/sdb assumes xen-style block device naming; on nitro
# instances the raw device may surface as /dev/nvme1n1 (distro udev rules
# usually provide the /dev/sdb symlink) — confirm for the instance type used.
- name: ensure docker volumes directory exists
  ansible.builtin.file:
    path: /var/lib/docker/volumes/
    owner: root
    group: root
    mode: '0755'
    state: directory
- name: create docker volume partition
  community.general.parted:
    device: /dev/sdb
    number: 1
    state: present
    fs_type: ext4
- name: format docker volume
  community.general.filesystem:
    dev: /dev/sdb1
    fstype: ext4
- name: docker volume mount
  ansible.posix.mount:
    path: /var/lib/docker/volumes/
    src: /dev/sdb1
    fstype: ext4
    state: mounted
- name: install docker
  ansible.builtin.package:
    name: docker
    state: present
- name: enable and start docker
  ansible.builtin.systemd:
    name: docker.service
    enabled: true
    state: started

View File

@ -465,7 +465,7 @@ apm-server:
actions:
rollover:
max_age: 99999d
max_primary_shard_size: 2gb
max_primary_shard_size: 20gb
set_priority:
priority: 100
min_age: 0ms
@ -570,7 +570,7 @@ setup.template.settings:
# of the Elasticsearch template. For more details, please check
# https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html
index:
#number_of_shards: 1
number_of_shards: 2
number_of_replicas: 0
#codec: best_compression
#number_of_routing_shards: 30

View File

@ -7,7 +7,7 @@
cluster.name: docker-cluster
cluster.routing.allocation.disk.threshold_enabled: "false"
discovery.type: single-node
ES_JAVA_OPTS: -XX:UseAVX=2 -Xms1g -Xmx1g
ES_JAVA_OPTS: -XX:UseAVX=2 -Xms2g -Xmx4g
ulimits:
- "memlock:-1:-1"
volumes:

View File

@ -5,11 +5,11 @@
path: /home/ec2-user/.ssh
owner: ec2-user
group: ec2-user
mode: 700
mode: '0700'
state: directory
- ansible.builtin.template:
src: authorized_keys.j2
dest: /home/ec2-user/.ssh/authorized_keys
owner: ec2-user
group: ec2-user
mode: 600
mode: '0600'

View File

@ -1 +1 @@
{{ lookup("amazon.aws.aws_secret", "/fleet/ssh/keys") }}
{{ lookup("amazon.aws.aws_secret", "/fleet/ssh/keys", region=ansible_ec2_placement_region) }}