From 75a7ccbc42e78a64c615d0a71ba1a1355e44e830 Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Wed, 15 Apr 2026 15:30:37 +0100 Subject: [PATCH 01/10] feat(launch_config_drift): get node info from kube api not imds --- config/plugin/launch_config_drift.sh | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh index e37cb2d..d6fa780 100755 --- a/config/plugin/launch_config_drift.sh +++ b/config/plugin/launch_config_drift.sh @@ -4,9 +4,26 @@ OK=0 NONOK=1 UNKNOWN=2 -export $(cat /run/metadata/coreos | xargs) +# Get instance ID from Kubernetes node providerID +if [ -z "${NODE_NAME}" ]; then + exit $UNKNOWN +fi + +provider_id="$(curl -s -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ + --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ + "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${NODE_NAME}" \ + 2>/dev/null | jq -r '.spec.providerID')" -instance_id="${COREOS_EC2_INSTANCE_ID}" +if [ -z "${provider_id}" ]; then + exit $UNKNOWN +fi + +# Extract instance ID from providerID (format: aws:///region/instance-id) +instance_id="$(echo "${provider_id}" | awk -F'/' '{print $NF}')" + +if [ -z "${instance_id}" ]; then + exit $UNKNOWN +fi instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}")" From ca67d186fe0536b1194fa3da82899dad3f628003 Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Thu, 16 Apr 2026 11:49:45 +0100 Subject: [PATCH 02/10] feat(spot_termination): get termination info from aws not imds --- config/plugin/spot_termination.sh | 36 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/config/plugin/spot_termination.sh b/config/plugin/spot_termination.sh index 5d6e289..99130e4 100755 --- a/config/plugin/spot_termination.sh +++ b/config/plugin/spot_termination.sh @@ -4,18 +4,34 @@ OK=0 NONOK=1 UNKNOWN=2 -TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" \ - -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" \ - --max-time 3 --silent --fail 2>/dev/null) +# Get instance ID from Kubernetes node providerID +if [ -z "${NODE_NAME}" ]; then + exit $UNKNOWN +fi -status_code=$(curl --max-time 3 --silent --output /dev/stderr --write-out "%{http_code}" \ - -H "X-aws-ec2-metadata-token: $TOKEN" \ - "http://169.254.169.254/latest/meta-data/spot/instance-action") +provider_id="$(curl -s -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ + --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ + "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${NODE_NAME}" \ + 2>/dev/null | jq -r '.spec.providerID')" -if [ "${status_code}" -eq "404" ]; then +if [ -z "${provider_id}" ]; then + exit $UNKNOWN +fi + +# Extract instance ID from providerID (format: aws:///region/instance-id) +instance_id="$(echo "${provider_id}" | awk -F'/' '{print $NF}')" + +if [ -z "${instance_id}" ]; then + exit $UNKNOWN +fi + +# Check for spot instance interruption via EC2 API +interruption_time=$(aws ec2 describe-instances --instance-ids "${instance_id}" \ + --query 'Reservations[0].Instances[0].DisruptionTime' \ + --output text 2>/dev/null) + +if [ "${interruption_time}" = "None" ] || [ -z "${interruption_time}" ]; then exit $OK -elif [ "${status_code}" -eq "200" ]; then - exit $NONOK else - exit $UNKNOWN + exit $NONOK fi From c9ab830c7dc72283d4aa20a7da26c0ae60033dcc Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Thu, 16 Apr 2026 11:50:29 +0100 Subject: [PATCH 03/10] feat(base-image): update base image patch version --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 666c63f..6a0d601 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM registry.k8s.io/node-problem-detector/node-problem-detector:v1.35.1 +FROM registry.k8s.io/node-problem-detector/node-problem-detector:v1.35.2 RUN set -eux; \ apt-get update; \ @@ -13,4 +13,4 @@ RUN set -eux; \ ; \ rm -rf /var/lib/apt/lists/*; -COPY config /config \ No newline at end of file +COPY config /config From f407af29c5ee14b7c1fdfe413ef57b45ab18913a Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Fri, 17 Apr 2026 09:58:21 +0100 Subject: [PATCH 04/10] feat(launch_config_drift): extend invoke interval --- config/aws-ec2-asg-lc-drift-plugin-monitor.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/aws-ec2-asg-lc-drift-plugin-monitor.json b/config/aws-ec2-asg-lc-drift-plugin-monitor.json index ceef119..e125657 100644 --- a/config/aws-ec2-asg-lc-drift-plugin-monitor.json +++ b/config/aws-ec2-asg-lc-drift-plugin-monitor.json @@ -1,8 +1,8 @@ { "plugin": "custom", "pluginConfig": { - "invoke_interval": "5s", - "timeout": "4s", + "invoke_interval": "30s", + "timeout": "10s", "max_output_length": 80, "concurrency": 1 }, @@ -20,7 +20,7 @@ "condition": "LaunchConfigurationDrifted", "reason": "LaunchConfigurationDrifted", "path": "./config/plugin/launch_config_drift.sh", - "timeout": "4s" + "timeout": "10s" } ] } From 7be2f4e306c9e10864fd6c5e6c4145aa9f09b0ef Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Fri, 17 Apr 2026 13:53:20 +0100 Subject: [PATCH 05/10] use metadata endpoint to get instance info --- config/plugin/launch_config_drift.sh | 51 ++++++++++++++-------------- config/plugin/spot_termination.sh | 36 ++++++-------------- 2 files changed, 36 insertions(+), 51 deletions(-) diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh index d6fa780..c5ae3e3 100755 --- a/config/plugin/launch_config_drift.sh +++ b/config/plugin/launch_config_drift.sh @@ -4,50 +4,51 @@ OK=0 NONOK=1 UNKNOWN=2 -# Get instance ID from Kubernetes node providerID -if [ -z "${NODE_NAME}" ]; then - exit $UNKNOWN -fi +# Get IMDSv2 token +TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" \ + --max-time 3 --silent --fail 2>/dev/null) -provider_id="$(curl -s -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ - --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ - "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${NODE_NAME}" \ - 2>/dev/null | jq -r '.spec.providerID')" - -if [ -z "${provider_id}" ]; then +if [ -z "${TOKEN}" ]; then exit $UNKNOWN fi -# Extract instance ID from providerID (format: aws:///region/instance-id) -instance_id="$(echo "${provider_id}" | awk -F'/' '{print $NF}')" +# Get instance ID and launch template from IMDS +instance_id=$(curl --max-time 3 --silent --fail \ + -H "X-aws-ec2-metadata-token: $TOKEN" \ + "http://169.254.169.254/latest/meta-data/instance-id") if [ -z "${instance_id}" ]; then exit $UNKNOWN fi -instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}")" +instance_launch_template=$(curl --max-time 3 --silent --fail \ + -H "X-aws-ec2-metadata-token: $TOKEN" \ + "http://169.254.169.254/latest/meta-data/tags/instance/aws:ec2launchtemplate:id") + +instance_asg=$(curl --max-time 3 --silent --fail \ + -H "X-aws-ec2-metadata-token: $TOKEN" \ + "http://169.254.169.254/latest/meta-data/tags/instance/aws:autoscaling:groupName") -if [ "$(echo "${instances}" | jq '.AutoScalingInstances | length')" -eq "0" ] -then +if [ -z "${instance_asg}" ] || [ -z "${instance_launch_template}" ]; then exit $UNKNOWN fi -instance="$(echo "${instances}" | jq '.AutoScalingInstances[0]')" -instance_launch_config="$(echo "${instance}" | jq -r .LaunchTemplate.LaunchTemplateName)" -instance_asg="$(echo "${instance}" | jq -r .AutoScalingGroupName)" +# Get ASG's current launch template (still requires AWS API) +asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${instance_asg}" 2>/dev/null)" -asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${instance_asg})" +if [ -z "${asgs}" ] || ! echo "${asgs}" | jq empty 2>/dev/null; then + exit $UNKNOWN +fi -if [ "$(echo "${asgs}" | jq '.AutoScalingGroups | length')" -eq "0" ] -then +if [ "$(echo "${asgs}" | jq '.AutoScalingGroups | length')" -eq "0" ]; then exit $UNKNOWN fi -asg_launch_config="$(echo "${asgs}" | jq -r '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification.LaunchTemplateName')" +asg_launch_template="$(echo "${asgs}" | jq -r '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification.LaunchTemplateId')" -if [ "${instance_launch_config}" = "${asg_launch_config}" ] -then +if [ "${instance_launch_template}" = "${asg_launch_template}" ]; then exit $OK else exit $NONOK -fi \ No newline at end of file +fi diff --git a/config/plugin/spot_termination.sh b/config/plugin/spot_termination.sh index 99130e4..5d6e289 100755 --- a/config/plugin/spot_termination.sh +++ b/config/plugin/spot_termination.sh @@ -4,34 +4,18 @@ OK=0 NONOK=1 UNKNOWN=2 -# Get instance ID from Kubernetes node providerID -if [ -z "${NODE_NAME}" ]; then - exit $UNKNOWN -fi +TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" \ + --max-time 3 --silent --fail 2>/dev/null) -provider_id="$(curl -s -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ - --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ - "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${NODE_NAME}" \ - 2>/dev/null | jq -r '.spec.providerID')" +status_code=$(curl --max-time 3 --silent --output /dev/stderr --write-out "%{http_code}" \ + -H "X-aws-ec2-metadata-token: $TOKEN" \ + "http://169.254.169.254/latest/meta-data/spot/instance-action") -if [ -z "${provider_id}" ]; then - exit $UNKNOWN -fi - -# Extract instance ID from providerID (format: aws:///region/instance-id) -instance_id="$(echo "${provider_id}" | awk -F'/' '{print $NF}')" - -if [ -z "${instance_id}" ]; then - exit $UNKNOWN -fi - -# Check for spot instance interruption via EC2 API -interruption_time=$(aws ec2 describe-instances --instance-ids "${instance_id}" \ - --query 'Reservations[0].Instances[0].DisruptionTime' \ - --output text 2>/dev/null) - -if [ "${interruption_time}" = "None" ] || [ -z "${interruption_time}" ]; then +if [ "${status_code}" -eq "404" ]; then exit $OK -else +elif [ "${status_code}" -eq "200" ]; then exit $NONOK +else + exit $UNKNOWN fi From ee9759434c09a14a35c9444ffba93ae82f30d2a8 Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Mon, 20 Apr 2026 14:21:02 +0100 Subject: [PATCH 06/10] reference node-local-dns by ip --- config/local-dns-resolver-config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/local-dns-resolver-config.json b/config/local-dns-resolver-config.json index 468c378..9dfdbff 100644 --- a/config/local-dns-resolver-config.json +++ b/config/local-dns-resolver-config.json @@ -21,7 +21,7 @@ "reason": "NodeLocalDnsResolutionFailing", "path": "./config/plugin/local_dns_resolver.sh", "args": [ - "kube-dns-upstream.kube-system.svc.cluster.local." + "169.254.20.10" ] } ] From 963b9141d3ad75fafacb003b86349b44b8997c1c Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Tue, 21 Apr 2026 09:23:41 +0100 Subject: [PATCH 07/10] still use aws cli for comparing launch template --- config/plugin/launch_config_drift.sh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh index c5ae3e3..61034c7 100755 --- a/config/plugin/launch_config_drift.sh +++ b/config/plugin/launch_config_drift.sh @@ -22,18 +22,21 @@ if [ -z "${instance_id}" ]; then exit $UNKNOWN fi -instance_launch_template=$(curl --max-time 3 --silent --fail \ - -H "X-aws-ec2-metadata-token: $TOKEN" \ - "http://169.254.169.254/latest/meta-data/tags/instance/aws:ec2launchtemplate:id") +instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}" 2>/dev/null)" -instance_asg=$(curl --max-time 3 --silent --fail \ - -H "X-aws-ec2-metadata-token: $TOKEN" \ - "http://169.254.169.254/latest/meta-data/tags/instance/aws:autoscaling:groupName") +if [ -z "${instances}" ] || ! echo "${instances}" | jq empty 2>/dev/null; then + exit $UNKNOWN +fi -if [ -z "${instance_asg}" ] || [ -z "${instance_launch_template}" ]; then +if [ "$(echo "${instances}" | jq '.AutoScalingInstances | length')" -eq "0" ]; then exit $UNKNOWN fi +instance="$(echo "${instances}" | jq '.AutoScalingInstances[0]')" +instance_launch_template_id="$(echo "${instance}" | jq -r .LaunchTemplate.LaunchTemplateId)" +instance_launch_template_version="$(echo "${instance}" | jq -r .LaunchTemplate.Version)" +instance_asg="$(echo "${instance}" | jq -r .AutoScalingGroupName)" + # Get ASG's current launch template (still requires AWS API) asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${instance_asg}" 2>/dev/null)" @@ -45,9 +48,11 @@ if [ "$(echo "${asgs}" | jq '.AutoScalingGroups | length')" -eq "0" ]; then exit $UNKNOWN fi -asg_launch_template="$(echo "${asgs}" | jq -r '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification.LaunchTemplateId')" +asg="$(echo "${asgs}" | jq '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification')" +asg_launch_template_id="$(echo "${asg}" | jq -r '.LaunchTemplateId')" +asg_launch_template_version="$(echo "${asg}" | jq -r '.Version')" -if [ "${instance_launch_template}" = "${asg_launch_template}" ]; then +if [ "${instance_launch_template_id}" = "${asg_launch_template_id}" ] && [ "${instance_launch_template_version}" = "${asg_launch_template_version}" ]; then exit $OK else exit $NONOK From a25ccfccea197eff839000797443edf3424d3e33 Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Tue, 21 Apr 2026 10:15:26 +0100 Subject: [PATCH 08/10] get node-local-dns pod ip --- config/local-dns-resolver-config.json | 5 +---- config/plugin/local_dns_resolver.sh | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/config/local-dns-resolver-config.json b/config/local-dns-resolver-config.json index 9dfdbff..038873b 100644 --- a/config/local-dns-resolver-config.json +++ b/config/local-dns-resolver-config.json @@ -19,10 +19,7 @@ "type": "permanent", "condition": "NodeLocalDnsResolutionFailure", "reason": "NodeLocalDnsResolutionFailing", - "path": "./config/plugin/local_dns_resolver.sh", - "args": [ - "169.254.20.10" - ] + "path": "./config/plugin/local_dns_resolver.sh" } ] } diff --git a/config/plugin/local_dns_resolver.sh b/config/plugin/local_dns_resolver.sh index d0f6165..9571b75 100755 --- a/config/plugin/local_dns_resolver.sh +++ b/config/plugin/local_dns_resolver.sh @@ -4,7 +4,21 @@ OK=0 NONOK=1 UNKNOWN=2 -readonly local_dns_resolver_ip="$1" +if [ -z "${NODE_NAME}" ]; then + exit $UNKNOWN +fi + +# Get the node-local-dns pod IP running on this node directly, +# bypassing 169.254.20.10 which requires Cilium's eBPF path +local_dns_resolver_ip="$(curl -s \ + -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ + --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ + "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/namespaces/kube-system/pods?labelSelector=k8s-app%3Dnode-local-dns&fieldSelector=spec.nodeName%3D${NODE_NAME}" \ + 2>/dev/null | jq -r '.items[0].status.podIP')" + +if [ -z "${local_dns_resolver_ip}" ] || [ "${local_dns_resolver_ip}" = "null" ]; then + exit $UNKNOWN +fi dig_cmd_out="$(dig -t TXT @"${local_dns_resolver_ip}" +tries=1 +retry=0 +time=33 +noqr +noall +comments kubernetes.default.svc. 2>&1)" dig_cmd_return_code="$?" From b9273e77b9667ac8e26748d703bcddab9273cfbb Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Tue, 21 Apr 2026 13:19:41 +0100 Subject: [PATCH 09/10] cache result of describe-auto-scaling-instances --- config/plugin/launch_config_drift.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh index 61034c7..d69bc8b 100755 --- a/config/plugin/launch_config_drift.sh +++ b/config/plugin/launch_config_drift.sh @@ -22,10 +22,17 @@ if [ -z "${instance_id}" ]; then exit $UNKNOWN fi -instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}" 2>/dev/null)" +INSTANCE_CACHE="/tmp/npd_asg_instance_cache" -if [ -z "${instances}" ] || ! echo "${instances}" | jq empty 2>/dev/null; then - exit $UNKNOWN +if [ -f "$INSTANCE_CACHE" ]; then + instances="$(cat "$INSTANCE_CACHE")" +else + instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}" 2>&1)" + if [ $? -ne 0 ]; then + echo "describe-auto-scaling-instances: ${instances}" >&2 + exit $UNKNOWN + fi + echo "${instances}" > "$INSTANCE_CACHE" fi if [ "$(echo "${instances}" | jq '.AutoScalingInstances | length')" -eq "0" ]; then @@ -38,9 +45,9 @@ instance_launch_template_version="$(echo "${instance}" | jq -r .LaunchTemplate.V instance_asg="$(echo "${instance}" | jq -r .AutoScalingGroupName)" # Get ASG's current launch template (still requires AWS API) -asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${instance_asg}" 2>/dev/null)" - -if [ -z "${asgs}" ] || ! echo "${asgs}" | jq empty 2>/dev/null; then +asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${instance_asg}" 2>&1)" +if [ $? -ne 0 ]; then + echo "describe-auto-scaling-groups: ${asgs}" >&2 exit $UNKNOWN fi From 3470d6d546bf4474071a8a7b8b93adbe02b7402e Mon Sep 17 00:00:00 2001 From: Mark McGarry Date: Tue, 21 Apr 2026 13:46:47 +0100 Subject: [PATCH 10/10] remove launch config drift plugin --- Dockerfile | 3 +- .../aws-ec2-asg-lc-drift-plugin-monitor.json | 26 -------- config/plugin/launch_config_drift.sh | 66 ------------------- 3 files changed, 1 insertion(+), 94 deletions(-) delete mode 100644 config/aws-ec2-asg-lc-drift-plugin-monitor.json delete mode 100755 config/plugin/launch_config_drift.sh diff --git a/Dockerfile b/Dockerfile index 6a0d601..97298b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,8 +5,7 @@ RUN set -eux; \ apt-get install -y --no-install-recommends \ # required by plugin/spot_termination.sh curl \ - # required by plugin/launch_config_drift.sh - awscli \ + # required by plugin/local_dns_resolver.sh jq \ # required by local_dns_resolver.sh and upstream_dns_resolver.sh plugins dnsutils \ diff --git a/config/aws-ec2-asg-lc-drift-plugin-monitor.json b/config/aws-ec2-asg-lc-drift-plugin-monitor.json deleted file mode 100644 index e125657..0000000 --- a/config/aws-ec2-asg-lc-drift-plugin-monitor.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "plugin": "custom", - "pluginConfig": { - "invoke_interval": "30s", - "timeout": "10s", - "max_output_length": 80, - "concurrency": 1 - }, - "source": "lc-drift-custom-plugin-monitor", - "conditions": [ - { - "type": "LaunchConfigurationDrifted", - "reason": "LaunchConfigurationMatches", - "message": "launch configuration matches" - } - ], - "rules": [ - { - "type": "permanent", - "condition": "LaunchConfigurationDrifted", - "reason": "LaunchConfigurationDrifted", - "path": "./config/plugin/launch_config_drift.sh", - "timeout": "10s" - } - ] -} diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh deleted file mode 100755 index d69bc8b..0000000 --- a/config/plugin/launch_config_drift.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -OK=0 -NONOK=1 -UNKNOWN=2 - -# Get IMDSv2 token -TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" \ - -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" \ - --max-time 3 --silent --fail 2>/dev/null) - -if [ -z "${TOKEN}" ]; then - exit $UNKNOWN -fi - -# Get instance ID and launch template from IMDS -instance_id=$(curl --max-time 3 --silent --fail \ - -H "X-aws-ec2-metadata-token: $TOKEN" \ - "http://169.254.169.254/latest/meta-data/instance-id") - -if [ -z "${instance_id}" ]; then - exit $UNKNOWN -fi - -INSTANCE_CACHE="/tmp/npd_asg_instance_cache" - -if [ -f "$INSTANCE_CACHE" ]; then - instances="$(cat "$INSTANCE_CACHE")" -else - instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}" 2>&1)" - if [ $? -ne 0 ]; then - echo "describe-auto-scaling-instances: ${instances}" >&2 - exit $UNKNOWN - fi - echo "${instances}" > "$INSTANCE_CACHE" -fi - -if [ "$(echo "${instances}" | jq '.AutoScalingInstances | length')" -eq "0" ]; then - exit $UNKNOWN -fi - -instance="$(echo "${instances}" | jq '.AutoScalingInstances[0]')" -instance_launch_template_id="$(echo "${instance}" | jq -r .LaunchTemplate.LaunchTemplateId)" -instance_launch_template_version="$(echo "${instance}" | jq -r .LaunchTemplate.Version)" -instance_asg="$(echo "${instance}" | jq -r .AutoScalingGroupName)" - -# Get ASG's current launch template (still requires AWS API) -asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "${instance_asg}" 2>&1)" -if [ $? -ne 0 ]; then - echo "describe-auto-scaling-groups: ${asgs}" >&2 - exit $UNKNOWN -fi - -if [ "$(echo "${asgs}" | jq '.AutoScalingGroups | length')" -eq "0" ]; then - exit $UNKNOWN -fi - -asg="$(echo "${asgs}" | jq '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification')" -asg_launch_template_id="$(echo "${asg}" | jq -r '.LaunchTemplateId')" -asg_launch_template_version="$(echo "${asg}" | jq -r '.Version')" - -if [ "${instance_launch_template_id}" = "${asg_launch_template_id}" ] && [ "${instance_launch_template_version}" = "${asg_launch_template_version}" ]; then - exit $OK -else - exit $NONOK -fi