Summary of this patch (free text before the first "diff --git" header):

  * Dockerfile: bump the node-problem-detector base image from v1.35.1 to
    v1.35.2, stop installing awscli, re-label jq as a dependency of
    plugin/local_dns_resolver.sh, and terminate the file with a newline.
  * Remove the launch-configuration drift plugin (monitor JSON + script),
    which was the only declared consumer of awscli.
  * local_dns_resolver.sh: stop taking the resolver address as "$1"; instead
    query the Kubernetes API with the pod's service-account token for the
    node-local-dns pod scheduled on ${NODE_NAME} and use its pod IP.
    The script exits UNKNOWN when NODE_NAME is unset or no pod IP is found.
  * local-dns-resolver-config.json: drop the now-unused "args" entry.

NOTE(review): indentation inside the hunks was reconstructed; confirm it
against the target tree before applying (or apply with --ignore-whitespace).

diff --git a/Dockerfile b/Dockerfile
index 666c63f..97298b3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,15 @@
-FROM registry.k8s.io/node-problem-detector/node-problem-detector:v1.35.1
+FROM registry.k8s.io/node-problem-detector/node-problem-detector:v1.35.2
 
 RUN set -eux; \
     apt-get update; \
     apt-get install -y --no-install-recommends \
         # required by plugin/spot_termination.sh
         curl \
-        # required by plugin/launch_config_drift.sh
-        awscli \
+        # required by plugin/local_dns_resolver.sh
         jq \
         # required by local_dns_resolver.sh and upstream_dns_resolver.sh plugins
         dnsutils \
     ; \
     rm -rf /var/lib/apt/lists/*;
 
-COPY config /config
\ No newline at end of file
+COPY config /config
diff --git a/config/aws-ec2-asg-lc-drift-plugin-monitor.json b/config/aws-ec2-asg-lc-drift-plugin-monitor.json
deleted file mode 100644
index ceef119..0000000
--- a/config/aws-ec2-asg-lc-drift-plugin-monitor.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-  "plugin": "custom",
-  "pluginConfig": {
-    "invoke_interval": "5s",
-    "timeout": "4s",
-    "max_output_length": 80,
-    "concurrency": 1
-  },
-  "source": "lc-drift-custom-plugin-monitor",
-  "conditions": [
-    {
-      "type": "LaunchConfigurationDrifted",
-      "reason": "LaunchConfigurationMatches",
-      "message": "launch configuration matches"
-    }
-  ],
-  "rules": [
-    {
-      "type": "permanent",
-      "condition": "LaunchConfigurationDrifted",
-      "reason": "LaunchConfigurationDrifted",
-      "path": "./config/plugin/launch_config_drift.sh",
-      "timeout": "4s"
-    }
-  ]
-}
diff --git a/config/local-dns-resolver-config.json b/config/local-dns-resolver-config.json
index 468c378..038873b 100644
--- a/config/local-dns-resolver-config.json
+++ b/config/local-dns-resolver-config.json
@@ -19,10 +19,7 @@
       "type": "permanent",
       "condition": "NodeLocalDnsResolutionFailure",
       "reason": "NodeLocalDnsResolutionFailing",
-      "path": "./config/plugin/local_dns_resolver.sh",
-      "args": [
-        "kube-dns-upstream.kube-system.svc.cluster.local."
-      ]
+      "path": "./config/plugin/local_dns_resolver.sh"
     }
   ]
 }
diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh
deleted file mode 100755
index e37cb2d..0000000
--- a/config/plugin/launch_config_drift.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-
-OK=0
-NONOK=1
-UNKNOWN=2
-
-export $(cat /run/metadata/coreos | xargs)
-
-instance_id="${COREOS_EC2_INSTANCE_ID}"
-
-instances="$(aws autoscaling describe-auto-scaling-instances --instance-ids "${instance_id}")"
-
-if [ "$(echo "${instances}" | jq '.AutoScalingInstances | length')" -eq "0" ]
-then
-  exit $UNKNOWN
-fi
-
-instance="$(echo "${instances}" | jq '.AutoScalingInstances[0]')"
-instance_launch_config="$(echo "${instance}" | jq -r .LaunchTemplate.LaunchTemplateName)"
-instance_asg="$(echo "${instance}" | jq -r .AutoScalingGroupName)"
-
-asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${instance_asg})"
-
-if [ "$(echo "${asgs}" | jq '.AutoScalingGroups | length')" -eq "0" ]
-then
-  exit $UNKNOWN
-fi
-
-asg_launch_config="$(echo "${asgs}" | jq -r '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification.LaunchTemplateName')"
-
-if [ "${instance_launch_config}" = "${asg_launch_config}" ]
-then
-  exit $OK
-else
-  exit $NONOK
-fi
\ No newline at end of file
diff --git a/config/plugin/local_dns_resolver.sh b/config/plugin/local_dns_resolver.sh
index d0f6165..9571b75 100755
--- a/config/plugin/local_dns_resolver.sh
+++ b/config/plugin/local_dns_resolver.sh
@@ -4,7 +4,21 @@ OK=0
 NONOK=1
 UNKNOWN=2
 
-readonly local_dns_resolver_ip="$1"
+if [ -z "${NODE_NAME}" ]; then
+  exit $UNKNOWN
+fi
+
+# Get the node-local-dns pod IP running on this node directly,
+# bypassing 169.254.20.10 which requires Cilium's eBPF path
+local_dns_resolver_ip="$(curl -s \
+  -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \
+  --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \
+  "https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/namespaces/kube-system/pods?labelSelector=k8s-app%3Dnode-local-dns&fieldSelector=spec.nodeName%3D${NODE_NAME}" \
+  2>/dev/null | jq -r '.items[0].status.podIP')"
+
+if [ -z "${local_dns_resolver_ip}" ] || [ "${local_dns_resolver_ip}" = "null" ]; then
+  exit $UNKNOWN
+fi
 
 dig_cmd_out="$(dig -t TXT @"${local_dns_resolver_ip}" +tries=1 +retry=0 +time=33 +noqr +noall +comments kubernetes.default.svc. 2>&1)"
 dig_cmd_return_code="$?"