Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.fabric8.kubernetes.api.model.ContainerStatus;
import io.fabric8.kubernetes.api.model.HasMetadata;
import io.fabric8.kubernetes.api.model.Pod;
import io.fabric8.kubernetes.api.model.apps.Deployment;
import io.fabric8.kubernetes.api.model.rbac.ClusterRoleBinding;
import io.fabric8.kubernetes.client.KubernetesClient;
import io.fabric8.kubernetes.client.KubernetesClientBuilder;
import io.fabric8.kubernetes.client.KubernetesClientTimeoutException;

public class ClusterDeployedOperatorExtension extends AbstractOperatorExtension {

Expand Down Expand Up @@ -123,9 +127,14 @@ protected void before(ExtensionContext context) {
});

kubernetesClient.resourceList(operatorDeployment).inNamespace(namespace).createOrReplace();
kubernetesClient
.resourceList(operatorDeployment)
.waitUntilReady(operatorDeploymentTimeout.toMillis(), TimeUnit.MILLISECONDS);
try {
kubernetesClient
.resourceList(operatorDeployment)
.waitUntilReady(operatorDeploymentTimeout.toMillis(), TimeUnit.MILLISECONDS);
} catch (KubernetesClientTimeoutException e) {
logDiagnosticInfo(kubernetesClient);
throw e;
}
LOGGER.debug("Operator resources deployed.");
}

Expand All @@ -137,6 +146,127 @@ protected void deleteOperator() {
.delete();
}

private void logDiagnosticInfo(KubernetesClient kubernetesClient) {
LOGGER.error(
"Operator deployment timed out after {} seconds in namespace: {}",
operatorDeploymentTimeout.getSeconds(),
namespace);
try {
// Log deployment status
var deployments =
kubernetesClient.apps().deployments().inNamespace(namespace).list().getItems();
for (Deployment deployment : deployments) {
var status = deployment.getStatus();
LOGGER.error(
"Deployment '{}': replicas={}, readyReplicas={}, availableReplicas={},"
+ " unavailableReplicas={}, conditions={}",
deployment.getMetadata().getName(),
status != null ? status.getReplicas() : "null",
status != null ? status.getReadyReplicas() : "null",
status != null ? status.getAvailableReplicas() : "null",
status != null ? status.getUnavailableReplicas() : "null",
status != null ? status.getConditions() : "null");
}

// Log pod status and container details
var pods = kubernetesClient.pods().inNamespace(namespace).list().getItems();
for (Pod pod : pods) {
var podStatus = pod.getStatus();
LOGGER.error(
"Pod '{}': phase={}, reason={}, message={}",
pod.getMetadata().getName(),
podStatus != null ? podStatus.getPhase() : "null",
podStatus != null ? podStatus.getReason() : "null",
podStatus != null ? podStatus.getMessage() : "null");

if (podStatus != null && podStatus.getContainerStatuses() != null) {
for (ContainerStatus cs : podStatus.getContainerStatuses()) {
LOGGER.error(
" Container '{}': ready={}, restartCount={}, state={}",
cs.getName(),
cs.getReady(),
cs.getRestartCount(),
cs.getState());
}
}
if (podStatus != null && podStatus.getInitContainerStatuses() != null) {
for (ContainerStatus cs : podStatus.getInitContainerStatuses()) {
LOGGER.error(
" InitContainer '{}': ready={}, restartCount={}, state={}",
cs.getName(),
cs.getReady(),
cs.getRestartCount(),
cs.getState());
}
}

// Log pod events
var events =
kubernetesClient
.v1()
.events()
.inNamespace(namespace)
.withField("involvedObject.name", pod.getMetadata().getName())
.list()
.getItems();
for (var event : events) {
LOGGER.error(
" Event: type={}, reason={}, message={}",
event.getType(),
event.getReason(),
event.getMessage());
}

// Try to get container logs
try {
String logs =
kubernetesClient
.pods()
.inNamespace(namespace)
.withName(pod.getMetadata().getName())
.tailingLines(50)
.getLog();
if (logs != null && !logs.isEmpty()) {
LOGGER.error(" Logs for pod '{}':\n{}", pod.getMetadata().getName(), logs);
}
Comment thread
csviri marked this conversation as resolved.
} catch (Exception logEx) {
LOGGER.error(
" Could not retrieve logs for pod '{}': {}",
pod.getMetadata().getName(),
logEx.getMessage());
Comment on lines +234 to +236
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When pod log retrieval fails, this only logs the exception message and drops the stack trace, which makes diagnosing client/auth/network issues harder. Consider logging the exception itself as the last argument (similar to the diagEx handling below) so the full cause is available when needed.

Suggested change
" Could not retrieve logs for pod '{}': {}",
pod.getMetadata().getName(),
logEx.getMessage());
" Could not retrieve logs for pod '{}'",
pod.getMetadata().getName(),
logEx);

Copilot uses AI. Check for mistakes.
}
}

if (pods.isEmpty()) {
LOGGER.error(
"No pods found in namespace '{}'. The deployment may have failed to"
+ " create pods. Check if the image exists and is pullable.",
namespace);

// Log deployment events when no pods exist
for (Deployment deployment : deployments) {
var events =
kubernetesClient
.v1()
.events()
.inNamespace(namespace)
.withField("involvedObject.name", deployment.getMetadata().getName())
.list()
.getItems();
for (var event : events) {
LOGGER.error(
" Deployment event: type={}, reason={}, message={}",
event.getType(),
event.getReason(),
event.getMessage());
}
}
}
} catch (Exception diagEx) {
LOGGER.error("Failed to collect diagnostic info: {}", diagEx.getMessage(), diagEx);
}
}

public static class Builder extends AbstractBuilder<Builder> {
private final List<HasMetadata> operatorDeployment;
private Duration deploymentTimeout;
Expand Down
Loading