Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ The server dynamically filters the available tools based on the permissions asso
- **Required Permission**: `metrics-data.read`
- **Sample Prompt**: "Show the top 10 underutilized pods by memory quota in cluster 'production'"

> **Note:** When a time window is provided, the underlying PromQL is wrapped in the aggregation appropriate for each tool (`avg_over_time`, `max_over_time`, `min_over_time`, `increase`, etc.) and evaluated at `end`. See [`internal/infra/mcp/tools/README.md`](./internal/infra/mcp/tools/README.md) for the per-tool aggregation table.

### Sysdig Secure

- **`list_runtime_events`**
Expand Down
32 changes: 16 additions & 16 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,22 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
tools.NewToolRunSysql(sysdigClient),
tools.NewToolGenerateSysql(sysdigClient),

tools.NewK8sListClusters(sysdigClient),
tools.NewK8sListNodes(sysdigClient),
tools.NewK8sListCronjobs(sysdigClient),
tools.NewK8sListWorkloads(sysdigClient),
tools.NewK8sListPodContainers(sysdigClient),
tools.NewK8sListTopUnavailablePods(sysdigClient),
tools.NewK8sListTopRestartedPods(sysdigClient),
tools.NewK8sListTopHttpErrorsInPods(sysdigClient),
tools.NewK8sListTopNetworkErrorsInPods(sysdigClient),
tools.NewK8sListCountPodsPerCluster(sysdigClient),
tools.NewK8sListUnderutilizedPodsCPUQuota(sysdigClient),
tools.NewK8sListTopCPUConsumedWorkload(sysdigClient),
tools.NewK8sListTopCPUConsumedContainer(sysdigClient),
tools.NewK8sListUnderutilizedPodsMemoryQuota(sysdigClient),
tools.NewK8sListTopMemoryConsumedWorkload(sysdigClient),
tools.NewK8sListTopMemoryConsumedContainer(sysdigClient),
tools.NewK8sListClusters(sysdigClient, systemClock),
tools.NewK8sListNodes(sysdigClient, systemClock),
tools.NewK8sListCronjobs(sysdigClient, systemClock),
tools.NewK8sListWorkloads(sysdigClient, systemClock),
tools.NewK8sListPodContainers(sysdigClient, systemClock),
tools.NewK8sListTopUnavailablePods(sysdigClient, systemClock),
tools.NewK8sListTopRestartedPods(sysdigClient, systemClock),
tools.NewK8sListTopHttpErrorsInPods(sysdigClient, systemClock),
tools.NewK8sListTopNetworkErrorsInPods(sysdigClient, systemClock),
tools.NewK8sListCountPodsPerCluster(sysdigClient, systemClock),
tools.NewK8sListUnderutilizedPodsCPUQuota(sysdigClient, systemClock),
tools.NewK8sListTopCPUConsumedWorkload(sysdigClient, systemClock),
tools.NewK8sListTopCPUConsumedContainer(sysdigClient, systemClock),
tools.NewK8sListUnderutilizedPodsMemoryQuota(sysdigClient, systemClock),
tools.NewK8sListTopMemoryConsumedWorkload(sysdigClient, systemClock),
tools.NewK8sListTopMemoryConsumedContainer(sysdigClient, systemClock),
)
return handler
}
Expand Down
29 changes: 29 additions & 0 deletions internal/infra/mcp/tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,35 @@ The handler filters tools dynamically based on the Sysdig user's permissions. Ea
|---|---|---|---|---|
| `generate_sysql` | `tool_generate_sysql.go` | Convert natural language to SysQL via Sysdig Sage. | `sage.exec` (does not work with Service Accounts) | "Create a SysQL to list S3 buckets." |

## Historical range (start / end)

All Sysdig Monitor `k8s_list_*` tools accept two optional parameters:

- `start` — RFC3339 timestamp, e.g. `2026-04-16T00:00:00Z`
- `end` — RFC3339 timestamp, e.g. `2026-04-16T01:00:00Z`

When omitted, tools return an instant snapshot (current behaviour). When provided,
the underlying PromQL is wrapped in the aggregation appropriate for each tool and
evaluated at `end`:

| Tool group | Wrapping applied when windowed |
|---|---|
| CPU / memory usage, underutilized quota, pod count | `avg_over_time(metric[Ns])` |
| Top restarted pods | `increase(kube_pod_container_status_restarts_total[Ns])` |
| Top unavailable pods | `min_over_time(kube_workload_status_unavailable[Ns]) >= 1` (Sysdig-canonical pattern — requires continuous unavailability for the entire window) |
| HTTP / network errors | `sum_over_time(metric[Ns]) / N` (rate per second) |
| Inventory tools (clusters, nodes, workloads, pod_containers, cronjobs) | `max_over_time(metric[Ns]) > 0` (workloads with status=ready/desired/running drop the `> 0` guard) |

Validation rules (helper: `utils.go`):

- `end` without `start` → error.
- `start` without `end` → `end` defaults to now.
- `end` in the future → clamped to now.
- `end <= start` → error.

Windowed queries carry a 60 s client-side PromQL `Timeout` to fail fast before the
Sysdig edge proxy's own 80–90 s cut-off.

# Adding a New Tool

1. **See other tools:** Check how other tools are implemented so you have context on what they should look like.
Expand Down
30 changes: 25 additions & 5 deletions internal/infra/mcp/tools/tool_k8s_list_clusters.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,31 @@ import (

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
)

// K8sListClusters implements the k8s_list_clusters MCP tool.
type K8sListClusters struct {
// SysdigClient runs the PromQL query (GetQueryV1) built by the handler.
SysdigClient sysdig.ExtendedClientWithResponsesInterface
// clock is handed to ParseTimeWindow when resolving the optional start/end window.
clock clock.Clock
}

func NewK8sListClusters(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *K8sListClusters {
func NewK8sListClusters(sysdigClient sysdig.ExtendedClientWithResponsesInterface, clk clock.Clock) *K8sListClusters {
return &K8sListClusters{
SysdigClient: sysdigClient,
clock: clk,
}
}

func (t *K8sListClusters) RegisterInServer(s *server.MCPServer) {
tool := mcp.NewTool("k8s_list_clusters",
mcp.WithDescription("Lists the cluster information for all clusters or just the cluster specified."),
mcp.WithDescription("Lists the cluster information for all clusters or just the cluster specified. Optionally pass start/end (RFC3339) to list clusters that existed at any point in the window."),
mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
mcp.WithNumber("limit",
mcp.Description("Maximum number of clusters to return."),
mcp.DefaultNumber(10),
),
WithTimeWindowParams(),
mcp.WithOutputSchema[map[string]any](),
mcp.WithReadOnlyHintAnnotation(true),
mcp.WithDestructiveHintAnnotation(false),
Expand All @@ -41,16 +45,21 @@ func (t *K8sListClusters) handle(ctx context.Context, request mcp.CallToolReques
clusterName := mcp.ParseString(request, "cluster_name", "")
limit := mcp.ParseInt(request, "limit", 10)

query := "kube_cluster_info"
if clusterName != "" {
query = fmt.Sprintf("kube_cluster_info{cluster=\"%s\"}", clusterName)
tw, err := ParseTimeWindow(request, t.clock)
if err != nil {
return mcp.NewToolResultErrorFromErr("invalid time window", err), nil
}

query := buildKubeClusterInfoQuery(clusterName, tw)

limitQuery := sysdig.LimitQuery(limit)
params := &sysdig.GetQueryV1Params{
Query: query,
Limit: &limitQuery,
}
if err := tw.ApplyToParams(params); err != nil {
return mcp.NewToolResultErrorFromErr("failed to build eval time", err), nil
}

httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
if err != nil {
Expand All @@ -69,3 +78,14 @@ func (t *K8sListClusters) handle(ctx context.Context, request mcp.CallToolReques

return mcp.NewToolResultJSON(queryResponse)
}

// buildKubeClusterInfoQuery returns the PromQL used by k8s_list_clusters.
//
// With an empty clusterName the bare kube_cluster_info metric is selected;
// otherwise it is narrowed with a cluster="<name>" matcher. When tw is not
// the zero window, the selector is wrapped as
// max_over_time(<selector><range>) > 0, where <range> is tw.RangeSelector().
func buildKubeClusterInfoQuery(clusterName string, tw TimeWindow) string {
	selector := "kube_cluster_info"
	if clusterName != "" {
		// %q emits a Go-escaped double-quoted string, which is the escaping
		// PromQL string literals use, so a quote or backslash in the name
		// cannot terminate the matcher early and alter the query.
		selector = fmt.Sprintf("kube_cluster_info{cluster=%q}", clusterName)
	}
	if tw.IsZero() {
		return selector
	}
	return fmt.Sprintf("max_over_time(%s%s) > 0", selector, tw.RangeSelector())
}
43 changes: 41 additions & 2 deletions internal/infra/mcp/tools/tool_k8s_list_clusters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,35 @@ import (
"context"
"io"
"net/http"
"time"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"go.uber.org/mock/gomock"

mocks_clock "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock/mocks"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
"go.uber.org/mock/gomock"
)

var _ = Describe("KubernetesListClusters Tool", func() {
var (
tool *tools.K8sListClusters
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
mockClock *mocks_clock.MockClock
mcpServer *server.MCPServer
ctrl *gomock.Controller
)

BeforeEach(func() {
ctrl = gomock.NewController(GinkgoT())
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
tool = tools.NewK8sListClusters(mockSysdig)
mockClock = mocks_clock.NewMockClock(ctrl)
mockClock.EXPECT().Now().AnyTimes().Return(time.Date(2026, time.April, 16, 12, 0, 0, 0, time.UTC))
tool = tools.NewK8sListClusters(mockSysdig, mockClock)
mcpServer = server.NewMCPServer("test", "test")
tool.RegisterInServer(mcpServer)
})
Expand Down Expand Up @@ -103,6 +109,39 @@ var _ = Describe("KubernetesListClusters Tool", func() {
Limit: new(sysdig.LimitQuery(20)),
},
),
Entry("windowed, no filters",
"k8s_list_clusters",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "k8s_list_clusters",
Arguments: map[string]any{
"start": "2026-04-16T10:00:00Z",
"end": "2026-04-16T11:00:00Z",
},
},
},
mergeLimit(newWindowedQueryParams(
`max_over_time(kube_cluster_info[3600s]) > 0`,
time.Date(2026, time.April, 16, 11, 0, 0, 0, time.UTC),
), 10),
),
Entry("windowed, cluster_name filter",
"k8s_list_clusters",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "k8s_list_clusters",
Arguments: map[string]any{
"cluster_name": "my_cluster",
"start": "2026-04-16T10:00:00Z",
"end": "2026-04-16T11:00:00Z",
},
},
},
mergeLimit(newWindowedQueryParams(
`max_over_time(kube_cluster_info{cluster="my_cluster"}[3600s]) > 0`,
time.Date(2026, time.April, 16, 11, 0, 0, 0, time.UTC),
), 10),
),
)
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,32 @@ import (

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
)

// K8sListCountPodsPerCluster implements the k8s_list_count_pods_per_cluster MCP tool.
type K8sListCountPodsPerCluster struct {
// SysdigClient runs the PromQL query (GetQueryV1) built by the handler.
SysdigClient sysdig.ExtendedClientWithResponsesInterface
// clock is handed to ParseTimeWindow when resolving the optional start/end window.
clock clock.Clock
}

func NewK8sListCountPodsPerCluster(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *K8sListCountPodsPerCluster {
func NewK8sListCountPodsPerCluster(sysdigClient sysdig.ExtendedClientWithResponsesInterface, clk clock.Clock) *K8sListCountPodsPerCluster {
return &K8sListCountPodsPerCluster{
SysdigClient: sysdigClient,
clock: clk,
}
}

func (t *K8sListCountPodsPerCluster) RegisterInServer(s *server.MCPServer) {
tool := mcp.NewTool("k8s_list_count_pods_per_cluster",
mcp.WithDescription("List the count of running Kubernetes Pods grouped by cluster and namespace."),
mcp.WithDescription("List the count of running Kubernetes Pods grouped by cluster and namespace. Optionally pass start/end (RFC3339) to count pods averaged over a historical window instead of the current instant snapshot."),
mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
mcp.WithString("namespace_name", mcp.Description("The name of the namespace to filter by.")),
mcp.WithNumber("limit",
mcp.Description("Maximum number of results to return."),
mcp.DefaultNumber(20),
),
WithTimeWindowParams(),
mcp.WithOutputSchema[map[string]any](),
mcp.WithReadOnlyHintAnnotation(true),
mcp.WithDestructiveHintAnnotation(false),
Expand All @@ -44,13 +48,21 @@ func (t *K8sListCountPodsPerCluster) handle(ctx context.Context, request mcp.Cal
namespaceName := mcp.ParseString(request, "namespace_name", "")
limit := mcp.ParseInt(request, "limit", 20)

query := buildKubePodCountQuery(clusterName, namespaceName)
tw, err := ParseTimeWindow(request, t.clock)
if err != nil {
return mcp.NewToolResultErrorFromErr("invalid time window", err), nil
}

query := buildKubePodCountQuery(clusterName, namespaceName, tw)

limitQuery := sysdig.LimitQuery(limit)
params := &sysdig.GetQueryV1Params{
Query: query,
Limit: &limitQuery,
}
if err := tw.ApplyToParams(params); err != nil {
return mcp.NewToolResultErrorFromErr("failed to build eval time", err), nil
}

httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
if err != nil {
Expand All @@ -70,7 +82,7 @@ func (t *K8sListCountPodsPerCluster) handle(ctx context.Context, request mcp.Cal
return mcp.NewToolResultJSON(queryResponse)
}

func buildKubePodCountQuery(clusterName, namespaceName string) string {
func buildKubePodCountQuery(clusterName, namespaceName string, tw TimeWindow) string {
filters := []string{}
if clusterName != "" {
filters = append(filters, fmt.Sprintf("kube_cluster_name=\"%s\"", clusterName))
Expand All @@ -84,5 +96,10 @@ func buildKubePodCountQuery(clusterName, namespaceName string) string {
filterString = fmt.Sprintf("{%s}", strings.Join(filters, ","))
}

return fmt.Sprintf("sum by (kube_cluster_name, kube_namespace_name) (kube_pod_info%s)", filterString)
metric := fmt.Sprintf("kube_pod_info%s", filterString)
if !tw.IsZero() {
metric = fmt.Sprintf("avg_over_time(%s%s)", metric, tw.RangeSelector())
}

return fmt.Sprintf("sum by (kube_cluster_name, kube_namespace_name) (%s)", metric)
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,35 @@ import (
"context"
"io"
"net/http"
"time"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"go.uber.org/mock/gomock"

mocks_clock "github.com/sysdiglabs/sysdig-mcp-server/internal/infra/clock/mocks"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
"go.uber.org/mock/gomock"
)

var _ = Describe("KubernetesListCountPodsPerCluster Tool", func() {
var (
tool *tools.K8sListCountPodsPerCluster
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
mockClock *mocks_clock.MockClock
mcpServer *server.MCPServer
ctrl *gomock.Controller
)

BeforeEach(func() {
ctrl = gomock.NewController(GinkgoT())
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
tool = tools.NewK8sListCountPodsPerCluster(mockSysdig)
mockClock = mocks_clock.NewMockClock(ctrl)
mockClock.EXPECT().Now().AnyTimes().Return(time.Date(2026, time.April, 16, 12, 0, 0, 0, time.UTC))
tool = tools.NewK8sListCountPodsPerCluster(mockSysdig, mockClock)
mcpServer = server.NewMCPServer("test", "test")
tool.RegisterInServer(mcpServer)
})
Expand Down Expand Up @@ -116,6 +122,22 @@ var _ = Describe("KubernetesListCountPodsPerCluster Tool", func() {
Limit: new(sysdig.LimitQuery(20)),
},
),
Entry("windowed, both start and end",
"k8s_list_count_pods_per_cluster",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "k8s_list_count_pods_per_cluster",
Arguments: map[string]any{
"start": "2026-04-16T10:00:00Z",
"end": "2026-04-16T11:00:00Z",
},
},
},
mergeLimit(newWindowedQueryParams(
`sum by (kube_cluster_name, kube_namespace_name) (avg_over_time(kube_pod_info[3600s]))`,
time.Date(2026, time.April, 16, 11, 0, 0, 0, time.UTC),
), 20),
),
)
})
})
Loading
Loading