diff --git a/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml b/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml index f8688ce0..c5cf9711 100644 --- a/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml +++ b/config/crd/bases/barmancloud.cnpg.io_objectstores.yaml @@ -176,6 +176,25 @@ spec: format: int32 minimum: 1 type: integer + restoreAdditionalCommandArgs: + description: |- + Additional arguments that can be appended to the 'barman-cloud-restore' + command-line invocation. These arguments provide flexibility to customize + the data restore process further, according to specific requirements or + configurations. + + Example: + In a scenario where specialized restore options are required, such as setting + a specific read timeout or defining custom behavior, users can use this field + to specify additional command arguments. + + Note: + It's essential to ensure that the provided arguments are valid and supported + by the 'barman-cloud-restore' command, to avoid potential errors or unintended + behavior during execution. + items: + type: string + type: array type: object destinationPath: description: |- diff --git a/go.mod b/go.mod index 7813e622..e07525bb 100644 --- a/go.mod +++ b/go.mod @@ -137,3 +137,5 @@ require ( sigs.k8s.io/structured-merge-diff/v6 v6.4.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) + +replace github.com/cloudnative-pg/barman-cloud => github.com/leonardoce/barman-cloud v0.0.0-20260527084420-d2ef1c114ae8 diff --git a/go.sum b/go.sum index 2ccdba42..c81dae7f 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,6 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cloudnative-pg/api v1.29.1 h1:FWr8S7EQeOfdhYXyr2cof8wUXLARZiiQt5Qa6ltED7w= github.com/cloudnative-pg/api v1.29.1/go.mod h1:QtWF3yzSvIfORMHaSkPAk/o3bhCJwEHJgN3riyRiz3o= -github.com/cloudnative-pg/barman-cloud v0.5.1 h1:vjkXrrxo2DQXHT9u9usqhtaHiPZ/lTfDVs/pIWYTepQ= -github.com/cloudnative-pg/barman-cloud v0.5.1/go.mod h1:XPc5IUFP1y4cZX1sg+Pd8j9V4tmUEVnv3BGCpfQOOg8= github.com/cloudnative-pg/cloudnative-pg v1.29.1 h1:ZNEt1TMlnQKXI1kho2UqQuqdfvIvjGln4kN7C1lsmGA= github.com/cloudnative-pg/cloudnative-pg v1.29.1/go.mod h1:Sbgx9jVmkle4/gR2U5JHrzDd74sRPOBHDtPkvncg5v8= github.com/cloudnative-pg/cnpg-i v0.5.0 h1:/TOzpNT6cwNgrpftTtrnLKdoHgMwd+88vZgXjlVgXeE= @@ -143,6 +141,8 @@ github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 h1:bMqrb3UHgHbP+ github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0/go.mod h1:E3vdYxHj2C2q6qo8/Da4g7P+IcwqRZyy3gJBzYybV9Y= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/leonardoce/barman-cloud v0.0.0-20260527084420-d2ef1c114ae8 h1:svNVHqtp2a7FfxuLPTm7bs9esj/tAp4arYY/8hvK3no= +github.com/leonardoce/barman-cloud v0.0.0-20260527084420-d2ef1c114ae8/go.mod h1:DACs5rdlMIQ7mUOpJVBRTrbf+d7OGyMOLsFUEMJ2JV8= github.com/lib/pq v1.12.3 h1:tTWxr2YLKwIvK90ZXEw8GP7UFHtcbTtty8zsI+YjrfQ= github.com/lib/pq v1.12.3/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA= github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= diff --git a/internal/cnpgi/common/errors.go b/internal/cnpgi/common/errors.go index 2adff8d5..2aa18363 100644 --- a/internal/cnpgi/common/errors.go +++ b/internal/cnpgi/common/errors.go @@ -20,10 +20,38 @@ SPDX-License-Identifier: Apache-2.0 package common import ( + "errors" + + barmanRestorer "github.com/cloudnative-pg/barman-cloud/pkg/restorer" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) +// classifyWALRestoreError maps an error returned by the WAL +// restorer to a gRPC-coded error so the caller can tell terminal +// failures apart from transient ones via the status code. +func classifyWALRestoreError(walName string, walErr error) error { + switch { + case errors.Is(walErr, barmanRestorer.ErrWALNotFound): + return newWALNotFoundError(walName) + case errors.Is(walErr, barmanRestorer.ErrInvalidWalName): + // A malformed WAL name will never become valid on retry. + return newInvalidWALNameError(walName, walErr) + case errors.Is(walErr, barmanRestorer.ErrConnectivity), + errors.Is(walErr, barmanRestorer.ErrGeneric): + // barman-cloud exit codes 2 (connectivity) and 4 + // (generic) both surface conditions that are retryable + // in practice — barman uses the "generic" bucket for + // some connection-class failures too, not just exit 2. + return newUnavailableError(walName, walErr) + default: + // Unrecognized exit codes and unexpected failures (e.g. + // the barman-cloud command could not be executed). No + // positive signal that retry would help. + return newInternalWALRestoreError(walName, walErr) + } +} + // ErrEndOfWALStreamReached is returned when end of WAL is detected in the cloud archive. var ErrEndOfWALStreamReached = status.Errorf(codes.OutOfRange, "end of WAL reached") @@ -35,10 +63,48 @@ var ErrEndOfWALStreamReached = status.Errorf(codes.OutOfRange, "end of WAL reach var ErrMissingPermissions = status.Errorf(codes.FailedPrecondition, "no permission to download the backup credentials, retrying") -// newWALNotFoundError returns a error that states that a -// certain WAL file has not been found. This error is -// compatible with GRPC status codes, resulting in a 404 -// being used as a response code. +// newWALNotFoundError reports that the requested WAL file is not +// present in the object store. Emits codes.NotFound: this is a +// terminal condition (the file won't appear on retry). func newWALNotFoundError(walName string) error { return status.Errorf(codes.NotFound, "wal %q not found", walName) } + +// newUnavailableError reports that downloading the WAL file failed +// for a reason expected to be transient (a barman-cloud connectivity +// blip, or a generic exit code that in practice covers retryable +// conditions too). Emits codes.Unavailable: per gRPC conventions, +// the canonical signal for "retry may succeed". +func newUnavailableError(walName string, err error) error { + return status.Errorf( + codes.Unavailable, + "transient error while downloading %q: %s", + walName, + err.Error(), + ) +} + +// newInvalidWALNameError reports that the requested WAL name is +// not a valid name. Emits codes.InvalidArgument: this is a +// terminal condition (the same name won't become valid on retry). +func newInvalidWALNameError(walName string, err error) error { + return status.Errorf( + codes.InvalidArgument, + "invalid WAL name %q: %s", + walName, + err.Error(), + ) +} + +// newInternalWALRestoreError reports that downloading the WAL +// file failed for an unclassified reason. Emits codes.Internal: +// we have no positive signal that retry would help, so by gRPC +// convention this is treated as terminal. +func newInternalWALRestoreError(walName string, err error) error { + return status.Errorf( + codes.Internal, + "internal error while downloading %q: %s", + walName, + err.Error(), + ) +} diff --git a/internal/cnpgi/common/errors_test.go b/internal/cnpgi/common/errors_test.go new file mode 100644 index 00000000..ac3d509a --- /dev/null +++ b/internal/cnpgi/common/errors_test.go @@ -0,0 +1,86 @@ +/* +Copyright © contributors to CloudNativePG, established as +CloudNativePG a Series of LF Projects, LLC. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +SPDX-License-Identifier: Apache-2.0 +*/ + +package common + +import ( + "errors" + "fmt" + + barmanRestorer "github.com/cloudnative-pg/barman-cloud/pkg/restorer" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +var _ = Describe("classifyWALRestoreError", func() { + const walName = "000000010000000000000001" + + DescribeTable( + "maps barman restorer sentinels to gRPC status codes", + func(walErr error, expectedCode codes.Code) { + got := classifyWALRestoreError(walName, walErr) + Expect(got).To(HaveOccurred()) + + st, ok := status.FromError(got) + Expect(ok).To(BeTrue(), "returned error must carry a gRPC status") + Expect(st.Code()).To(Equal(expectedCode)) + Expect(st.Message()).To(ContainSubstring(walName)) + }, + Entry("ErrWALNotFound -> NotFound", + fmt.Errorf("object storage or file not found: %w", barmanRestorer.ErrWALNotFound), + codes.NotFound), + Entry("ErrInvalidWalName -> InvalidArgument", + fmt.Errorf("invalid name for a WAL file: %w", barmanRestorer.ErrInvalidWalName), + codes.InvalidArgument), + Entry("ErrConnectivity -> Unavailable", + fmt.Errorf("connectivity failure, retrying: %w", barmanRestorer.ErrConnectivity), + codes.Unavailable), + Entry("ErrGeneric -> Unavailable (barman uses exit 4 for some retryable cases too)", + fmt.Errorf("generic error: %w", barmanRestorer.ErrGeneric), + codes.Unavailable), + Entry("unknown error -> Internal", + errors.New("something we did not classify"), + codes.Internal), + ) + + It("matches the sentinel even through several wrapping layers", func() { + // The plugin wraps barman errors via fmt.Errorf("...: %w", ...); + // classification must keep working if more wraps appear above. + inner := fmt.Errorf("connectivity failure, retrying: %w", barmanRestorer.ErrConnectivity) + wrapped := fmt.Errorf("while restoring WAL %s: %w", walName, inner) + + got := classifyWALRestoreError(walName, wrapped) + st, ok := status.FromError(got) + Expect(ok).To(BeTrue()) + Expect(st.Code()).To(Equal(codes.Unavailable)) + }) + + It("treats ErrWALNotFound as terminal even when the error chain mentions other sentinels in its message", func() { + // Defensive: if the underlying error stringifies to something + // resembling another sentinel's message, the switch must still + // match by identity (errors.Is), not by substring. + walErr := fmt.Errorf("not found, looks like a connectivity failure: %w", barmanRestorer.ErrWALNotFound) + + got := classifyWALRestoreError(walName, walErr) + st, _ := status.FromError(got) + Expect(st.Code()).To(Equal(codes.NotFound)) + }) +}) diff --git a/internal/cnpgi/common/suite_test.go b/internal/cnpgi/common/suite_test.go new file mode 100644 index 00000000..52a93755 --- /dev/null +++ b/internal/cnpgi/common/suite_test.go @@ -0,0 +1,32 @@ +/* +Copyright © contributors to CloudNativePG, established as +CloudNativePG a Series of LF Projects, LLC. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +SPDX-License-Identifier: Apache-2.0 +*/ + +package common + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestCommon(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Common Suite") +} diff --git a/internal/cnpgi/common/wal.go b/internal/cnpgi/common/wal.go index 8e58cb42..7ccc55d5 100644 --- a/internal/cnpgi/common/wal.go +++ b/internal/cnpgi/common/wal.go @@ -365,11 +365,7 @@ func (w WALServiceImplementation) restoreFromBarmanObjectStore( // is the one that PostgreSQL has requested to restore. // The failure has already been logged in walRestorer.RestoreList method if walStatus[0].Err != nil { - if errors.Is(walStatus[0].Err, barmanRestorer.ErrWALNotFound) { - return newWALNotFoundError(walStatus[0].WalName) - } - - return walStatus[0].Err + return classifyWALRestoreError(walStatus[0].WalName, walStatus[0].Err) } // We skip this step if streaming connection is not available diff --git a/manifest.yaml b/manifest.yaml index 3b99c39b..85115eaa 100644 --- a/manifest.yaml +++ b/manifest.yaml @@ -175,6 +175,25 @@ spec: format: int32 minimum: 1 type: integer + restoreAdditionalCommandArgs: + description: |- + Additional arguments that can be appended to the 'barman-cloud-restore' + command-line invocation. These arguments provide flexibility to customize + the data restore process further, according to specific requirements or + configurations. + + Example: + In a scenario where specialized restore options are required, such as setting + a specific read timeout or defining custom behavior, users can use this field + to specify additional command arguments. + + Note: + It's essential to ensure that the provided arguments are valid and supported + by the 'barman-cloud-restore' command, to avoid potential errors or unintended + behavior during execution. + items: + type: string + type: array type: object destinationPath: description: |-