Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions internal/controller/core/device_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,6 @@ func (r *DeviceReconciler) reconcileMaintenance(ctx context.Context, obj *v1alph
if !ok {
return nil
}
delete(obj.Annotations, v1alpha1.DeviceMaintenanceAnnotation)

switch action {
case v1alpha1.DeviceMaintenanceReboot:
prov, ok := r.Provider().(provider.MaintenanceProvider)
Expand Down Expand Up @@ -499,9 +497,12 @@ func (r *DeviceReconciler) reconcileMaintenance(ctx context.Context, obj *v1alph

default:
r.Recorder.Eventf(obj, nil, "Warning", "UnknownMaintenanceAction", "Maintenance", "Unknown maintenance action: %s", action)
return fmt.Errorf("unknown maintenance action: %s", action)
return reconcile.TerminalError(fmt.Errorf("unknown maintenance action: %s", action))
}

// Only remove the annotation after the operation succeeds so that
// failed actions are retried on the next reconciliation.
delete(obj.Annotations, v1alpha1.DeviceMaintenanceAnnotation)
return nil
}

Expand Down
49 changes: 31 additions & 18 deletions internal/provider/cisco/nxos/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,14 @@ type Provider struct {
nxapi *nxapi.Client
}

// timeout is the default timeout for all HTTP/gRPC requests made by the provider.
const timeout = 30 * time.Second

func NewProvider() provider.Provider {
return &Provider{}
}

func (p *Provider) Connect(ctx context.Context, conn *deviceutil.Connection) (err error) {
// timeout is the default timeout for all HTTP/gRPC requests made by the provider.
const timeout = 30 * time.Second
p.conn, err = grpcext.NewClient(conn, grpcext.WithDefaultTimeout(timeout))
if err != nil {
return fmt.Errorf("failed to create grpc connection: %w", err)
Expand All @@ -91,7 +92,7 @@ func (p *Provider) Connect(ctx context.Context, conn *deviceutil.Connection) (er
}
// NXAPI only uses the address for URI construction.
c := *conn
c.Address = netip.MustParseAddrPort(conn.Address).String()
c.Address = netip.MustParseAddrPort(conn.Address).Addr().String()
p.nxapi, err = nxapi.NewClient(&c, timeout)
if err != nil {
return fmt.Errorf("failed to create nxapi client: %w", err)
Expand Down Expand Up @@ -141,23 +142,35 @@ func (p *Provider) FactoryReset(ctx context.Context, conn *deviceutil.Connection
return FactoryReset(ctx, p.conn)
}

func (p *Provider) Reprovision(ctx context.Context, conn *deviceutil.Connection) (reterr error) {
if err := p.Connect(ctx, conn); err != nil {
return err
}
defer func() {
if err := p.Disconnect(ctx, conn); err != nil {
reterr = errors.Join(reterr, err)
func (p *Provider) Reprovision(ctx context.Context, conn *deviceutil.Connection) error {
client := p.nxapi
if client == nil {
c := *conn
c.Address = netip.MustParseAddrPort(conn.Address).Addr().String()
var err error
client, err = nxapi.NewClient(&c, timeout)
if err != nil {
return fmt.Errorf("failed to create nxapi client: %w", err)
}
}()
// This is currently defunct on NX-OS, as enabling POAP requires a `copy running-config startup-config` which we
// cannot issue via GNMI
// TODO add once NXAPI client is available
poap := BootPOAP("enable")
if err := p.client.Update(ctx, &poap); err != nil {
return err
}
return Reboot(ctx, p.conn)

_, err := client.Do(ctx, nxapi.NewRequest(
"boot poap enable",
"copy running-config startup-config",
).WithRollback(nxapi.Stop))
if err != nil {
return fmt.Errorf("failed to prepare device for reprovisioning: %w", err)
}

// Reboot is issued as a separate request because it actually restarts
// the device. The connection will drop before a response is received,
// so transport errors are expected and tolerated.
_, err = client.Do(ctx, nxapi.NewRequest("reload"))
if err != nil && !nxapi.IsTransportError(err) {
return fmt.Errorf("failed to reboot device: %w", err)
}

return nil
}

func (p *Provider) ListPorts(ctx context.Context) ([]provider.DevicePort, error) {
Expand Down
136 changes: 136 additions & 0 deletions internal/provider/cisco/nxos/reprovision_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors
// SPDX-License-Identifier: Apache-2.0

package nxos

import (
"encoding/json"
"fmt"
"net"
"net/http"
"net/http/httptest"
"slices"
"testing"

"github.com/ironcore-dev/network-operator/internal/deviceutil"
"github.com/ironcore-dev/network-operator/internal/transport/nxapi"
)

func TestReprovision(t *testing.T) {
t.Run("success with connection drop on reload", func(t *testing.T) {
var requests [][]string
called := 0

srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var cmds []struct {
Params struct {
Cmd string `json:"cmd"`
} `json:"params"`
}
if err := json.NewDecoder(r.Body).Decode(&cmds); err != nil {
t.Fatalf("failed to decode request: %v", err)
}

batch := make([]string, len(cmds))
for i, c := range cmds {
batch[i] = c.Params.Cmd
}
requests = append(requests, batch)
called++

if called == 1 {
w.Header().Set("Content-Type", "application/json-rpc")
fmt.Fprint(w, `[
{"jsonrpc":"2.0","result":null,"id":1},
{"jsonrpc":"2.0","result":null,"id":2}
]`)
return
}

// Simulate device going down by closing connection abruptly.
hijacker, ok := w.(http.Hijacker)
if !ok {
t.Fatal("server does not support hijacking")
}
conn, _, err := hijacker.Hijack()
if err != nil {
t.Fatalf("hijack failed: %v", err)
}
conn.Close()
}))
defer srv.Close()

_, port, _ := net.SplitHostPort(srv.Listener.Addr().String())

Check failure on line 63 in internal/provider/cisco/nxos/reprovision_test.go

View workflow job for this annotation

GitHub Actions / Check Go Code

Error return value of `net.SplitHostPort` is not checked (errcheck)
conn := &deviceutil.Connection{Address: srv.Listener.Addr().String(), Username: "admin", Password: "secret"}
client, err := nxapi.NewClient(conn, 0, nxapi.WithPort(port))
if err != nil {
t.Fatalf("failed to create nxapi client: %v", err)
}

p := &Provider{nxapi: client}
if err := p.Reprovision(t.Context(), conn); err != nil {
t.Fatalf("Reprovision returned unexpected error: %v", err)
}

if len(requests) != 2 {
t.Fatalf("expected 2 NXAPI requests, got %d", len(requests))
}
if !slices.Equal(requests[0], []string{"boot poap enable", "copy running-config startup-config"}) {
t.Errorf("prep batch = %v", requests[0])
}
if !slices.Equal(requests[1], []string{"reload"}) {
t.Errorf("reload request = %v", requests[1])
}
})

t.Run("prep batch RPC error fails", func(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json-rpc")
w.WriteHeader(http.StatusBadRequest)
fmt.Fprint(w, `[{"jsonrpc":"2.0","error":{"code":-32602,"message":"Invalid command"},"id":1}]`)
}))
defer srv.Close()

_, port, _ := net.SplitHostPort(srv.Listener.Addr().String())

Check failure on line 94 in internal/provider/cisco/nxos/reprovision_test.go

View workflow job for this annotation

GitHub Actions / Check Go Code

Error return value of `net.SplitHostPort` is not checked (errcheck)
conn := &deviceutil.Connection{Address: srv.Listener.Addr().String(), Username: "admin", Password: "secret"}
client, err := nxapi.NewClient(conn, 0, nxapi.WithPort(port))
if err != nil {
t.Fatalf("failed to create nxapi client: %v", err)
}

p := &Provider{nxapi: client}
if err := p.Reprovision(t.Context(), conn); err == nil {
t.Fatal("expected error from prep batch, got nil")
}
})

t.Run("reload RPC error fails", func(t *testing.T) {
called := 0
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
called++
w.Header().Set("Content-Type", "application/json-rpc")
if called == 1 {
fmt.Fprint(w, `[
{"jsonrpc":"2.0","result":null,"id":1},
{"jsonrpc":"2.0","result":null,"id":2}
]`)
return
}
w.WriteHeader(http.StatusBadRequest)
fmt.Fprint(w, `[{"jsonrpc":"2.0","error":{"code":-32602,"message":"Permission denied"},"id":1}]`)
}))
defer srv.Close()

_, port, _ := net.SplitHostPort(srv.Listener.Addr().String())

Check failure on line 124 in internal/provider/cisco/nxos/reprovision_test.go

View workflow job for this annotation

GitHub Actions / Check Go Code

Error return value of `net.SplitHostPort` is not checked (errcheck)
conn := &deviceutil.Connection{Address: srv.Listener.Addr().String(), Username: "admin", Password: "secret"}
client, err := nxapi.NewClient(conn, 0, nxapi.WithPort(port))
if err != nil {
t.Fatalf("failed to create nxapi client: %v", err)
}

p := &Provider{nxapi: client}
if err := p.Reprovision(t.Context(), conn); err == nil {
t.Fatal("expected error from reload RPC failure, got nil")
}
})
}
8 changes: 0 additions & 8 deletions internal/provider/cisco/nxos/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,6 @@ func (*FirmwareVersion) XPath() string {
return "System/showversion-items/nxosVersion"
}

var _ gnmiext.DataElement = (*BootPOAP)(nil)

type BootPOAP string

func (*BootPOAP) XPath() string {
return "/System/boot-items/poap"
}

type BootTime UnixTime

func (*BootTime) XPath() string {
Expand Down
46 changes: 44 additions & 2 deletions internal/transport/nxapi/nxapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"errors"
"fmt"
"io"
"net"
"net/http"
"net/url"
"time"
Expand All @@ -35,16 +36,41 @@
uri string
}

// Option configures a [Client].
type Option func(*options)

type options struct {
port string
}

// WithPort overrides the port in the connection address.
// This is useful when NX-API is reachable on a different
// port (e.g. 8443) than the default (80/443).
func WithPort(port string) Option {
return func(o *options) { o.port = port }
}

// NewClient creates a new [Client] for the given connection.
// If the connection has a TLS configuration set, HTTPS is used; otherwise HTTP.
// The underlying HTTP client uses timeout for all requests; a value of 0
// means no timeout.
func NewClient(c *deviceutil.Connection, timeout time.Duration) (*Client, error) {
func NewClient(c *deviceutil.Connection, timeout time.Duration, opts ...Option) (*Client, error) {
var o options
for _, opt := range opts {
opt(&o)
}

addr := c.Address
if o.port != "" {
host, _, _ := net.SplitHostPort(addr)

Check failure on line 65 in internal/transport/nxapi/nxapi.go

View workflow job for this annotation

GitHub Actions / Check Go Code

Error return value of `net.SplitHostPort` is not checked (errcheck)
addr = net.JoinHostPort(host, o.port)
}

proto := "http"
if c.TLS != nil {
proto = "https"
}
uri, err := url.JoinPath(proto+"://"+c.Address, "ins")
uri, err := url.JoinPath(proto+"://"+addr, "ins")
if err != nil {
return nil, fmt.Errorf("nxapi: failed to join path: %w", err)
}
Expand Down Expand Up @@ -257,3 +283,19 @@
func (e *HTTPError) Error() string {
return fmt.Sprintf("nxapi: non-2xx status code: %d - %s", e.Code, string(e.Body))
}

// IsTransportError reports whether err is a network-level transport error
// (connection reset, timeout, EOF) as opposed to a logical error returned
// by the NX-API endpoint (RPCError, HTTPError). This is useful for callers
// that issue disruptive commands (e.g. reboot) where the device going down
// mid-request is expected.
func IsTransportError(err error) bool {
if err == nil {
return false
}
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
return true
}
var netErr net.Error
return errors.As(err, &netErr)
}
Loading
Loading