Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions tests/integration/standard/test_ssl_connection_failures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Integration tests for SSL connection error handling with simulated failures.

These tests simulate real-world SSL connection failures that can occur when:
- Nodes reboot during active connections
- Network issues cause abrupt connection closure
- SSL handshake failures

The tests verify that the driver properly handles these scenarios and
provides meaningful error messages that include the root cause.

See: https://github.com/scylladb/python-driver/issues/614
"""

import unittest
import socket
import ssl
import errno
import threading
import time
from unittest.mock import Mock, patch, MagicMock

from cassandra.connection import ConnectionShutdown, DefaultEndPoint
from cassandra.cluster import NoHostAvailable

# The asyncio reactor is optional: if it cannot be imported, AsyncioConnection
# is bound to None so the tests in this module can be skipped rather than
# failing at import time.
try:
    from cassandra.io.asyncioreactor import AsyncioConnection
except ImportError:
    AsyncioConnection = None


@unittest.skipIf(AsyncioConnection is None, "asyncio reactor not available")
class AsyncioSSLConnectionFailureTest(unittest.TestCase):
    """
    SSL failure handling as observed through AsyncioConnection.

    Each test marks a connection defunct with a simulated socket error and
    then checks what the connection reports afterwards.
    """

    def test_socket_error_preserved_in_asyncio(self):
        """
        The error handed to defunct() must be kept as the connection's last_error.
        """
        # Certificate verification is deliberately disabled (CERT_NONE);
        # this is insecure and acceptable only inside tests.
        endpoint = DefaultEndPoint('127.0.0.1', 9999)
        connection = AsyncioConnection(
            endpoint,
            ssl_options={'cert_reqs': ssl.CERT_NONE}
        )

        # Pretend the socket was closed underneath the connection.
        simulated = OSError(errno.EBADF, "Bad file descriptor")
        connection.defunct(simulated)

        # The connection is flagged defunct and still carries the error.
        self.assertTrue(connection.is_defunct)
        self.assertEqual(connection.last_error, simulated)
        self.assertIn("Bad file descriptor", str(connection.last_error))

    def test_connection_reset_in_asyncio(self):
        """
        A connection reset must surface in the ConnectionShutdown raised by send_msg().
        """
        # Certificate verification is deliberately disabled (CERT_NONE);
        # this is insecure and acceptable only inside tests.
        endpoint = DefaultEndPoint('127.0.0.1', 9999)
        connection = AsyncioConnection(
            endpoint,
            ssl_options={'cert_reqs': ssl.CERT_NONE}
        )

        simulated = OSError(errno.ECONNRESET, "Connection reset by peer")
        connection.defunct(simulated)

        self.assertTrue(connection.is_defunct)
        self.assertEqual(connection.last_error, simulated)

        # Sending on the defunct connection has to raise, and the raised
        # exception text has to mention the original reset error.
        message = Mock()
        callback = Mock()
        with self.assertRaises(ConnectionShutdown) as raised:
            connection.send_msg(message, 1, callback)

        self.assertIn("Connection reset by peer", str(raised.exception))


@unittest.skipIf(AsyncioConnection is None, "asyncio reactor not available")
class SSLErrorMessageQualityTest(unittest.TestCase):
    """
    Test that error messages are informative and include root causes.

    These tests verify that when "Bad file descriptor" errors occur,
    users can see what originally caused the problem.

    The whole class is skipped when the asyncio reactor cannot be imported,
    using the same class-level ``skipIf`` guard as the other test class in
    this module.
    """

    def _make_connection(self):
        """
        Build the AsyncioConnection used by the tests in this class.

        Returns:
            An AsyncioConnection for 127.0.0.1:9999 with SSL options set.
        """
        # Note: Using CERT_NONE for testing only - this is intentionally insecure
        return AsyncioConnection(
            DefaultEndPoint('127.0.0.1', 9999),
            ssl_options={'cert_reqs': ssl.CERT_NONE}
        )

    def test_error_message_includes_root_cause(self):
        """
        Verify that ConnectionShutdown messages include the root cause error.
        """
        conn = self._make_connection()

        # Simulate root cause
        root_cause = OSError(errno.ECONNRESET, "Connection reset by peer")
        conn.defunct(root_cause)

        # Try to use the connection
        with self.assertRaises(ConnectionShutdown) as cm:
            conn.send_msg(Mock(), 1, Mock())

        error_message = str(cm.exception)

        # Verify the error message is informative: it must name the root
        # cause and say that the connection is defunct.
        self.assertIn("Connection reset by peer", error_message)
        self.assertIn("defunct", error_message.lower())

    def test_multiple_errors_preserves_first(self):
        """
        Verify that when multiple errors occur, the first (root cause) is preserved.
        """
        conn = self._make_connection()

        # First error - the root cause
        root_cause = OSError(errno.ETIMEDOUT, "Connection timed out")
        conn.defunct(root_cause)

        # Verify first error is preserved (the very same exception object)
        self.assertIs(conn.last_error, root_cause)

        # Second call to defunct should not overwrite
        conn.defunct(OSError(errno.EBADF, "Bad file descriptor"))

        # Root cause should still be preserved
        self.assertIs(conn.last_error, root_cause)
        self.assertIn("Connection timed out", str(conn.last_error))


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
122 changes: 122 additions & 0 deletions tests/unit/SSL_CONNECTION_TESTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# SSL Connection Error Handling Tests

This document describes the tests for SSL connection error handling (unit tests in this directory, integration tests under `tests/integration/standard/`), specifically addressing the "Bad file descriptor" errors that occur when nodes reboot while using client encryption.

## Background

### Issue
When using client encryption and nodes reboot, the driver reported errors like:
```
cassandra.connection.ConnectionShutdown: [Errno 9] Bad file descriptor
```

The issue occurred because:
1. A connection is forcefully closed (e.g., due to node reboot)
2. Parallel operations attempt to read/write to the closed socket
3. This results in "Bad file descriptor" errors
4. The original cause of the connection closure could be lost

See: https://github.com/scylladb/python-driver/issues/614

### Solution
The driver already has proper error handling via the `last_error` mechanism in the `Connection` class. When a connection becomes defunct, the error that caused it is stored in `last_error` and included in subsequent `ConnectionShutdown` exception messages.

This test suite verifies that this mechanism works correctly across the SSL error scenarios listed below.

## Test Files

### Unit Tests (`test_ssl_connection_errors.py`)
14 unit tests that mock SSL connection failures, including:

1. **Basic SSL Socket Errors**
- `test_ssl_socket_bad_file_descriptor_on_send`: Simulates EBADF during send
- `test_ssl_socket_bad_file_descriptor_on_recv`: Simulates EBADF during recv
- `test_ssl_socket_broken_pipe_error`: Simulates EPIPE (broken pipe)
- `test_ssl_socket_connection_aborted_error`: Simulates ECONNABORTED
- `test_ssl_socket_errno_enotconn`: Simulates ENOTCONN

2. **SSL-Specific Errors**
- `test_ssl_connection_error_during_handshake`: SSL handshake failures
- `test_ssl_unwrap_error_on_close`: SSL unwrap errors during close

3. **Race Condition Tests**
- `test_concurrent_operations_on_closed_ssl_socket`: Multiple threads using closed socket
- `test_parallel_send_on_defuncting_connection`: Thread trying to send while connection defuncts
- `test_node_reboot_scenario`: Complete node reboot simulation

4. **Error Preservation Tests**
- `test_multiple_error_scenarios_last_error_preserved`: Verifies first error is preserved
- `test_wait_for_responses_includes_ssl_error`: Error included in wait_for_responses
- `test_ssl_error_on_closed_connection_send_msg`: Error included in send_msg

### Integration Tests (`test_ssl_connection_failures.py`)
4 integration tests with actual connection implementations:

1. **AsyncioConnection Tests** (2 tests)
- Socket error preservation
- Connection reset handling

2. **Error Message Quality Tests** (2 tests)
- Error message includes root cause
- Multiple errors preserve first error

## Running the Tests

### Unit Tests Only
```bash
pytest tests/unit/test_ssl_connection_errors.py -v
```

### Integration Tests (requires running cluster)
```bash
pytest tests/integration/standard/test_ssl_connection_failures.py -v
```

### All Unit Connection Tests
```bash
pytest tests/unit/test_connection.py tests/unit/test_ssl_connection_errors.py -v
```

## Test Coverage

The tests verify:
- ✅ Original error is captured in `last_error` field
- ✅ `ConnectionShutdown` exceptions include root cause
- ✅ Concurrent operations see original error, not just "Bad file descriptor"
- ✅ First error is preserved when multiple errors occur
- ✅ Mechanism works across different error types (EBADF, EPIPE, ECONNRESET, etc.)
- ✅ AsyncioConnection handles errors correctly

## Code Changes

**No driver code changes were needed**. The existing error handling mechanism was already correct. These tests document and verify the expected behavior.

## Key Implementation Details

When a connection error occurs:

1. The error is caught in the reactor's `handle_error()`, `handle_write()`, or `handle_read()` method
2. `defunct(exc)` is called with the exception
3. `defunct()` stores the exception in `self.last_error`
4. Subsequent operations check `is_defunct` and raise `ConnectionShutdown` with the original error

Example from `connection.py`:
```python
def send_msg(self, msg, request_id, cb, ...):
    if self.is_defunct:
        msg = "Connection to %s is defunct" % self.endpoint
        if self.last_error:
            msg += ": %s" % (self.last_error,)  # Include original error
        raise ConnectionShutdown(msg)
```

## Contributing

When adding new connection error handling code:
1. Add corresponding tests to `test_ssl_connection_errors.py`
2. Ensure `last_error` is set when connections become defunct
3. Verify error messages include the root cause
4. Test concurrent operation scenarios

## Related Issues
- https://github.com/scylladb/python-driver/issues/614
Loading