healthchecks_healthchecks/hc/lib/tests/test_s3.py
Pēteris Caune b5d4f2aa74
Implement S3 outage mitigation
The mitigation is to not attempt GetObject calls if there have
been more than 3 S3 errors in the past minute. The implementation
uses the TokenBucket class that we normally use for rate-limiting.

An example scenario this is trying to avoid is:

* The S3 service becomes unavailable for 10 straight minutes.
  Each S3 request hangs until we hit the configured timeout
  (settings.S3_TIMEOUT).
* A client is frequently requesting the "Get ping's logged body"
  API call. Each call causes one webserver process to become
  busy for S3_TIMEOUT seconds.
* All workers become busy, request backlog fills up, our service
  starts returning 5xx errors.

With the mitigation, during an S3 outage, only the calls that
retrieve a ping's logged body will return 503; the rest of the
service will (hopefully) work normally.

Fixes: #1114
2025-01-13 14:21:42 +02:00

63 lines
2.5 KiB
Python

from __future__ import annotations
from unittest import skipIf
from unittest.mock import call, Mock, patch
from django.test import TestCase
from django.test.utils import override_settings
from hc.lib.s3 import get_object, GetObjectError
try:
from minio import InvalidResponseError, S3Error
from urllib3.exceptions import InvalidHeader, ProtocolError
have_minio = True
except ImportError:
have_minio = False
@skipIf(not have_minio, "minio not installed")
@override_settings(S3_BUCKET="dummy-bucket")
class S3TestCase(TestCase):
    """Tests for how ``hc.lib.s3.get_object`` reacts to client failures.

    Every test replaces ``hc.lib.s3._client`` with a mock, so no real S3
    requests are made. The ``patch`` decorators are applied bottom-up, which
    is why the ``client`` mock is always the first injected argument.
    """

    @patch("hc.lib.s3.statsd")
    @patch("hc.lib.s3._client")
    def test_get_object_handles_nosuchkey(self, client: Mock, statsd: Mock) -> None:
        """A "NoSuchKey" S3Error yields ``None`` rather than an exception."""
        e = S3Error("NoSuchKey", "b", "c", "d", "e", Mock())
        client.get_object.return_value.read = Mock(side_effect=e)

        self.assertIsNone(get_object("dummy-code", 1))
        # Should not increase the error counter for NoSuchKey responses
        self.assertEqual(statsd.incr.mock_calls, [call("hc.lib.s3.getObject")])

    @patch("hc.lib.s3.statsd")
    @patch("hc.lib.s3._client")
    def test_get_object_handles_s3error(self, client: Mock, statsd: Mock) -> None:
        """An S3Error with another code is re-raised as ``GetObjectError``."""
        e = S3Error("DummyError", "b", "c", "d", "e", Mock())
        client.get_object.return_value.read = Mock(side_effect=e)

        with self.assertRaises(GetObjectError):
            get_object("dummy-code", 1)

        client.get_object.assert_called_once()
        statsd.incr.assert_called_once()

    @patch("hc.lib.s3._client")
    def test_get_object_handles_urllib_exceptions(self, client: Mock) -> None:
        """urllib3 ProtocolError / InvalidHeader become ``GetObjectError``."""
        for exc_class in (ProtocolError, InvalidHeader):
            # subTest labels a failure with the offending exception type and
            # lets the remaining types run even if an earlier one fails.
            with self.subTest(exception=exc_class.__name__):
                # Clear the call count left over from the previous iteration
                # so assert_called_once() only sees this iteration's call.
                client.get_object.reset_mock()
                client.get_object.return_value.read = Mock(side_effect=exc_class)

                with self.assertRaises(GetObjectError):
                    get_object("dummy-code", 1)

                client.get_object.assert_called_once()

    @patch("hc.lib.s3._client")
    def test_get_object_handles_invalidresponseerror(self, client: Mock) -> None:
        """minio's InvalidResponseError is re-raised as ``GetObjectError``."""
        e = InvalidResponseError(123, "text/plain", None)
        client.get_object.return_value.read = Mock(side_effect=e)

        with self.assertRaises(GetObjectError):
            get_object("dummy-code", 1)

        client.get_object.assert_called_once()

    @override_settings(S3_BUCKET=None)
    @patch("hc.lib.s3._client")
    def test_get_object_handles_no_s3_configuration(self, client: Mock) -> None:
        """With ``S3_BUCKET`` unset, ``None`` is returned and S3 is not called."""
        self.assertIsNone(get_object("dummy-code", 1))
        client.get_object.assert_not_called()