feat: Refactor sensor health checker for improved response handling
Some checks failed
Integration Tests / integration-tests (push) Failing after 19s
Integration Tests / performance-tests (push) Has been skipped
Service Adapters (Python FastAPI) / test (3.11) (push) Failing after 55s
Service Adapters (Python FastAPI) / test (3.12) (push) Failing after 1m0s
Service Adapters (Python FastAPI) / test (3.13) (push) Failing after 58s
Service Adapters (Python FastAPI) / build (push) Has been skipped

### Summary of Changes
- Introduced `_handle_sensor_response` and `_handle_successful_response` methods to streamline the processing of sensor API responses.
- Enhanced readability and maintainability by breaking down complex logic into smaller, focused methods.
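- Added specific parsing methods for Home Assistant sensors (`_parse_home_assistant_sensor`, `_parse_uptime_sensor`, `_parse_timestamp_sensor`, `_parse_numeric_uptime_sensor`, `_parse_system_sensor`) plus a shared `_parse_generic_sensor` fallback, to improve clarity and separation of concerns.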

### Expected Results
- Improved code organization, making it easier to understand and extend the health checker functionality.
- Enhanced error handling and response management for sensor data, leading to more robust health checks.
This commit is contained in:
GSRN
2025-09-18 13:11:50 +02:00
parent 7eaea39928
commit 227597b512

View File

@@ -9,6 +9,7 @@ import logging
import time
from typing import Any, Dict, Optional
import httpx
from httpx import HTTPError, TimeoutException
from utils.time_formatter import format_uptime_for_frontend
@@ -63,7 +64,56 @@ class SensorHealthChecker(BaseHealthChecker):
logger.info(f"Service {service_name} sensor responded with status {response.status_code} in {response_time:.3f}s")
return self._handle_sensor_response(response, service_name, sensor_entity, response_time)
except TimeoutException:
logger.error(f"Service {service_name} timed out after {self.timeout}s")
return HealthCheckResult("timeout", error=f"Request timed out after {self.timeout}s")
except HTTPError as e:
logger.error(f"HTTP error checking {service_name}: {str(e)}")
return HealthCheckResult("error", error=f"HTTP error: {str(e)}")
except Exception as e:
logger.error(f"Unexpected error checking {service_name}: {str(e)}")
return HealthCheckResult("error", error=f"Unexpected error: {str(e)}")
def _handle_sensor_response(
    self,
    response: httpx.Response,
    service_name: str,
    sensor_entity: str,
    response_time: float,
) -> HealthCheckResult:
    """
    Map a sensor API response onto a HealthCheckResult by HTTP status code.

    A 200 response is delegated to ``_handle_successful_response``; every
    other status is logged as a warning and reported directly:
    401 -> "unauthorized", 404 -> "error", anything else -> "unhealthy".

    Args:
        response: HTTP response from sensor API
        service_name: Name of the service (used in log messages)
        sensor_entity: Sensor entity ID (used in the 404 message)
        response_time: Response time in seconds

    Returns:
        HealthCheckResult with status information
    """
    # Success path: hand off to the dedicated parser.
    if response.status_code == 200:
        return self._handle_successful_response(
            response, service_name, sensor_entity, response_time
        )

    # Credentials missing or rejected by the sensor API.
    if response.status_code == 401:
        logger.warning(f"Service {service_name} returned 401 - authentication required")
        return HealthCheckResult("unauthorized", response_time, "Authentication required")

    # The requested sensor entity does not exist on the service.
    if response.status_code == 404:
        logger.warning(f"Service {service_name} sensor {sensor_entity} not found")
        return HealthCheckResult("error", response_time, f"Sensor {sensor_entity} not found")

    # Any other status code is treated as an unhealthy service.
    logger.warning(f"Service {service_name} returned {response.status_code}")
    return HealthCheckResult("unhealthy", response_time, f"HTTP {response.status_code}")
def _handle_successful_response(self, response: httpx.Response, service_name: str, sensor_entity: str, response_time: float) -> HealthCheckResult:
"""
Handle successful sensor API response (200 status).
Args:
response: HTTP response from sensor API
service_name: Name of the service
sensor_entity: Sensor entity ID
response_time: Response time in seconds
Returns:
HealthCheckResult with parsed sensor data
"""
# Parse sensor data
sensor_data = response.json()
logger.debug(f"Raw sensor data for {service_name}: {sensor_data}")
@@ -86,25 +136,6 @@ class SensorHealthChecker(BaseHealthChecker):
}
return HealthCheckResult(health_status, response_time, metadata=metadata, uptime=formatted_uptime)
elif response.status_code == 401:
logger.warning(f"Service {service_name} returned 401 - authentication required")
return HealthCheckResult("unauthorized", response_time, "Authentication required")
elif response.status_code == 404:
logger.warning(f"Service {service_name} sensor {sensor_entity} not found")
return HealthCheckResult("error", response_time, f"Sensor {sensor_entity} not found")
else:
logger.warning(f"Service {service_name} returned {response.status_code}")
return HealthCheckResult("unhealthy", response_time, f"HTTP {response.status_code}")
except TimeoutException:
logger.error(f"Service {service_name} timed out after {self.timeout}s")
return HealthCheckResult("timeout", error=f"Request timed out after {self.timeout}s")
except HTTPError as e:
logger.error(f"HTTP error checking {service_name}: {str(e)}")
return HealthCheckResult("error", error=f"HTTP error: {str(e)}")
except Exception as e:
logger.error(f"Unexpected error checking {service_name}: {str(e)}")
return HealthCheckResult("error", error=f"Unexpected error: {str(e)}")
def _parse_sensor_data(self, sensor_data: Dict[str, Any], service_name: str) -> str:
"""
@@ -126,20 +157,66 @@ class SensorHealthChecker(BaseHealthChecker):
# Service-specific sensor parsing
if service_name == "home_assistant":
# For HA, check uptime sensor or system health
return self._parse_home_assistant_sensor(state, entity_id, attributes)
else:
return self._parse_generic_sensor(state)
except Exception as e:
logger.error(f"Could not parse sensor data from {service_name}: {e}")
return "unhealthy"
def _parse_home_assistant_sensor(
    self, state: str, entity_id: str, attributes: Dict[str, Any]
) -> str:
    """
    Route a Home Assistant sensor to the parser matching its entity ID.

    Entity IDs containing "uptime" go to the uptime parser, those containing
    "system" go to the system-health parser, and everything else falls back
    to the generic parser. "uptime" is checked first, so it wins when both
    substrings are present.

    Args:
        state: Sensor state value
        entity_id: Sensor entity ID
        attributes: Sensor attributes (only consulted for uptime sensors)

    Returns:
        Health status string
    """
    if "uptime" in entity_id:
        # The uptime parser inspects the attributes to tell timestamp
        # sensors apart from numeric ones.
        return self._parse_uptime_sensor(state, attributes)

    if "system" in entity_id:
        return self._parse_system_sensor(state)

    return self._parse_generic_sensor(state)
def _parse_uptime_sensor(self, state: str, attributes: Dict[str, Any]) -> str:
    """
    Parse an uptime sensor, which may report a timestamp or a number.

    The sensor's ``device_class`` attribute decides the interpretation:
    "timestamp" selects the timestamp parser; any other value (including a
    missing attribute) selects the numeric uptime parser.

    Args:
        state: Sensor state value
        attributes: Sensor attributes

    Returns:
        Health status string
    """
    is_timestamp = attributes.get("device_class", "") == "timestamp"
    parse = (
        self._parse_timestamp_sensor
        if is_timestamp
        else self._parse_numeric_uptime_sensor
    )
    return parse(state)
def _parse_timestamp_sensor(self, state: str) -> str:
"""
Parse timestamp sensor data.
Args:
state: Sensor state value (timestamp string)
Returns:
Health status string
"""
try:
from datetime import datetime
from datetime import datetime, timezone
# Try to parse the timestamp
parsed_time = datetime.fromisoformat(state.replace("Z", "+00:00"))
# If we can parse it and it's recent (within last 24 hours), it's healthy
from datetime import datetime, timezone
now = datetime.now(timezone.utc)
time_diff = now - parsed_time
is_healthy = time_diff.total_seconds() < 86400 # 24 hours
@@ -148,8 +225,17 @@ class SensorHealthChecker(BaseHealthChecker):
except (ValueError, TypeError) as e:
logger.warning(f"Could not parse timestamp '{state}': {e}")
return "unhealthy"
else:
# Numeric uptime sensor - check if it's a valid number
def _parse_numeric_uptime_sensor(self, state: str) -> str:
"""
Parse numeric uptime sensor data.
Args:
state: Sensor state value (numeric string)
Returns:
Health status string
"""
try:
uptime_seconds = float(state)
# If uptime > 0, service is healthy
@@ -159,26 +245,35 @@ class SensorHealthChecker(BaseHealthChecker):
except ValueError:
logger.warning(f"Uptime sensor state '{state}' is not a valid number")
return "unhealthy"
elif "system" in entity_id:
# System health sensor
def _parse_system_sensor(self, state: str) -> str:
    """
    Parse a system health sensor state.

    The state is compared case-insensitively against the accepted values
    "ok", "healthy" and "online"; anything else is reported as unhealthy.

    Args:
        state: Sensor state value

    Returns:
        "healthy" or "unhealthy"
    """
    normalized = state.lower()
    is_healthy = normalized in ("ok", "healthy", "online")
    logger.debug(f"System sensor: state={state}, healthy: {is_healthy}")

    if is_healthy:
        return "healthy"
    return "unhealthy"
else:
# Generic sensor - check if state indicates health
is_healthy = state.lower() not in ["unavailable", "unknown", "off"]
logger.debug(f"Generic sensor: state={state}, healthy: {is_healthy}")
return "healthy" if is_healthy else "unhealthy"
else:
# Generic sensor parsing
def _parse_generic_sensor(self, state: str) -> str:
    """
    Parse a generic sensor state.

    A sensor is considered healthy unless its state, compared
    case-insensitively, is one of "unavailable", "unknown", "off" or "error".

    Args:
        state: Sensor state value

    Returns:
        "healthy" or "unhealthy"
    """
    normalized = state.lower()
    is_healthy = normalized not in ("unavailable", "unknown", "off", "error")
    logger.debug(f"Generic sensor: state={state}, healthy: {is_healthy}")

    if is_healthy:
        return "healthy"
    return "unhealthy"
except Exception as e:
logger.error(f"Could not parse sensor data from {service_name}: {e}")
return "unhealthy"
def _extract_uptime_info(self, sensor_data: Dict[str, Any], service_name: str) -> Optional[str]:
"""
Extract uptime information from sensor data for top-level display.