Skip to content
Snippets Groups Projects
Commit adefa23d authored by Gregor Cerar
Browse files

update

parent d06e3999
No related branches found
No related tags found
No related merge requests found
Pipeline #65838 failed
......@@ -15,6 +15,7 @@ from src.structures import TrafficStats
logging.config.dictConfig(LOGGING_CONFIG)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
T = TypeVar("T")
......@@ -41,24 +42,25 @@ async def _query_instant(
return None, None
except (KeyError, ValueError) as parse_error:
logger.error("Error parsing response data:", exc_info=True)
logger.error("Error parsing response data:", exc_info=parse_error)
raise HTTPException(
status_code=status.HTTP_502_BAD_GATEWAY, detail=f"Error parsing Prometheus data: {parse_error}"
) from parse_error
) from None
except httpx.RequestError as request_error:
logger.error("HTTP request error:", exc_info=True)
logger.error("HTTP request error:", exc_info=request_error)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=f"HTTP request error: {request_error}"
) from request_error
) from None
except httpx.HTTPStatusError as status_error:
logger.error(
f"HTTP status error: {status_error.response.status_code} - {status_error.response.text}", exc_info=True
f"HTTP status error: {status_error.response.status_code} - {status_error.response.text}",
exc_info=status_error,
)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=f"HTTP status error: {status_error}"
) from status_error
) from None
async def _query_series(client: httpx.AsyncClient, cfg: PromConfig, query: str) -> list[dict[str, Any]]:
......@@ -73,24 +75,25 @@ async def _query_series(client: httpx.AsyncClient, cfg: PromConfig, query: str)
return []
except (AttributeError, KeyError, ValueError) as parse_error:
logger.error("Error parsing series response data:", exc_info=True)
logger.error("Error parsing series response data:", exc_info=parse_error)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error parsing Prometheus series data."
) from parse_error
) from None
except httpx.RequestError as request_error:
logger.error(f"HTTP request error: {request_error}", exc_info=True)
logger.error(f"HTTP request error: {request_error}", exc_info=request_error)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=f"HTTP request error: {request_error}"
) from request_error
) from None
except httpx.HTTPStatusError as status_error:
logger.error(
f"HTTP status error: {status_error.response.status_code} - {status_error.response.text}", exc_info=True
f"HTTP status error: {status_error.response.status_code} - {status_error.response.text}",
exc_info=status_error,
)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=f"HTTP status error: {status_error}"
) from status_error
) from None
async def _get_pod_inbound_traffic_bps(
......@@ -149,9 +152,9 @@ async def _get_pod_traffic_per_link_bytes_per_second(
return traffic_bytes
except Exception as default_error:
logger.error("Error parsing response data:", exc_info=True)
raise HTTPException(status_code=500, detail="Error parsing data.") from default_error
except Exception as ex:
logger.error("Error parsing response data:", exc_info=ex)
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error parsing data.") from None
async def _get_pod_inbound_traffic_rate(
......@@ -198,12 +201,12 @@ async def _get_outbound_traffic_rate_by_status_code(
return traffic_res_rate_by_code
except (AttributeError, KeyError, ValueError) as parse_error:
logger.error(f"Error parsing series response data: {parse_error}")
except (AttributeError, KeyError, ValueError) as ex:
logger.error("Error parsing series response data:", exc_info=ex)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=f"Error parsing Prometheus series data: {parse_error}",
) from parse_error
detail="Error parsing Prometheus series data.",
) from None
async def _get_pod_outbound_traffic_latency(
......@@ -228,10 +231,10 @@ async def _get_pod_outbound_traffic_latency(
return results
except Exception as ex:
logger.error(f"Request error while fetching pod traffic response stats:\n{ex}")
logger.error("Request error while fetching pod traffic response stats.", exc_info=ex)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=f"Request error while fetching data: {ex}"
) from ex
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Request error while fetching data"
) from None
app = FastAPI()
......@@ -290,14 +293,14 @@ async def get_workloads_list():
return workloads
async def get_pod_by_name_or_uid(client: httpx.AsyncClient, pod_name_or_uid: str, cfg: PromConfig) -> dict:
async def get_pod_by_name_or_uid(client: httpx.AsyncClient, pod_name_or_uid: str, cfg: PromConfig) -> dict[str, Any]:
try:
uid = UUID(pod_name_or_uid)
query = f'kube_pod_labels{{uid="{uid}"}}'
pod, _ = await _query_instant(client, cfg, query)
if pod is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Unable to obtain pod info.")
return pod
......@@ -310,7 +313,7 @@ async def get_pod_by_name_or_uid(client: httpx.AsyncClient, pod_name_or_uid: str
pod, _ = await _query_instant(client, cfg, query)
if pod is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Unable to obtain pod info.")
return pod
......@@ -332,37 +335,42 @@ async def get_pod_details(pod_name_or_uid: str) -> Optional[PodInfoExtended]:
_, value = await _query_instant(client, cfg, query)
return int(value)
async def get_num_replicas(client, pod):
async def get_num_replicas(client, pod) -> Optional[int]:
# Step 1: Get the ReplicaSet owner of the pod
query = f'kube_pod_owner{{namespace="{pod["namespace"]}", pod="{pod["pod"]}"}}'
metric, _ = await _query_instant(client, cfg, query)
if metric is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
# raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Unable to obtain pod owner")
return None
replicaset_name = metric.get("owner_name")
if replicaset_name is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
# raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Unable to obtain pod owner")
return None
# Step 2: Get the Deployment owner of the ReplicaSet
query = f'kube_replicaset_owner{{namespace="{pod["namespace"]}", replicaset="{replicaset_name}"}}'
metric, _ = await _query_instant(client, cfg, query)
if metric is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
# raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Unable to obtain replica owner")
return None
deployment_name = metric.get("owner_name")
if deployment_name is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
# raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="Unable to obtain replica owner")
return None
# Step 3: Figure out how many deployments are there
query = f'kube_deployment_spec_replicas{{namespace="{pod["namespace"]}", deployment="{deployment_name}"}}'
_, value = await _query_instant(client, cfg, query)
if value is None:
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
# raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY)
return None
return int(value)
......@@ -370,8 +378,21 @@ async def get_pod_details(pod_name_or_uid: str) -> Optional[PodInfoExtended]:
pod = await get_pod_by_name_or_uid(client, pod_name_or_uid, cfg)
uid = pod.get("uid")
if uid is None:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Invalid UID '{uid}'",
)
pod_name = pod.get("pod")
if pod_name is None:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Could not obtain pod name '{uid}' -> '{pod_name}'",
)
# obtain last valid state
pod["current_state"] = get_current_state(client, uid)
......@@ -407,4 +428,7 @@ async def get_traffic_stats(pod_name_or_uid: str):
pod = await get_pod_by_name_or_uid(client, pod_name_or_uid, cfg)
results = await _get_pod_traffic_per_link_bytes_per_second(client, pod["pod"], timestamp, cfg)
if not results:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Traffic data not available.")
return results
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment