Skip to content

Commit a086d5b

Browse files
committed
Apply health check related tweaks
Signed-off-by: declark1 <[email protected]>
1 parent 9495057 commit a086d5b

File tree

9 files changed

+133
-187
lines changed

9 files changed

+133
-187
lines changed

src/clients.rs

Lines changed: 52 additions & 78 deletions
Large diffs are not rendered by default.

src/clients/chunker.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,14 +134,14 @@ impl Client for ChunkerClient {
134134
}
135135
Err(status) => status.code(),
136136
};
137-
let health_status = if matches!(code, Code::Ok) {
137+
let status = if matches!(code, Code::Ok) {
138138
HealthStatus::Healthy
139139
} else {
140140
HealthStatus::Unhealthy
141141
};
142142
HealthCheckResult {
143-
health_status,
144-
response_code: ClientCode::Grpc(code),
143+
status,
144+
code: ClientCode::Grpc(code),
145145
reason: None,
146146
}
147147
}

src/clients/http.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,18 @@ impl HttpClient {
3737
if let Ok(body) = response.json::<OptionalHealthCheckResponseBody>().await {
3838
// If the service provided a body, we only anticipate a minimal health status and optional reason.
3939
HealthCheckResult {
40-
health_status: body.health_status.clone(),
41-
response_code: ClientCode::Http(StatusCode::OK),
42-
reason: match body.health_status {
40+
status: body.status.clone(),
41+
code: ClientCode::Http(StatusCode::OK),
42+
reason: match body.status {
4343
HealthStatus::Healthy => None,
4444
_ => body.reason,
4545
},
4646
}
4747
} else {
4848
// If the service did not provide a body, we assume it is healthy.
4949
HealthCheckResult {
50-
health_status: HealthStatus::Healthy,
51-
response_code: ClientCode::Http(StatusCode::OK),
50+
status: HealthStatus::Healthy,
51+
code: ClientCode::Http(StatusCode::OK),
5252
reason: None,
5353
}
5454
}
@@ -59,7 +59,7 @@ impl HttpClient {
5959
// Regardless we can't be certain, so the reason is also provided.
6060
// TODO: We will likely circle back to re-evaluate this logic in the future
6161
// when we know more about how the client health results will be used.
62-
health_status: if response.status().as_u16() >= 500
62+
status: if response.status().as_u16() >= 500
6363
&& response.status().as_u16() < 600
6464
{
6565
HealthStatus::Unhealthy
@@ -74,7 +74,7 @@ impl HttpClient {
7474
);
7575
HealthStatus::Unknown
7676
},
77-
response_code: ClientCode::Http(response.status()),
77+
code: ClientCode::Http(response.status()),
7878
reason: Some(format!(
7979
"{}{}",
8080
response.error_for_status_ref().unwrap_err(),
@@ -94,10 +94,8 @@ impl HttpClient {
9494
Err(e) => {
9595
error!("error checking health: {}", e);
9696
HealthCheckResult {
97-
health_status: HealthStatus::Unknown,
98-
response_code: ClientCode::Http(
99-
e.status().unwrap_or(StatusCode::INTERNAL_SERVER_ERROR),
100-
),
97+
status: HealthStatus::Unknown,
98+
code: ClientCode::Http(e.status().unwrap_or(StatusCode::INTERNAL_SERVER_ERROR)),
10199
reason: Some(e.to_string()),
102100
}
103101
}

src/clients/nlp.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,14 +137,14 @@ impl Client for NlpClient {
137137
}
138138
Err(status) => status.code(),
139139
};
140-
let health_status = if matches!(code, Code::Ok) {
140+
let status = if matches!(code, Code::Ok) {
141141
HealthStatus::Healthy
142142
} else {
143143
HealthStatus::Unhealthy
144144
};
145145
HealthCheckResult {
146-
health_status,
147-
response_code: ClientCode::Grpc(code),
146+
status,
147+
code: ClientCode::Grpc(code),
148148
reason: None,
149149
}
150150
}

src/clients/tgis.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,14 +102,14 @@ impl Client for TgisClient {
102102
}
103103
Err(status) => status.code(),
104104
};
105-
let health_status = if matches!(code, Code::Ok) {
105+
let status = if matches!(code, Code::Ok) {
106106
HealthStatus::Healthy
107107
} else {
108108
HealthStatus::Unhealthy
109109
};
110110
HealthCheckResult {
111-
health_status,
112-
response_code: ClientCode::Grpc(code),
111+
status,
112+
code: ClientCode::Grpc(code),
113113
reason: None,
114114
}
115115
}

src/health.rs

Lines changed: 51 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ use std::{collections::HashMap, fmt::Display};
33
use axum::http::StatusCode;
44
use serde::{ser::SerializeStruct, Deserialize, Serialize};
55
use tonic::Code;
6-
use tracing::{error, warn};
76

8-
use crate::{clients::ClientCode, pb::grpc::health::v1::HealthCheckResponse};
7+
use crate::{
8+
clients::ClientCode,
9+
pb::grpc::health::v1::{health_check_response::ServingStatus, HealthCheckResponse},
10+
};
911

1012
/// Health status determined for or returned by a client service.
1113
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
@@ -31,50 +33,30 @@ impl Display for HealthStatus {
3133

3234
impl From<HealthCheckResponse> for HealthStatus {
3335
fn from(value: HealthCheckResponse) -> Self {
34-
// NOTE: gRPC Health v1 status codes: 0 = UNKNOWN, 1 = SERVING, 2 = NOT_SERVING, 3 = SERVICE_UNKNOWN
35-
match value.status {
36-
1 => Self::Healthy,
37-
2 => Self::Unhealthy,
38-
_ => Self::Unknown,
36+
match value.status() {
37+
ServingStatus::Serving => Self::Healthy,
38+
ServingStatus::NotServing => Self::Unhealthy,
39+
ServingStatus::Unknown | ServingStatus::ServiceUnknown => Self::Unknown,
3940
}
4041
}
4142
}
4243

4344
impl From<StatusCode> for HealthStatus {
4445
fn from(code: StatusCode) -> Self {
4546
match code.as_u16() {
46-
200 => Self::Healthy,
47-
201..=299 => {
48-
warn!(
49-
"Unexpected HTTP successful health check response status code: {}",
50-
code
51-
);
52-
Self::Healthy
53-
}
54-
503 => Self::Unhealthy,
55-
500..=502 | 504..=599 => {
56-
warn!(
57-
"Unexpected HTTP server error health check response status code: {}",
58-
code
59-
);
60-
Self::Unhealthy
61-
}
62-
_ => {
63-
warn!(
64-
"Unexpected HTTP client error health check response status code: {}",
65-
code
66-
);
67-
Self::Unknown
68-
}
47+
200..=299 => Self::Healthy,
48+
500..=599 => Self::Unhealthy,
49+
_ => Self::Unknown,
6950
}
7051
}
7152
}
7253

73-
/// Holds health check results for all clients.
54+
/// A cache to hold the latest health check results for each client service.
55+
/// Orchestrator has a reference-counted mutex-protected instance of this cache.
7456
#[derive(Debug, Clone, Default, Serialize)]
75-
pub struct ClientHealth(HashMap<String, HealthCheckResult>);
57+
pub struct HealthCheckCache(HashMap<String, HealthCheckResult>);
7658

77-
impl ClientHealth {
59+
impl HealthCheckCache {
7860
pub fn new() -> Self {
7961
Self(HashMap::new())
8062
}
@@ -83,82 +65,78 @@ impl ClientHealth {
8365
Self(HashMap::with_capacity(capacity))
8466
}
8567

68+
/// Returns `true` if all services are healthy or unknown.
8669
pub fn healthy(&self) -> bool {
8770
!self
8871
.0
8972
.iter()
90-
.any(|(_, value)| matches!(value.health_status, HealthStatus::Unhealthy))
73+
.any(|(_, value)| matches!(value.status, HealthStatus::Unhealthy))
9174
}
9275
}
9376

94-
impl std::ops::Deref for ClientHealth {
77+
impl std::ops::Deref for HealthCheckCache {
9578
type Target = HashMap<String, HealthCheckResult>;
9679

9780
fn deref(&self) -> &Self::Target {
9881
&self.0
9982
}
10083
}
10184

102-
impl std::ops::DerefMut for ClientHealth {
85+
impl std::ops::DerefMut for HealthCheckCache {
10386
fn deref_mut(&mut self) -> &mut Self::Target {
10487
&mut self.0
10588
}
10689
}
10790

91+
impl Display for HealthCheckCache {
92+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93+
write!(f, "{}", serde_json::to_string(self).unwrap())
94+
}
95+
}
96+
97+
impl HealthCheckResponse {
98+
pub fn reason(&self) -> Option<String> {
99+
let status = self.status();
100+
match status {
101+
ServingStatus::Serving => None,
102+
_ => Some(status.as_str_name().to_string()),
103+
}
104+
}
105+
}
106+
108107
/// Result of a health check request.
109108
#[derive(Debug, Clone)]
110109
pub struct HealthCheckResult {
111110
/// Overall health status of client service.
112111
/// `HEALTHY`, `UNHEALTHY`, or `UNKNOWN`.
113-
pub health_status: HealthStatus,
112+
pub status: HealthStatus,
114113
/// Response code of the latest health check request.
115114
/// This should be omitted on serialization if the health check was successful (when the response is `HTTP 200 OK` or `gRPC 0 OK`).
116-
pub response_code: ClientCode,
115+
pub code: ClientCode,
117116
/// Optional reason for the health check result status being `UNHEALTHY` or `UNKNOWN`.
118117
/// May be omitted overall if the health check was successful.
119118
pub reason: Option<String>,
120119
}
121120

122-
impl HealthCheckResult {
123-
pub fn reason_from_health_check_response(response: &HealthCheckResponse) -> Option<String> {
124-
match response.status {
125-
0 => Some("from gRPC health check serving status: UNKNOWN".to_string()),
126-
1 => None,
127-
2 => Some("from gRPC health check serving status: NOT_SERVING".to_string()),
128-
3 => Some("from gRPC health check serving status: SERVICE_UNKNOWN".to_string()),
129-
_ => {
130-
error!(
131-
"Unexpected gRPC health check serving status: {}",
132-
response.status
133-
);
134-
Some(format!(
135-
"Unexpected gRPC health check serving status: {}",
136-
response.status
137-
))
138-
}
139-
}
140-
}
141-
}
142-
143121
impl Serialize for HealthCheckResult {
144122
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
145123
where
146124
S: serde::Serializer,
147125
{
148-
match self.health_status {
149-
HealthStatus::Healthy => self.health_status.serialize(serializer),
126+
match self.status {
127+
HealthStatus::Healthy => self.status.serialize(serializer),
150128
_ => match &self.reason {
151129
Some(reason) => {
152130
let mut state = serializer.serialize_struct("HealthCheckResult", 3)?;
153-
state.serialize_field("health_status", &self.health_status)?;
154-
state.serialize_field("response_code", &self.response_code.to_string())?;
131+
state.serialize_field("status", &self.status)?;
132+
state.serialize_field("code", &self.code.to_string())?;
155133
state.serialize_field("reason", reason)?;
156134
state.end()
157135
}
158136
None => {
159137
let mut state = serializer.serialize_struct("HealthCheckResult", 2)?;
160-
state.serialize_field("health_status", &self.health_status)?;
161-
state.serialize_field("response_code", &self.response_code.to_string())?;
138+
state.serialize_field("status", &self.status)?;
139+
state.serialize_field("code", &self.code.to_string())?;
162140
state.end()
163141
}
164142
},
@@ -169,12 +147,8 @@ impl Serialize for HealthCheckResult {
169147
impl Display for HealthCheckResult {
170148
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171149
match &self.reason {
172-
Some(reason) => write!(
173-
f,
174-
"{} ({})\n\t\t\t{}",
175-
self.health_status, self.response_code, reason
176-
),
177-
None => write!(f, "{} ({})", self.health_status, self.response_code),
150+
Some(reason) => write!(f, "{} ({})\n\t\t\t{}", self.status, self.code, reason),
151+
None => write!(f, "{} ({})", self.status, self.code),
178152
}
179153
}
180154
}
@@ -185,14 +159,14 @@ impl From<Result<tonic::Response<HealthCheckResponse>, tonic::Status>> for Healt
185159
Ok(response) => {
186160
let response = response.into_inner();
187161
Self {
188-
health_status: response.into(),
189-
response_code: ClientCode::Grpc(Code::Ok),
190-
reason: Self::reason_from_health_check_response(&response),
162+
status: response.into(),
163+
code: ClientCode::Grpc(Code::Ok),
164+
reason: response.reason(),
191165
}
192166
}
193167
Err(status) => Self {
194-
health_status: HealthStatus::Unknown,
195-
response_code: ClientCode::Grpc(status.code()),
168+
status: HealthStatus::Unknown,
169+
code: ClientCode::Grpc(status.code()),
196170
reason: Some(format!("gRPC health check failed: {}", status)),
197171
},
198172
}
@@ -205,7 +179,7 @@ impl From<Result<tonic::Response<HealthCheckResponse>, tonic::Status>> for Healt
205179
#[derive(Deserialize)]
206180
pub struct OptionalHealthCheckResponseBody {
207181
/// `HEALTHY`, `UNHEALTHY`, or `UNKNOWN`. Although `HEALTHY` is already implied without a body.
208-
pub health_status: HealthStatus,
182+
pub status: HealthStatus,
209183
/// Optional reason for the health check result status being `UNHEALTHY` or `UNKNOWN`.
210184
/// May be omitted overall if the health check was successful.
211185
#[serde(default)]

src/models.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ use serde::{Deserialize, Serialize};
2323

2424
use crate::{
2525
clients::detector::{ContentAnalysisResponse, ContextType},
26-
health::ClientHealth,
26+
health::HealthCheckCache,
2727
pb,
2828
};
2929

3030
#[derive(Clone, Debug, Serialize)]
3131
pub struct InfoResponse {
32-
pub client_health: ClientHealth,
32+
pub services: HealthCheckCache,
3333
}
3434

3535
#[derive(Debug, Clone, Deserialize)]

0 commit comments

Comments
 (0)