Skip to content
1 change: 1 addition & 0 deletions crates/router/src/routes/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ counter_metric!(APPLE_PAY_MANUAL_FLOW_FAILED_PAYMENT, GLOBAL_METER);
counter_metric!(APPLE_PAY_SIMPLIFIED_FLOW_FAILED_PAYMENT, GLOBAL_METER);

// Metrics for Auto Retries
counter_metric!(AUTO_RETRY_CONNECTION_CLOSED, GLOBAL_METER);
counter_metric!(AUTO_RETRY_ELIGIBLE_REQUEST_COUNT, GLOBAL_METER);
counter_metric!(AUTO_RETRY_GSM_MISS_COUNT, GLOBAL_METER);
counter_metric!(AUTO_RETRY_GSM_FETCH_FAILURE_COUNT, GLOBAL_METER);
Expand Down
103 changes: 81 additions & 22 deletions crates/router/src/services/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,7 @@ pub async fn send_request(
key: consts::METRICS_HOST_TAG_NAME.into(),
value: url.host_str().unwrap_or_default().to_string().into(),
};

let send_request = async {
let request = {
match request.method {
Method::Get => client.get(url),
Method::Post => {
Expand Down Expand Up @@ -607,32 +606,92 @@ pub async fn send_request(
.timeout(Duration::from_secs(
option_timeout_secs.unwrap_or(crate::consts::REQUEST_TIME_OUT),
))
.send()
.await
.map_err(|error| match error {
error if error.is_timeout() => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::RequestTimeoutReceived
}
error if is_connection_closed(&error) => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::ConnectionClosed
}
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
})
.into_report()
.attach_printable("Unable to send request to connector")
};

metrics_request::record_operation_time(
// We cannot clone the request type, because it has Form trait which is not clonable. So we are cloning the request builder here.
let cloned_send_request = request.try_clone().map(|cloned_request| async {
cloned_request
.send()
.await
.map_err(|error| match error {
error if error.is_timeout() => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::RequestTimeoutReceived
}
error if is_connection_closed_before_message_could_complete(&error) => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::ConnectionClosedIncompleteMessage
}
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
})
.into_report()
.attach_printable("Unable to send request to connector")
});

let send_request = async {
request
.send()
.await
.map_err(|error| match error {
error if error.is_timeout() => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::RequestTimeoutReceived
}
error if is_connection_closed_before_message_could_complete(&error) => {
metrics::REQUEST_BUILD_FAILURE.add(&metrics::CONTEXT, 1, &[]);
errors::ApiClientError::ConnectionClosedIncompleteMessage
}
_ => errors::ApiClientError::RequestNotSent(error.to_string()),
})
.into_report()
.attach_printable("Unable to send request to connector")
};

let response = metrics_request::record_operation_time(
send_request,
&metrics::EXTERNAL_REQUEST_TIME,
&[metrics_tag],
&[metrics_tag.clone()],
)
.await
.await;
// Retry once if the response is connection closed.
//
// This is just due to the racy nature of networking.
// hyper has a connection pool of idle connections, and it selected one to send your request.
// Most of the time, hyper will receive the server’s FIN and drop the dead connection from its pool.
// But occasionally, a connection will be selected from the pool
// and written to at the same time the server is deciding to close the connection.
// Since hyper already wrote some of the request,
// it can’t really retry it automatically on a new connection, since the server may have acted already
match response {
Ok(response) => Ok(response),
Err(error)
if error.current_context()
== &errors::ApiClientError::ConnectionClosedIncompleteMessage =>
{
metrics::AUTO_RETRY_CONNECTION_CLOSED.add(&metrics::CONTEXT, 1, &[]);
match cloned_send_request {
Some(cloned_request) => {
logger::info!(
"Retrying request due to connection closed before message could complete"
);
metrics_request::record_operation_time(
cloned_request,
&metrics::EXTERNAL_REQUEST_TIME,
&[metrics_tag],
)
.await
}
None => {
logger::info!("Retrying request due to connection closed before message could complete failed as request is not clonable");
Err(error)
}
}
}
err @ Err(_) => err,
}
}

fn is_connection_closed(error: &reqwest::Error) -> bool {
fn is_connection_closed_before_message_could_complete(error: &reqwest::Error) -> bool {
let mut source = error.source();
while let Some(err) = source {
if let Some(hyper_err) = err.downcast_ref::<hyper::Error>() {
Expand Down Expand Up @@ -1674,7 +1733,7 @@ pub fn build_redirection_form(

// Initialize the ThreeDSService
const threeDS = gateway.get3DSecure();

const options = {{
customerVaultId: '{customer_vault_id}',
currency: '{currency}',
Expand Down
6 changes: 3 additions & 3 deletions crates/storage_impl/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ pub enum ApiClientError {
RequestTimeoutReceived,

#[error("connection closed before a message could complete")]
ConnectionClosed,
ConnectionClosedIncompleteMessage,

#[error("Server responded with Internal Server Error")]
InternalServerErrorReceived,
Expand All @@ -285,8 +285,8 @@ impl ApiClientError {
pub fn is_upstream_timeout(&self) -> bool {
self == &Self::RequestTimeoutReceived
}
pub fn is_connection_closed(&self) -> bool {
self == &Self::ConnectionClosed
pub fn is_connection_closed_before_message_could_complete(&self) -> bool {
self == &Self::ConnectionClosedIncompleteMessage
}
}

Expand Down