zebra_network/peer/
error.rs

1//! Peer-related errors.
2
3use std::{borrow::Cow, sync::Arc};
4
5use thiserror::Error;
6
7use tracing_error::TracedError;
8use zebra_chain::serialization::SerializationError;
9
10use crate::protocol::external::InventoryHash;
11
/// A cloneable wrapper around `Arc<TracedError<PeerError>>` that implements `Error`.
///
/// The `Error` implementation is derived with `#[error(transparent)]`, so it is forwarded
/// to the wrapped [`TracedError`]. Cloning only clones the [`Arc`], so every clone shares
/// the same underlying error.
#[derive(Error, Debug, Clone)]
#[error(transparent)]
pub struct SharedPeerError(Arc<TracedError<PeerError>>);
16
17impl<E> From<E> for SharedPeerError
18where
19    PeerError: From<E>,
20{
21    fn from(source: E) -> Self {
22        Self(Arc::new(TracedError::from(PeerError::from(source))))
23    }
24}
25
26impl SharedPeerError {
27    /// Returns a debug-formatted string describing the inner [`PeerError`].
28    ///
29    /// Unfortunately, [`TracedError`] makes it impossible to get a reference to the original error.
30    pub fn inner_debug(&self) -> String {
31        format!("{:?}", self.0.as_ref())
32    }
33}
34
/// An error related to peer connection handling.
///
/// Each variant's `Display` text comes from its `#[error(...)]` attribute.
#[derive(Error, Debug)]
// NOTE(review): presumably some variants are not constructed in every build
// configuration; confirm whether this allow is still needed.
#[allow(dead_code)]
pub enum PeerError {
    /// The remote peer closed the connection.
    #[error("Peer closed connection")]
    ConnectionClosed,

    /// Zebra dropped the [`Connection`](crate::peer::Connection).
    #[error("Internal connection dropped")]
    ConnectionDropped,

    /// Zebra dropped the [`Client`](crate::peer::Client).
    #[error("Internal client dropped")]
    ClientDropped,

    /// A [`Client`](crate::peer::Client)'s internal connection task exited.
    #[error("Internal peer connection task exited")]
    ConnectionTaskExited,

    /// Zebra's [`Client`](crate::peer::Client) cancelled its heartbeat task.
    #[error("Internal client cancelled its heartbeat task")]
    ClientCancelledHeartbeatTask,

    /// Zebra's internal heartbeat task exited.
    ///
    /// The string is included in the error message using its `Debug` format.
    #[error("Internal heartbeat task exited with message: {0:?}")]
    HeartbeatTaskExited(String),

    /// Sending a message to a remote peer took too long.
    #[error("Sending Client request timed out")]
    ConnectionSendTimeout,

    /// Receiving a response to a [`Client`](crate::peer::Client) request took too long.
    #[error("Receiving client response timed out")]
    ConnectionReceiveTimeout,

    /// A serialization error occurred while reading or writing a message.
    ///
    /// A [`SerializationError`] converts into this variant via `From`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] SerializationError),

    /// A badly-behaved remote peer sent a handshake message after the handshake was
    /// already complete.
    #[error("Remote peer sent handshake messages after handshake")]
    DuplicateHandshake,

    /// This node's internal services were overloaded, so the connection was dropped
    /// to shed load.
    #[error("Internal services over capacity")]
    Overloaded,

    /// There are no ready remote peers.
    #[error("No ready peers available")]
    NoReadyPeers,

    /// This peer's request caused an internal service timeout, so the connection was dropped
    /// to shed load or prevent attacks.
    #[error("Internal services timed out")]
    InboundTimeout,

    /// This node's internal services are no longer able to service requests.
    #[error("Internal services have failed or shutdown")]
    ServiceShutdown,

    /// We requested data, but the peer replied with a `notfound` message.
    /// (Or it didn't respond before the request finished.)
    ///
    /// This error happens when the peer doesn't have any of the requested data,
    /// so that the original request can be retried.
    ///
    /// This is a temporary error.
    ///
    /// Zebra can try different peers if the request is retried,
    /// or peers can download and verify the missing data.
    ///
    /// If the peer has some of the data, the request returns an [`Ok`] response,
    /// with any `notfound` data marked as [`Missing`][1].
    ///
    /// [1]: crate::protocol::internal::InventoryResponse::Missing
    #[error("Remote peer could not find any of the items: {0:?}")]
    NotFoundResponse(Vec<InventoryHash>),

    /// We requested data, but all our ready peers are marked as recently
    /// [`Missing`][1] that data in our local inventory registry.
    ///
    /// This is a temporary error.
    ///
    /// Peers with the inventory can finish their requests and become ready, or
    /// other peers can download and verify the missing data.
    ///
    /// # Correctness
    ///
    /// This error is produced using Zebra's local inventory registry, without
    /// contacting any peers.
    ///
    /// Client responses containing this error must not be used to update the
    /// inventory registry. This makes sure that we eventually expire our local
    /// cache of missing inventory, and send requests to peers again.
    ///
    /// [1]: crate::protocol::internal::InventoryResponse::Missing
    #[error("All ready peers are registered as recently missing these items: {0:?}")]
    NotFoundRegistry(Vec<InventoryHash>),
}
137
138impl PeerError {
139    /// Returns the Zebra internal handler type as a string.
140    pub fn kind(&self) -> Cow<'static, str> {
141        match self {
142            PeerError::ConnectionClosed => "ConnectionClosed".into(),
143            PeerError::ConnectionDropped => "ConnectionDropped".into(),
144            PeerError::ClientDropped => "ClientDropped".into(),
145            PeerError::ClientCancelledHeartbeatTask => "ClientCancelledHeartbeatTask".into(),
146            PeerError::HeartbeatTaskExited(_) => "HeartbeatTaskExited".into(),
147            PeerError::ConnectionTaskExited => "ConnectionTaskExited".into(),
148            PeerError::ConnectionSendTimeout => "ConnectionSendTimeout".into(),
149            PeerError::ConnectionReceiveTimeout => "ConnectionReceiveTimeout".into(),
150            // TODO: add error kinds or summaries to `SerializationError`
151            PeerError::Serialization(inner) => format!("Serialization({inner})").into(),
152            PeerError::DuplicateHandshake => "DuplicateHandshake".into(),
153            PeerError::Overloaded => "Overloaded".into(),
154            PeerError::NoReadyPeers => "NoReadyPeers".into(),
155            PeerError::InboundTimeout => "InboundTimeout".into(),
156            PeerError::ServiceShutdown => "ServiceShutdown".into(),
157            PeerError::NotFoundResponse(_) => "NotFoundResponse".into(),
158            PeerError::NotFoundRegistry(_) => "NotFoundRegistry".into(),
159        }
160    }
161}
162
/// A shared error slot for peer errors.
///
/// The slot holds an `Option<SharedPeerError>` behind an `Arc<std::sync::Mutex<_>>`,
/// so cloning an `ErrorSlot` produces another handle to the same slot.
/// The derived [`Default`] creates an empty slot.
///
/// # Correctness
///
/// Error slots are shared between sync and async code. In async code, the error
/// mutex should be held for as short a time as possible. This avoids blocking
/// the async task thread on acquiring the mutex.
///
/// The tokio documentation recommends this design:
///
/// > If the value behind the mutex is just data, it’s usually appropriate to use a blocking mutex
/// > ...
/// > wrap the `Arc<Mutex<...>>` in a struct
/// > that provides non-async methods for performing operations on the data within,
/// > and only lock the mutex inside these methods
///
/// <https://docs.rs/tokio/1.15.0/tokio/sync/struct.Mutex.html#which-kind-of-mutex-should-you-use>
#[derive(Default, Clone)]
pub struct ErrorSlot(Arc<std::sync::Mutex<Option<SharedPeerError>>>);
180
181impl std::fmt::Debug for ErrorSlot {
182    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
183        // don't hang if the mutex is locked
184        // show the panic if the mutex was poisoned
185        f.debug_struct("ErrorSlot")
186            .field("error", &self.0.try_lock())
187            .finish()
188    }
189}
190
191impl ErrorSlot {
192    /// Read the current error in the slot.
193    ///
194    /// Returns `None` if there is no error in the slot.
195    ///
196    /// # Correctness
197    ///
198    /// Briefly locks the error slot's threaded `std::sync::Mutex`, to get a
199    /// reference to the error in the slot.
200    #[allow(clippy::unwrap_in_result)]
201    pub fn try_get_error(&self) -> Option<SharedPeerError> {
202        self.0
203            .lock()
204            .expect("error mutex should be unpoisoned")
205            .as_ref()
206            .cloned()
207    }
208
209    /// Update the current error in the slot.
210    ///
211    /// Returns `Err(AlreadyErrored)` if there was already an error in the slot.
212    ///
213    /// # Correctness
214    ///
215    /// Briefly locks the error slot's threaded `std::sync::Mutex`, to check for
216    /// a previous error, then update the error in the slot.
217    #[allow(clippy::unwrap_in_result)]
218    pub fn try_update_error(&self, e: SharedPeerError) -> Result<(), AlreadyErrored> {
219        let mut guard = self.0.lock().expect("error mutex should be unpoisoned");
220
221        if let Some(original_error) = guard.clone() {
222            Err(AlreadyErrored { original_error })
223        } else {
224            *guard = Some(e);
225            Ok(())
226        }
227    }
228}
229
/// Error returned when the [`ErrorSlot`] already contains an error.
///
/// Returned by [`ErrorSlot::try_update_error`], which leaves the slot unchanged in this case.
#[derive(Clone, Debug)]
pub struct AlreadyErrored {
    /// The original error in the error slot.
    ///
    /// This is a clone of the stored [`SharedPeerError`]; the slot still holds the error.
    pub original_error: SharedPeerError,
}
236
/// An error during a handshake with a remote peer.
///
/// Each variant's `Display` text comes from its `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum HandshakeError {
    /// The remote peer sent an unexpected message during the handshake.
    ///
    /// The message is boxed, and shown in the error text using its `Debug` format.
    #[error("The remote peer sent an unexpected message: {0:?}")]
    UnexpectedMessage(Box<crate::protocol::external::Message>),
    /// The peer connector detected handshake nonce reuse, possibly indicating self-connection.
    #[error("Detected nonce reuse, possible self-connection")]
    RemoteNonceReuse,
    /// The peer connector created a duplicate random nonce. This is very unlikely,
    /// because the range of the data type is 2^64.
    #[error("Unexpectedly created a duplicate random local nonce")]
    LocalDuplicateNonce,
    /// The remote peer closed the connection.
    #[error("Peer closed connection")]
    ConnectionClosed,
    /// An error occurred while performing an IO operation.
    ///
    /// A [`std::io::Error`] converts into this variant via `From`.
    #[error("Underlying IO error: {0}")]
    Io(#[from] std::io::Error),
    /// A serialization error occurred while reading or writing a message.
    ///
    /// A [`SerializationError`] converts into this variant via `From`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] SerializationError),
    /// The remote peer offered a version older than our minimum version.
    #[error("Peer offered obsolete version: {0:?}")]
    ObsoleteVersion(crate::protocol::external::types::Version),
    /// Sending or receiving a message timed out.
    ///
    /// A [`tokio::time::error::Elapsed`] converts into this variant via `From`.
    #[error("Timeout when sending or receiving a message to peer")]
    Timeout,
}
266
267impl From<tokio::time::error::Elapsed> for HandshakeError {
268    fn from(_source: tokio::time::error::Elapsed) -> Self {
269        HandshakeError::Timeout
270    }
271}