zebra_network/peer_set/set.rs

//! Abstractions that represent "the rest of the network".
//!
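//! Callers treat the peer set as a single [`tower::Service`] endpoint. A minimal usage
//! sketch (assuming a buffered peer set service handle, like the one returned by this
//! crate's `init`):
//!
//! ```ignore
//! use tower::{Service, ServiceExt};
//!
//! // Wait until some peer is ready, then route a single request to it.
//! let response = peer_set.ready().await?.call(Request::Peers).await?;
//! ```
//!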
//! # Implementation
//!
//! The [`PeerSet`] implementation is adapted from the one in [tower::Balance][tower-balance].
//!
//! As described in Tower's documentation, it:
//!
//! > Distributes requests across inner services using the [Power of Two Choices][p2c].
//! >
//! > As described in the [Finagle Guide][finagle]:
//! >
//! > > The algorithm randomly picks two services from the set of ready endpoints and
//! > > selects the least loaded of the two. By repeatedly using this strategy, we can
//! > > expect a manageable upper bound on the maximum load of any server.
//! > >
//! > > The maximum load variance between any two servers is bound by `ln(ln(n))` where
//! > > `n` is the number of servers in the cluster.
//!
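//! A minimal sketch of the selection step described above, using a hypothetical `Peer`
//! type with a `load()` method (not Zebra's actual types):
//!
//! ```ignore
//! fn select_p2c(peers: &[Peer]) -> Option<&Peer> {
//!     match peers.len() {
//!         0 => None,
//!         1 => peers.first(),
//!         len => {
//!             // Sample two distinct random candidates, then keep the less loaded one.
//!             let idxs = rand::seq::index::sample(&mut rand::thread_rng(), len, 2);
//!             let (a, b) = (&peers[idxs.index(0)], &peers[idxs.index(1)]);
//!             Some(if a.load() <= b.load() { a } else { b })
//!         }
//!     }
//! }
//! ```
//!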
//! The Power of Two Choices should work well for many network requests, but not all of them.
//! Some requests should only be made to a subset of connected peers.
//! For example, a request for a particular inventory item
//! should be made to a peer that has recently advertised that inventory hash.
//! Other requests require broadcasts, such as transaction diffusion.
//!
//! Implementing this specialized routing logic inside the `PeerSet` -- so that
//! it continues to abstract away "the rest of the network" into one endpoint --
//! is not a problem, as the `PeerSet` can simply maintain more information on
//! its peers and route requests appropriately. However, there is a problem with
//! maintaining accurate backpressure information, because the `Service` trait
//! requires that service readiness is independent of the data in the request.
//!
//! For this reason, in the future, this code will probably be refactored to
//! address this backpressure mismatch. One possibility is to refactor the code
//! so that one entity holds and maintains the peer set and metadata on the
//! peers, and each "backpressure category" of request is assigned to different
//! `Service` impls with specialized `poll_ready()` implementations. Another
//! less-elegant solution (which might be useful as an intermediate step for the
//! inventory case) is to provide a way to borrow a particular backing service,
//! say by address.
//!
//! [finagle]: https://twitter.github.io/finagle/guide/Clients.html#power-of-two-choices-p2c-least-loaded
//! [p2c]: http://www.eecs.harvard.edu/~michaelm/postscripts/handbook2001.pdf
//! [tower-balance]: https://github.com/tower-rs/tower/tree/master/tower/src/balance
//!
//! # Behavior During Network Upgrades
//!
//! [ZIP-201] specifies peer behavior during network upgrades:
//!
//! > With scheduled network upgrades, at the activation height, nodes on each consensus branch
//! > should disconnect from nodes on other consensus branches and only accept new incoming
//! > connections from nodes on the same consensus branch.
//!
//! Zebra handles this with the help of [`MinimumPeerVersion`], which determines the minimum peer
//! protocol version to accept based on the current best chain tip height. The minimum version is
//! therefore automatically increased when the block height reaches a network upgrade's activation
//! height. The helper type is then used to:
//!
//! - cancel handshakes to outdated peers, in `handshake::negotiate_version`
//! - cancel requests to and disconnect from peers that have become outdated, in
//!   [`PeerSet::push_unready`]
//! - disconnect from peers that have just responded and became outdated, in
//!   [`PeerSet::poll_unready`]
//! - disconnect from idle peers that have become outdated, in
//!   [`PeerSet::disconnect_from_outdated_peers`]
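//!
//! A minimal sketch of the version check these code paths share (assuming `peer` and
//! `minimum_peer_version` bindings like the ones this module already uses):
//!
//! ```ignore
//! // Drop peers whose negotiated protocol version is below the minimum
//! // required at the current best chain tip height.
//! if peer.remote_version() < minimum_peer_version.current() {
//!     // Outdated peers are dropped; unready peers also get their requests cancelled.
//!     std::mem::drop(peer);
//! }
//! ```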
//!
//! ## Network Coalescence
//!
//! [ZIP-201] also specifies how Zcashd behaves [leading up to an activation
//! height][1]. Since Zcashd limits the number of connections to at most eight
//! peers, it will gradually migrate its connections to up-to-date peers as it
//! approaches the activation height.
//!
//! The motivation for this behavior is to avoid abruptly partitioning the network, which can lead
//! to isolated peers and increase the chance of an eclipse attack on some peers of the network.
//!
//! Zebra does not gradually migrate its peers as it approaches an activation height. This is
//! because Zebra by default can connect to up to 75 peers, as can be seen in [`Config::default`].
//! Since this is a lot larger than the 8 peers Zcashd connects to, an eclipse attack becomes a lot
//! more costly to execute, and the probability of an abrupt network partition that isolates peers
//! is lower.
//!
//! Even if a Zebra node is manually configured to connect to a smaller number
//! of peers, the [`AddressBook`][2] is configured to hold a large number of
//! peer addresses ([`MAX_ADDRS_IN_ADDRESS_BOOK`][3]). Since the address book
//! prioritizes addresses it trusts (like those that it has successfully
//! connected to before), the node should be able to recover and rejoin the
//! network by itself, as long as the address book is populated with enough
//! entries.
//!
//! [1]: https://zips.z.cash/zip-0201#network-coalescence
//! [2]: crate::AddressBook
//! [3]: crate::constants::MAX_ADDRS_IN_ADDRESS_BOOK
//! [ZIP-201]: https://zips.z.cash/zip-0201

use std::{
    collections::{HashMap, HashSet},
    convert,
    fmt::Debug,
    marker::PhantomData,
    net::IpAddr,
    pin::Pin,
    task::{Context, Poll},
    time::Instant,
};

use futures::{
    channel::{mpsc, oneshot},
    future::{FutureExt, TryFutureExt},
    prelude::*,
    stream::FuturesUnordered,
    task::noop_waker,
};
use itertools::Itertools;
use num_integer::div_ceil;
use tokio::{
    sync::{broadcast, watch},
    task::JoinHandle,
};
use tower::{
    discover::{Change, Discover},
    load::Load,
    Service,
};

use zebra_chain::chain_tip::ChainTip;

use crate::{
    address_book::AddressMetrics,
    constants::MIN_PEER_SET_LOG_INTERVAL,
    peer::{LoadTrackedClient, MinimumPeerVersion},
    peer_set::{
        unready_service::{Error as UnreadyError, UnreadyService},
        InventoryChange, InventoryRegistry,
    },
    protocol::{
        external::InventoryHash,
        internal::{Request, Response},
    },
    BoxError, Config, PeerError, PeerSocketAddr, SharedPeerError,
};

#[cfg(test)]
mod tests;

/// A signal sent by the [`PeerSet`] when it has no ready peers, and gets a request from Zebra.
///
/// In response to this signal, the crawler tries to open more peer connections.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct MorePeers;

/// A signal sent by the [`PeerSet`] to cancel a [`Client`][1]'s current request
/// or response.
///
/// When it receives this signal, the [`Client`][1] stops processing and exits.
///
/// [1]: crate::peer::Client
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct CancelClientWork;

/// A [`tower::Service`] that abstractly represents "the rest of the network".
///
/// # Security
///
/// The `Discover::Key` must be the transient remote address of each peer. This
/// address may only be valid for the duration of a single connection. (For
/// example, inbound connections have an ephemeral remote port, and proxy
/// connections have an ephemeral local or proxy port.)
///
/// Otherwise, malicious peers could interfere with other peers' `PeerSet` state.
pub struct PeerSet<D, C>
where
    D: Discover<Key = PeerSocketAddr, Service = LoadTrackedClient> + Unpin,
    D::Error: Into<BoxError>,
    C: ChainTip,
{
    // Peer Tracking: New Peers
    //
    /// Provides new and deleted peer [`Change`]s to the peer set,
    /// via the [`Discover`] trait implementation.
    discover: D,

    /// A channel that asks the peer crawler task to connect to more peers.
    demand_signal: mpsc::Sender<MorePeers>,

    // Peer Tracking: Ready Peers
    //
    /// Connected peers that are ready to receive requests from Zebra,
    /// or send requests to Zebra.
    ready_services: HashMap<D::Key, D::Service>,

    // Request Routing
    //
    /// Stores gossiped inventory hashes from connected peers.
    ///
    /// Used to route inventory requests to peers that are likely to have it.
    inventory_registry: InventoryRegistry,

    // Peer Tracking: Busy Peers
    //
    /// Connected peers that are handling a Zebra request,
    /// or Zebra is handling one of their requests.
    unready_services: FuturesUnordered<UnreadyService<D::Key, D::Service, Request>>,

    /// Channels used to cancel the request that an unready service is doing.
    cancel_handles: HashMap<D::Key, oneshot::Sender<CancelClientWork>>,

    // Peer Validation
    //
    /// An endpoint to see the minimum peer protocol version in real time.
    ///
    /// The minimum version depends on the block height, and [`MinimumPeerVersion`] listens for
    /// height changes and determines the correct minimum version.
    minimum_peer_version: MinimumPeerVersion<C>,

    /// The configured limit for inbound and outbound connections.
    ///
    /// The peer set panics if this size is exceeded.
    /// If that happens, our connection limit code has a bug.
    peerset_total_connection_limit: usize,

    // Background Tasks
    //
    /// Channel for passing ownership of tokio JoinHandles from PeerSet's background tasks
    ///
    /// The join handles passed into the PeerSet are used to populate the `guards` member.
    handle_rx: tokio::sync::oneshot::Receiver<Vec<JoinHandle<Result<(), BoxError>>>>,

    /// Unordered set of handles to background tasks associated with the `PeerSet`
    ///
    /// These guards are checked for errors as part of `poll_ready` which lets
    /// the `PeerSet` propagate errors from background tasks back to the user
    guards: futures::stream::FuturesUnordered<JoinHandle<Result<(), BoxError>>>,

    // Metrics and Logging
    //
    /// Address book metrics watch channel.
    ///
    /// Used for logging diagnostics.
    address_metrics: watch::Receiver<AddressMetrics>,

    /// The last time we logged a message about the peer set size
    last_peer_log: Option<Instant>,

    /// The configured maximum number of peers that can be in the
    /// peer set per IP. Defaults to [`crate::constants::DEFAULT_MAX_CONNS_PER_IP`].
    max_conns_per_ip: usize,
}

impl<D, C> Drop for PeerSet<D, C>
where
    D: Discover<Key = PeerSocketAddr, Service = LoadTrackedClient> + Unpin,
    D::Error: Into<BoxError>,
    C: ChainTip,
{
    fn drop(&mut self) {
        // We don't have access to the current task (if any), so we just drop everything we can.
        let waker = noop_waker();
        let mut cx = Context::from_waker(&waker);

        self.shut_down_tasks_and_channels(&mut cx);
    }
}

impl<D, C> PeerSet<D, C>
where
    D: Discover<Key = PeerSocketAddr, Service = LoadTrackedClient> + Unpin,
    D::Error: Into<BoxError>,
    C: ChainTip,
{
    #[allow(clippy::too_many_arguments)]
    /// Construct a peerset which uses `discover` to manage peer connections.
    ///
    /// Arguments:
    /// - `config`: configures the peer set connection limit;
    /// - `discover`: handles peer connects and disconnects;
    /// - `demand_signal`: requests more peers when all peers are busy (unready);
    /// - `handle_rx`: receives background task handles,
    ///                monitors them to make sure they're still running,
    ///                and shuts down all the tasks as soon as one task exits;
    /// - `inv_stream`: receives inventory changes from peers,
    ///                 allowing the peer set to direct inventory requests;
    /// - `address_metrics`: when the peer set is busy, it logs address book diagnostics.
    /// - `minimum_peer_version`: endpoint to see the minimum peer protocol version in real time.
    /// - `max_conns_per_ip`: configured maximum number of peers that can be in the
    ///                       peer set per IP, defaults to the config value or to
    ///                       [`crate::constants::DEFAULT_MAX_CONNS_PER_IP`].
    pub fn new(
        config: &Config,
        discover: D,
        demand_signal: mpsc::Sender<MorePeers>,
        handle_rx: tokio::sync::oneshot::Receiver<Vec<JoinHandle<Result<(), BoxError>>>>,
        inv_stream: broadcast::Receiver<InventoryChange>,
        address_metrics: watch::Receiver<AddressMetrics>,
        minimum_peer_version: MinimumPeerVersion<C>,
        max_conns_per_ip: Option<usize>,
    ) -> Self {
        Self {
            // New peers
            discover,
            demand_signal,

            // Ready peers
            ready_services: HashMap::new(),
            // Request Routing
            inventory_registry: InventoryRegistry::new(inv_stream),

            // Busy peers
            unready_services: FuturesUnordered::new(),
            cancel_handles: HashMap::new(),

            // Peer validation
            minimum_peer_version,
            peerset_total_connection_limit: config.peerset_total_connection_limit(),

            // Background tasks
            handle_rx,
            guards: futures::stream::FuturesUnordered::new(),

            // Metrics
            last_peer_log: None,
            address_metrics,

            max_conns_per_ip: max_conns_per_ip.unwrap_or(config.max_connections_per_ip),
        }
    }

    /// Check background task handles to make sure they're still running.
    ///
    /// Never returns `Ok`.
    ///
    /// If any background task exits, shuts down all other background tasks,
    /// and returns an error. Otherwise, returns `Pending`, and registers a wakeup for
    /// receiving the background tasks, or the background tasks exiting.
    fn poll_background_errors(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), BoxError>> {
        futures::ready!(self.receive_tasks_if_needed(cx))?;

        // Return Pending if all background tasks are still running.
        match futures::ready!(Pin::new(&mut self.guards).poll_next(cx)) {
            Some(res) => {
                info!(
                    background_tasks = %self.guards.len(),
                    "a peer set background task exited, shutting down other peer set tasks"
                );

                self.shut_down_tasks_and_channels(cx);

                // Flatten the join result and inner result, and return any errors.
                res.map_err(Into::into)
                    // TODO: replace with Result::flatten when it stabilises (#70142)
                    .and_then(convert::identity)?;

                // Turn Ok() task exits into errors.
                Poll::Ready(Err("a peer set background task exited".into()))
            }

            None => {
                self.shut_down_tasks_and_channels(cx);
                Poll::Ready(Err("all peer set background tasks have exited".into()))
            }
        }
    }

    /// Receive background tasks, if they've been sent on the channel, but not consumed yet.
    ///
    /// Returns a result representing the current task state, or `Poll::Pending` if the background
    /// tasks should be polled again to check their state.
    fn receive_tasks_if_needed(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), BoxError>> {
        if self.guards.is_empty() {
            // Return Pending if the tasks have not been sent yet.
            let handles = futures::ready!(Pin::new(&mut self.handle_rx).poll(cx));

            match handles {
                // The tasks have been sent, but not consumed yet.
                Ok(handles) => {
                    // Currently, the peer set treats an empty background task set as an error.
                    //
                    // TODO: refactor `handle_rx` and `guards` into an enum
                    //       for the background task state: Waiting/Running/Shutdown.
                    assert!(
                        !handles.is_empty(),
                        "the peer set requires at least one background task"
                    );

                    self.guards.extend(handles);

                    Poll::Ready(Ok(()))
                }

                // The sender was dropped without sending the tasks.
                Err(_) => Poll::Ready(Err(
                    "sender did not send peer background tasks before it was dropped".into(),
                )),
            }
        } else {
            Poll::Ready(Ok(()))
        }
    }

    /// Shut down:
    /// - services by dropping the service lists
    /// - background tasks via their join handles or cancel handles
    /// - channels by closing the channel
    fn shut_down_tasks_and_channels(&mut self, cx: &mut Context<'_>) {
        // Drop services and cancel their background tasks.
        self.ready_services = HashMap::new();

        for (_peer_key, handle) in self.cancel_handles.drain() {
            let _ = handle.send(CancelClientWork);
        }
        self.unready_services = FuturesUnordered::new();

        // Close the MorePeers channel for all senders,
        // so we don't add more peers to a shut down peer set.
        self.demand_signal.close_channel();

        // Shut down background tasks, ignoring pending polls.
        self.handle_rx.close();
        let _ = self.receive_tasks_if_needed(cx);
        for guard in self.guards.iter() {
            guard.abort();
        }
    }

    /// Check busy peer services for request completion or errors.
    ///
    /// Move newly ready services to the ready list if they are for peers with supported protocol
    /// versions; otherwise, drop them. Also drop failed services.
    ///
    /// Never returns an error.
    ///
    /// Returns `Ok(Some(()))` if at least one peer became ready, `Poll::Pending` if there are
    /// unready peers but none became ready, and `Ok(None)` if there were no unready peers.
    ///
    /// If there are any remaining unready peers, registers a wakeup for the next time one becomes
    /// ready. If there are no unready peers, doesn't register any wakeups. (Since wakeups come
    /// from peers, there needs to be at least one peer to register a wakeup.)
    fn poll_unready(&mut self, cx: &mut Context<'_>) -> Poll<Result<Option<()>, BoxError>> {
        let mut result = Poll::Pending;

        // # Correctness
        //
        // `poll_next()` must always be called, because `self.unready_services` could have been
        // empty before the call to `self.poll_ready()`.
        //
        // > When new futures are added, `poll_next` must be called in order to begin receiving
        // > wake-ups for new futures.
        //
        // <https://docs.rs/futures/latest/futures/stream/futures_unordered/struct.FuturesUnordered.html>
        //
        // Returns Pending if we've finished processing the unready service changes,
        // but there are still some unready services.
        loop {
            // No ready peers left, but there are some unready peers pending.
            let Poll::Ready(ready_peer) = Pin::new(&mut self.unready_services).poll_next(cx) else {
                break;
            };

            match ready_peer {
                // No unready peers in the list.
                None => {
                    // If we've finished processing the unready service changes, and there are no
                    // unready services left, it doesn't make sense to return Pending, because
                    // their stream is terminated. But when we add more unready peers and call
                    // `poll_next()`, its termination status will be reset, and it will receive
                    // wakeups again.
                    if result.is_pending() {
                        result = Poll::Ready(Ok(None));
                    }

                    break;
                }

                // Unready -> Ready
                Some(Ok((key, svc))) => {
                    trace!(?key, "service became ready");

                    self.push_ready(true, key, svc);

                    // Return Ok if at least one peer became ready.
                    result = Poll::Ready(Ok(Some(())));
                }

                // Unready -> Canceled
                Some(Err((key, UnreadyError::Canceled))) => {
                    // A service can be canceled because we've connected to the same service twice.
                    // In that case, there is a cancel handle for the peer address,
                    // but it belongs to the service for the newer connection.
                    trace!(
                        ?key,
                        duplicate_connection = self.cancel_handles.contains_key(&key),
                        "service was canceled, dropping service"
                    );
                }
                Some(Err((key, UnreadyError::CancelHandleDropped(_)))) => {
                    // Similarly, services with dropped cancel handles can have duplicates.
                    trace!(
                        ?key,
                        duplicate_connection = self.cancel_handles.contains_key(&key),
                        "cancel handle was dropped, dropping service"
                    );
                }

                // Unready -> Errored
                Some(Err((key, UnreadyError::Inner(error)))) => {
                    debug!(%error, "service failed while unready, dropping service");

                    let cancel = self.cancel_handles.remove(&key);
                    assert!(cancel.is_some(), "missing cancel handle");
                }
            }
        }

        result
    }

    /// Checks previously ready peer services for errors.
    ///
    /// The only way these peer `Client`s can become unready is when we send them a request,
    /// because the peer set has exclusive access to send requests to each peer. (If an inbound
    /// request is in progress, it will be handled, then our request will be sent by the connection
    /// task.)
    ///
    /// Returns `Poll::Ready` if there are some ready peers, and `Poll::Pending` if there are no
    /// ready peers. Registers a wakeup if any peer has failed due to a disconnection, hang, or protocol error.
    ///
    /// # Panics
    ///
    /// If any peers somehow became unready without being sent a request. This indicates a bug in
    /// the peer set, where requests are sent to peers without putting them in `unready_services`.
    fn poll_ready_peer_errors(&mut self, cx: &mut Context<'_>) -> Poll<()> {
        let mut previous = HashMap::new();
        std::mem::swap(&mut previous, &mut self.ready_services);

        // TODO: consider only checking some peers each poll (for performance reasons),
        //       but make sure we eventually check all of them.
        for (key, mut svc) in previous.drain() {
            let Poll::Ready(peer_readiness) = Pin::new(&mut svc).poll_ready(cx) else {
                unreachable!(
                    "unexpected unready peer: peers must be put into the unready_peers list \
                     after sending them a request"
                );
            };

            match peer_readiness {
                // Still ready, add it back to the list.
                Ok(()) => self.push_ready(false, key, svc),

                // Ready -> Errored
                Err(error) => {
                    debug!(%error, "service failed while ready, dropping service");

                    // Ready services can just be dropped, they don't need any cleanup.
                    std::mem::drop(svc);
                }
            }
        }

        if self.ready_services.is_empty() {
            Poll::Pending
        } else {
            Poll::Ready(())
        }
    }

    /// Returns the number of peer connections Zebra already has with
    /// the provided IP address
    ///
    /// # Performance
    ///
    /// This method is `O(connected peers)`, so it should not be called from a loop
    /// that is already iterating through the peer set.
    fn num_peers_with_ip(&self, ip: IpAddr) -> usize {
        self.ready_services
            .keys()
            .chain(self.cancel_handles.keys())
            .filter(|addr| addr.ip() == ip)
            .count()
    }

    /// Returns `true` if Zebra is already connected to the IP and port in `addr`.
    fn has_peer_with_addr(&self, addr: PeerSocketAddr) -> bool {
        self.ready_services.contains_key(&addr) || self.cancel_handles.contains_key(&addr)
    }

    /// Processes the entire list of newly inserted or removed services.
    ///
    /// Puts inserted services in the unready list.
    /// Drops removed services, after cancelling any pending requests.
    ///
    /// If the peer connector channel is closed, returns an error.
    ///
    /// Otherwise, returns `Ok` if it discovered at least one peer, or `Poll::Pending` if it didn't
    /// discover any peers. Always registers a wakeup for new peers, even when it returns `Ok`.
    fn poll_discover(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), BoxError>> {
        // Return pending if there are no peers in the list.
        let mut result = Poll::Pending;

        loop {
            // If we've emptied the list, finish looping, otherwise process the new peer.
            let Poll::Ready(discovered) = Pin::new(&mut self.discover).poll_discover(cx) else {
                break;
            };

            // If the change channel has a permanent error, return that error.
            let change = discovered
                .ok_or("discovery stream closed")?
                .map_err(Into::into)?;

            // Otherwise we have successfully processed a peer.
            result = Poll::Ready(Ok(()));

            // Process each change.
            match change {
                Change::Remove(key) => {
                    trace!(?key, "got Change::Remove from Discover");
                    self.remove(&key);
                }
                Change::Insert(key, svc) => {
                    // We add peers as unready, so that we:
                    // - always do the same checks on every ready peer, and
                    // - check for any errors that happened right after the handshake
                    trace!(?key, "got Change::Insert from Discover");

                    // # Security
                    //
                    // Drop the new peer if we are already connected to it.
                    // Preferring old connections avoids connection thrashing.
                    if self.has_peer_with_addr(key) {
                        std::mem::drop(svc);
                        continue;
                    }

                    // # Security
                    //
                    // drop the new peer if there are already `max_conns_per_ip` peers with
                    // the same IP address in the peer set.
                    if self.num_peers_with_ip(key.ip()) >= self.max_conns_per_ip {
                        std::mem::drop(svc);
                        continue;
                    }

                    self.push_unready(key, svc);
                }
            }
        }

        result
    }

    /// Checks if the minimum peer version has changed, and disconnects from outdated peers.
    fn disconnect_from_outdated_peers(&mut self) {
        if let Some(minimum_version) = self.minimum_peer_version.changed() {
            // It is ok to drop ready services, they don't need anything cancelled.
            self.ready_services
                .retain(|_address, peer| peer.remote_version() >= minimum_version);
        }
    }

    /// Takes a ready service by key.
    fn take_ready_service(&mut self, key: &D::Key) -> Option<D::Service> {
        if let Some(svc) = self.ready_services.remove(key) {
            assert!(
                !self.cancel_handles.contains_key(key),
                "cancel handles are only used for unready service work"
            );

            Some(svc)
        } else {
            None
        }
    }

    /// Remove the service corresponding to `key` from the peer set.
    ///
    /// Drops the service, cancelling any pending request or response to that peer.
    /// If the peer does not exist, does nothing.
    fn remove(&mut self, key: &D::Key) {
        if let Some(ready_service) = self.take_ready_service(key) {
            // A ready service has no work to cancel, so just drop it.
            std::mem::drop(ready_service);
        } else if let Some(handle) = self.cancel_handles.remove(key) {
            // Cancel the work, implicitly dropping the cancel handle.
            // The service future returns a `Canceled` error,
            // making `poll_unready` drop the service.
            let _ = handle.send(CancelClientWork);
        }
    }

    /// Adds a ready service to the ready list if it's for a peer with a supported version.
    /// If `was_unready` is true, also removes the peer's cancel handle.
    ///
    /// If the service is for a connection to an outdated peer, the service is dropped.
    fn push_ready(&mut self, was_unready: bool, key: D::Key, svc: D::Service) {
        let cancel = self.cancel_handles.remove(&key);
        assert_eq!(
            cancel.is_some(),
            was_unready,
            "missing or unexpected cancel handle"
        );

        if svc.remote_version() >= self.minimum_peer_version.current() {
            self.ready_services.insert(key, svc);
        } else {
            std::mem::drop(svc);
        }
    }

    /// Adds a busy service to the unready list if it's for a peer with a supported version,
    /// and adds a cancel handle for the service's current request.
    ///
    /// If the service is for a connection to an outdated peer, the request is cancelled and the
    /// service is dropped.
    fn push_unready(&mut self, key: D::Key, svc: D::Service) {
        let peer_version = svc.remote_version();
        let (tx, rx) = oneshot::channel();

        self.unready_services.push(UnreadyService {
            key: Some(key),
            service: Some(svc),
            cancel: rx,
            _req: PhantomData,
        });

        if peer_version >= self.minimum_peer_version.current() {
            self.cancel_handles.insert(key, tx);
        } else {
            // Cancel any request made to the service because it is using an outdated protocol
            // version.
            let _ = tx.send(CancelClientWork);
        }
    }

    /// Performs P2C on `self.ready_services` to randomly select a less-loaded ready service.
    fn select_ready_p2c_peer(&self) -> Option<D::Key> {
        self.select_p2c_peer_from_list(&self.ready_services.keys().copied().collect())
    }

    /// Performs P2C on `ready_service_list` to randomly select a less-loaded ready service.
    #[allow(clippy::unwrap_in_result)]
    fn select_p2c_peer_from_list(&self, ready_service_list: &HashSet<D::Key>) -> Option<D::Key> {
        match ready_service_list.len() {
            0 => None,
            1 => Some(
                *ready_service_list
                    .iter()
                    .next()
                    .expect("just checked there is one service"),
            ),
            len => {
                // Choose 2 random peers, then return the least loaded of those 2 peers.
                let (a, b) = {
                    let idxs = rand::seq::index::sample(&mut rand::thread_rng(), len, 2);
                    let a = idxs.index(0);
                    let b = idxs.index(1);

                    let a = *ready_service_list
                        .iter()
                        .nth(a)
                        .expect("sample returns valid indexes");
                    let b = *ready_service_list
                        .iter()
                        .nth(b)
                        .expect("sample returns valid indexes");

                    (a, b)
                };

                let a_load = self.query_load(&a).expect("supplied services are ready");
                let b_load = self.query_load(&b).expect("supplied services are ready");

                let selected = if a_load <= b_load { a } else { b };

                trace!(
                    a.key = ?a,
                    a.load = ?a_load,
                    b.key = ?b,
                    b.load = ?b_load,
                    selected = ?selected,
                    ?len,
                    "selected service by p2c"
                );

                Some(selected)
            }
        }
    }

    /// Randomly chooses `max_peers` ready services, ignoring service load.
    ///
    /// The chosen peers are unique, but their order is not fully random.
    fn select_random_ready_peers(&self, max_peers: usize) -> Vec<D::Key> {
        use rand::seq::IteratorRandom;

        self.ready_services
            .keys()
            .copied()
            .choose_multiple(&mut rand::thread_rng(), max_peers)
    }

    /// Accesses a ready endpoint by `key` and returns its current load.
    ///
    /// Returns `None` if the service is not in the ready service list.
    fn query_load(&self, key: &D::Key) -> Option<<D::Service as Load>::Metric> {
        let svc = self.ready_services.get(key);
        svc.map(|svc| svc.load())
    }

    /// Routes a request using P2C load-balancing.
    fn route_p2c(&mut self, req: Request) -> <Self as tower::Service<Request>>::Future {
        if let Some(p2c_key) = self.select_ready_p2c_peer() {
            tracing::trace!(?p2c_key, "routing based on p2c");

            let mut svc = self
                .take_ready_service(&p2c_key)
                .expect("selected peer must be ready");

            let fut = svc.call(req);
            self.push_unready(p2c_key, svc);

            return fut.map_err(Into::into).boxed();
        }

        async move {
            // Let other tasks run, so a retry request might get different ready peers.
            tokio::task::yield_now().await;

            // # Security
            //
            // Avoid routing requests to peers that are missing inventory.
            // If we kept trying doomed requests, peers that are missing our requested inventory
            // could take up a large amount of our bandwidth and retry limits.
            Err(SharedPeerError::from(PeerError::NoReadyPeers))
        }
        .map_err(Into::into)
        .boxed()
    }

    /// Tries to route a request to a ready peer that advertised that inventory,
    /// falling back to a ready peer that isn't missing the inventory.
    ///
    /// If all ready peers are missing the inventory,
    /// returns a synthetic [`NotFoundRegistry`](PeerError::NotFoundRegistry) error.
    ///
    /// Uses P2C to route requests to the least loaded peer in each list.
    fn route_inv(
        &mut self,
        req: Request,
        hash: InventoryHash,
    ) -> <Self as tower::Service<Request>>::Future {
        let advertising_peer_list = self
            .inventory_registry
            .advertising_peers(hash)
            .filter(|&addr| self.ready_services.contains_key(addr))
            .copied()
            .collect();

        // # Security
        //
        // Choose a random, less-loaded peer with the inventory.
        //
        // If we chose the first peer in HashMap order,
        // peers would be able to influence our choice by switching addresses.
        // But we need the choice to be random,
        // so that a peer can't provide all our inventory responses.
        let peer = self.select_p2c_peer_from_list(&advertising_peer_list);

        if let Some(mut svc) = peer.and_then(|key| self.take_ready_service(&key)) {
            let peer = peer.expect("just checked peer is Some");
            tracing::trace!(?hash, ?peer, "routing to a peer which advertised inventory");
            let fut = svc.call(req);
            self.push_unready(peer, svc);
            return fut.map_err(Into::into).boxed();
        }

        let missing_peer_list: HashSet<PeerSocketAddr> = self
            .inventory_registry
            .missing_peers(hash)
            .copied()
            .collect();
        let maybe_peer_list = self
            .ready_services
            .keys()
            .filter(|addr| !missing_peer_list.contains(addr))
            .copied()
            .collect();

        // Security: choose a random, less-loaded peer that might have the inventory.
        let peer = self.select_p2c_peer_from_list(&maybe_peer_list);

        if let Some(mut svc) = peer.and_then(|key| self.take_ready_service(&key)) {
            let peer = peer.expect("just checked peer is Some");
            tracing::trace!(?hash, ?peer, "routing to a peer that might have inventory");
            let fut = svc.call(req);
            self.push_unready(peer, svc);
            return fut.map_err(Into::into).boxed();
        }

        tracing::debug!(
            ?hash,
            "all ready peers are missing inventory, failing request"
        );

        async move {
            // Let other tasks run, so a retry request might get different ready peers.
            tokio::task::yield_now().await;

            // # Security
            //
            // Avoid routing requests to peers that are missing inventory.
            // If we kept trying doomed requests, peers that are missing our requested inventory
            // could take up a large amount of our bandwidth and retry limits.
            Err(SharedPeerError::from(PeerError::NotFoundRegistry(vec![
                hash,
            ])))
        }
        .map_err(Into::into)
        .boxed()
    }

    /// Routes the same request to up to `max_peers` ready peers, ignoring return values.
    ///
    /// `max_peers` must be at least one, and at most the number of ready peers.
    fn route_multiple(
        &mut self,
        req: Request,
        max_peers: usize,
    ) -> <Self as tower::Service<Request>>::Future {
        assert!(
            max_peers > 0,
            "requests must be routed to at least one peer"
        );
        assert!(
            max_peers <= self.ready_services.len(),
            "requests can only be routed to ready peers"
        );

        // # Security
        //
        // We choose peers randomly, ignoring load.
        // This avoids favouring malicious peers, because peers can influence their own load.
        //
        // The order of peers isn't completely random,
        // but peer request order is not security-sensitive.

        let futs = FuturesUnordered::new();
        for key in self.select_random_ready_peers(max_peers) {
            let mut svc = self
                .take_ready_service(&key)
                .expect("selected peers are ready");
            futs.push(svc.call(req.clone()).map_err(|_| ()));
            self.push_unready(key, svc);
        }

        async move {
            let results = futs.collect::<Vec<Result<_, _>>>().await;
            tracing::debug!(
                ok.len = results.iter().filter(|r| r.is_ok()).count(),
                err.len = results.iter().filter(|r| r.is_err()).count(),
                "sent peer request to multiple peers"
            );
            Ok(Response::Nil)
        }
        .boxed()
    }

    /// Broadcasts the same request to lots of ready peers, ignoring return values.
    fn route_broadcast(&mut self, req: Request) -> <Self as tower::Service<Request>>::Future {
        // Broadcasts ignore the response
        self.route_multiple(req, self.number_of_peers_to_broadcast())
    }

    /// Given the number of ready peers, calculates how many of them Zebra will
    /// actually send a request to, and returns that number.
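    ///
    /// For example, with the current fraction of one third: 1 ready peer gives 1
    /// broadcast target, 4 ready peers give 2, and 7 ready peers give 3.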
    pub(crate) fn number_of_peers_to_broadcast(&self) -> usize {
        // We are currently sending broadcast messages to a third of the total peers.
        const PEER_FRACTION_TO_BROADCAST: usize = 3;

        // Round up, so that if we have one ready peer, it gets the request.
        div_ceil(self.ready_services.len(), PEER_FRACTION_TO_BROADCAST)
    }

    /// Returns the list of addresses in the peer set.
    fn peer_set_addresses(&self) -> Vec<PeerSocketAddr> {
        self.ready_services
            .keys()
            .chain(self.cancel_handles.keys())
            .cloned()
            .collect()
    }

    /// Logs the peer set size, and any potential connectivity issues.
    fn log_peer_set_size(&mut self) {
        let ready_services_len = self.ready_services.len();
        let unready_services_len = self.unready_services.len();
        trace!(ready_peers = ?ready_services_len, unready_peers = ?unready_services_len);

        let now = Instant::now();

        // These logs are designed to be human-readable in a terminal, at the
        // default Zebra log level. If you need to know the peer set size for
        // every request, use the trace-level logs, or the metrics exporter.
        if let Some(last_peer_log) = self.last_peer_log {
            // Avoid duplicate peer set logs
            if now.duration_since(last_peer_log) < MIN_PEER_SET_LOG_INTERVAL {
                return;
            }
        } else {
            // Suppress initial logs until the peer set has started up.
            // There can be multiple initial requests before the first peer is
            // ready.
            self.last_peer_log = Some(now);
            return;
        }

        self.last_peer_log = Some(now);

        // Log potential duplicate connections.
        let peers = self.peer_set_addresses();

        // Check for duplicates by address and port: these are unexpected and represent a bug.
        let duplicates: Vec<PeerSocketAddr> = peers.iter().duplicates().cloned().collect();

        let mut peer_counts = peers.iter().counts();
        peer_counts.retain(|peer, _count| duplicates.contains(peer));

        if !peer_counts.is_empty() {
            let duplicate_connections: usize = peer_counts.values().sum();

            warn!(
                ?duplicate_connections,
                duplicated_peers = ?peer_counts.len(),
                peers = ?peers.len(),
                "duplicate peer connections in peer set"
            );
        }

        // Check for duplicates by address: these can happen if there are multiple nodes
        // behind a NAT or on a single server.
        let peers: Vec<IpAddr> = peers.iter().map(|addr| addr.ip()).collect();
        let duplicates: Vec<IpAddr> = peers.iter().duplicates().cloned().collect();

        let mut peer_counts = peers.iter().counts();
        peer_counts.retain(|peer, _count| duplicates.contains(peer));

        if !peer_counts.is_empty() {
            let duplicate_connections: usize = peer_counts.values().sum();

            info!(
                ?duplicate_connections,
                duplicated_peers = ?peer_counts.len(),
                peers = ?peers.len(),
                "duplicate IP addresses in peer set"
            );
        }

        // Only log connectivity warnings if all our peers are busy (or there are no peers).
        if ready_services_len > 0 {
            return;
        }

        let address_metrics = *self.address_metrics.borrow();
        if unready_services_len == 0 {
            warn!(
                ?address_metrics,
                "network request with no peer connections. Hint: check your network connection"
            );
        } else {
            info!(?address_metrics, "network request with no ready peers: finding more peers, waiting for {} peers to answer requests",
                  unready_services_len);
        }
    }

    /// Updates the peer set metrics.
    ///
    /// # Panics
    ///
    /// If the peer set size exceeds the connection limit.
    fn update_metrics(&self) {
        let num_ready = self.ready_services.len();
        let num_unready = self.unready_services.len();
        let num_peers = num_ready + num_unready;
        metrics::gauge!("pool.num_ready").set(num_ready as f64);
        metrics::gauge!("pool.num_unready").set(num_unready as f64);
        metrics::gauge!("zcash.net.peers").set(num_peers as f64);

        // Security: make sure we haven't exceeded the connection limit
        if num_peers > self.peerset_total_connection_limit {
            let address_metrics = *self.address_metrics.borrow();
            panic!(
                "unexpectedly exceeded configured peer set connection limit: \n\
                 peers: {num_peers:?}, ready: {num_ready:?}, unready: {num_unready:?}, \n\
                 address_metrics: {address_metrics:?}",
            );
        }
    }
}

impl<D, C> Service<Request> for PeerSet<D, C>
where
    D: Discover<Key = PeerSocketAddr, Service = LoadTrackedClient> + Unpin,
    D::Error: Into<BoxError>,
    C: ChainTip,
{
    type Response = Response;
    type Error = BoxError;
    type Future =
        Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        // Update service and peer statuses.
        //
        // # Correctness
        //
        // All of the futures that receive a context from this method can wake the peer set buffer
        // task. If there are no ready peers, and no new peers, network requests will pause until:
        // - an unready peer becomes ready, or
        // - a new peer arrives.

        // Check for new peers, and register a task wakeup when the next new peers arrive. New peers
        // can be infrequent if our connection slots are full, or we're connected to all
        // available/useful peers.
        let _poll_pending_or_ready: Poll<()> = self.poll_discover(cx)?;

        // These tasks don't provide new peers or newly ready peers.
        let _poll_pending: Poll<()> = self.poll_background_errors(cx)?;
        let _poll_pending_or_ready: Poll<()> = self.inventory_registry.poll_inventory(cx)?;

        // Check for newly ready peers, including newly added peers (which are added as unready).
        // So it needs to run after `poll_discover()`. Registers a wakeup if there are any unready
        // peers.
        //
        // Each connected peer should become ready within a few minutes, or timeout, close the
        // connection, and release its connection slot.
        //
        // TODO: drop peers that overload us with inbound messages and never become ready (#7822)
        let _poll_pending_or_ready: Poll<Option<()>> = self.poll_unready(cx)?;

        // Cleanup and metrics.

        // Only checks the versions of ready peers, so it needs to run after `poll_unready()`.
        self.disconnect_from_outdated_peers();

        // These metrics should run last, to report the most up-to-date information.
        self.log_peer_set_size();
        self.update_metrics();

        // Check for failures in ready peers, removing newly errored or disconnected peers.
        // So it needs to run after `poll_unready()`.
        let ready_peers: Poll<()> = self.poll_ready_peer_errors(cx);

        if ready_peers.is_pending() {
            // # Correctness
            //
            // If the channel is full, drop the demand signal rather than waiting. If we waited
            // here, the crawler could deadlock sending a request to fetch more peers, because it
            // also empties the channel.
            trace!("no ready services, sending demand signal");
            let _ = self.demand_signal.try_send(MorePeers);

            // # Correctness
            //
            // The current task must be scheduled for wakeup every time we return `Poll::Pending`.
            //
            // As long as there are unready or new peers, this task will run, because:
            // - `poll_discover` schedules this task for wakeup when new peers arrive.
            // - if there are unready peers, `poll_unready` or `poll_ready_peer_errors` schedule this
            //   task for wakeup when peer services become ready.
            //
            // To avoid peers blocking on a full peer status/error channel:
            // - `poll_background_errors` schedules this task for wakeup when the peer status
            //   update task exits.
            Poll::Pending
        } else {
            Poll::Ready(Ok(()))
        }
    }

    fn call(&mut self, req: Request) -> Self::Future {
        let fut = match req {
            // Only do inventory-aware routing on individual items.
            Request::BlocksByHash(ref hashes) if hashes.len() == 1 => {
                let hash = InventoryHash::from(*hashes.iter().next().unwrap());
                self.route_inv(req, hash)
            }
            Request::TransactionsById(ref hashes) if hashes.len() == 1 => {
                let hash = InventoryHash::from(*hashes.iter().next().unwrap());
                self.route_inv(req, hash)
            }

            // Broadcast advertisements to lots of peers
            Request::AdvertiseTransactionIds(_) => self.route_broadcast(req),
            Request::AdvertiseBlock(_) => self.route_broadcast(req),

            // Choose a random less-loaded peer for all other requests
            _ => self.route_p2c(req),
        };
        self.update_metrics();

        fut
    }
}