zebrad/components/sync/
progress.rs

1//! Progress tracking for blockchain syncing.
2
3use std::{
4    cmp::min,
5    ops::Add,
6    time::{Duration, Instant},
7};
8
9use chrono::Utc;
10use num_integer::div_ceil;
11
12use zebra_chain::{
13    block::{Height, HeightDiff},
14    chain_sync_status::ChainSyncStatus,
15    chain_tip::ChainTip,
16    fmt::humantime_seconds,
17    parameters::{Network, NetworkUpgrade, POST_BLOSSOM_POW_TARGET_SPACING},
18};
19use zebra_state::MAX_BLOCK_REORG_HEIGHT;
20
21use crate::components::sync::SyncStatus;
22
23/// The amount of time between progress logs.
24const LOG_INTERVAL: Duration = Duration::from_secs(60);
25
26/// The amount of time between progress bar updates.
27const PROGRESS_BAR_INTERVAL: Duration = Duration::from_secs(5);
28
29/// The number of blocks we consider to be close to the tip.
30///
31/// Most chain forks are 1-7 blocks long.
32const MAX_CLOSE_TO_TIP_BLOCKS: HeightDiff = 1;
33
34/// Skip slow sync warnings when we are this close to the tip.
35///
36/// In testing, we've seen warnings around 30 blocks.
37///
38/// TODO: replace with `MAX_CLOSE_TO_TIP_BLOCKS` after fixing slow syncing near tip (#3375)
39const MIN_SYNC_WARNING_BLOCKS: HeightDiff = 60;
40
41/// The number of fractional digits in sync percentages.
42const SYNC_PERCENT_FRAC_DIGITS: usize = 3;
43
44/// The minimum number of extra blocks mined between updating a checkpoint list,
45/// and running an automated test that depends on that list.
46///
47/// Makes sure that the block finalization code always runs in sync tests,
48/// even if the miner or test node clock is wrong by a few minutes.
49///
50/// This is an estimate based on the time it takes to:
51/// - get the tip height from `zcashd`,
52/// - run `zebra-checkpoints` to update the checkpoint list,
53/// - submit a pull request, and
54/// - run a CI test that logs progress based on the new checkpoint height.
55///
56/// We might add tests that sync from a cached tip state,
57/// so we only allow a few extra blocks here.
58//
59// TODO: change to HeightDiff?
60const MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE: u32 = 10;
61
62/// Logs Zebra's estimated progress towards the chain tip every minute or so, and
63/// updates a terminal progress bar every few seconds.
64///
65/// TODO:
66/// - log progress towards, remaining blocks before, and remaining time to next network upgrade
67/// - add some progress info to the metrics
68pub async fn show_block_chain_progress(
69    network: Network,
70    latest_chain_tip: impl ChainTip,
71    sync_status: SyncStatus,
72) -> ! {
73    // The minimum number of extra blocks after the highest checkpoint, based on:
74    // - the non-finalized state limit, and
75    // - the minimum number of extra blocks mined between a checkpoint update,
76    //   and the automated tests for that update.
77    let min_after_checkpoint_blocks =
78        MAX_BLOCK_REORG_HEIGHT + MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE;
79    let min_after_checkpoint_blocks: HeightDiff = min_after_checkpoint_blocks.into();
80
81    // The minimum height of the valid best chain, based on:
82    // - the hard-coded checkpoint height,
83    // - the minimum number of blocks after the highest checkpoint.
84    let after_checkpoint_height = network
85        .checkpoint_list()
86        .max_height()
87        .add(min_after_checkpoint_blocks)
88        .expect("hard-coded checkpoint height is far below Height::MAX");
89
90    let target_block_spacing = NetworkUpgrade::target_spacing_for_height(&network, Height::MAX);
91    let max_block_spacing =
92        NetworkUpgrade::minimum_difficulty_spacing_for_height(&network, Height::MAX);
93
94    // We expect the state height to increase at least once in this interval.
95    //
96    // Most chain forks are 1-7 blocks long.
97    //
98    // TODO: remove the target_block_spacing multiplier,
99    //       after fixing slow syncing near tip (#3375)
100    let min_state_block_interval = max_block_spacing.unwrap_or(target_block_spacing * 4) * 2;
101
102    // Formatted strings for logging.
103    let target_block_spacing = humantime_seconds(
104        target_block_spacing
105            .to_std()
106            .expect("constant fits in std::Duration"),
107    );
108    let max_block_spacing = max_block_spacing
109        .map(|duration| {
110            humantime_seconds(duration.to_std().expect("constant fits in std::Duration"))
111        })
112        .unwrap_or_else(|| "None".to_string());
113
114    // The last time we downloaded and verified at least one block.
115    //
116    // Initialized to the start time to simplify the code.
117    let mut last_state_change_time = Utc::now();
118
119    // The state tip height, when we last downloaded and verified at least one block.
120    //
121    // Initialized to the genesis height to simplify the code.
122    let mut last_state_change_height = Height(0);
123
124    // The last time we logged an update.
125    let mut last_log_time = Instant::now();
126
127    #[cfg(feature = "progress-bar")]
128    let block_bar = howudoin::new().label("Blocks");
129
130    loop {
131        let now = Utc::now();
132        let instant_now = Instant::now();
133
134        let is_syncer_stopped = sync_status.is_close_to_tip();
135
136        if let Some(estimated_height) =
137            latest_chain_tip.estimate_network_chain_tip_height(&network, now)
138        {
139            // The estimate/actual race doesn't matter here,
140            // because we're only using it for metrics and logging.
141            let current_height = latest_chain_tip
142                .best_tip_height()
143                .expect("unexpected empty state: estimate requires a block height");
144            let network_upgrade = NetworkUpgrade::current(&network, current_height);
145
146            // Send progress reports for block height
147            //
148            // TODO: split the progress bar height update into its own function.
149            #[cfg(feature = "progress-bar")]
150            if matches!(howudoin::cancelled(), Some(true)) {
151                block_bar.close();
152            } else {
153                block_bar
154                    .set_pos(current_height.0)
155                    .set_len(u64::from(estimated_height.0));
156            }
157
158            // Skip logging and status updates if it isn't time for them yet.
159            let elapsed_since_log = instant_now.saturating_duration_since(last_log_time);
160            if elapsed_since_log < LOG_INTERVAL {
161                tokio::time::sleep(PROGRESS_BAR_INTERVAL).await;
162                continue;
163            } else {
164                last_log_time = instant_now;
165            }
166
167            // TODO: split logging / status updates into their own function.
168
169            // Work out the sync progress towards the estimated tip.
170            let sync_progress = f64::from(current_height.0) / f64::from(estimated_height.0);
171            let sync_percent = format!(
172                "{:.frac$}%",
173                sync_progress * 100.0,
174                frac = SYNC_PERCENT_FRAC_DIGITS,
175            );
176
177            let mut remaining_sync_blocks = estimated_height - current_height;
178            if remaining_sync_blocks < 0 {
179                remaining_sync_blocks = 0;
180            }
181
182            // Work out how long it has been since the state height has increased.
183            //
184            // Non-finalized forks can decrease the height, we only want to track increases.
185            if current_height > last_state_change_height {
186                last_state_change_height = current_height;
187                last_state_change_time = now;
188            }
189
190            let time_since_last_state_block_chrono =
191                now.signed_duration_since(last_state_change_time);
192            let time_since_last_state_block = humantime_seconds(
193                time_since_last_state_block_chrono
194                    .to_std()
195                    .unwrap_or_default(),
196            );
197
198            if time_since_last_state_block_chrono > min_state_block_interval {
199                // The state tip height hasn't increased for a long time.
200                //
201                // Block verification can fail if the local node's clock is wrong.
202                warn!(
203                    %sync_percent,
204                    ?current_height,
205                    ?network_upgrade,
206                    %time_since_last_state_block,
207                    %target_block_spacing,
208                    %max_block_spacing,
209                    ?is_syncer_stopped,
210                    "chain updates have stalled, \
211                     state height has not increased for {} minutes. \
212                     Hint: check your network connection, \
213                     and your computer clock and time zone",
214                    time_since_last_state_block_chrono.num_minutes(),
215                );
216
217                // TODO: use add_warn(), but only add each warning once
218                #[cfg(feature = "progress-bar")]
219                block_bar.desc(format!("{network_upgrade}: sync has stalled"));
220            } else if is_syncer_stopped && remaining_sync_blocks > MIN_SYNC_WARNING_BLOCKS {
221                // We've stopped syncing blocks, but we estimate we're a long way from the tip.
222                //
223                // TODO: warn after fixing slow syncing near tip (#3375)
224                info!(
225                    %sync_percent,
226                    ?current_height,
227                    ?network_upgrade,
228                    ?remaining_sync_blocks,
229                    ?after_checkpoint_height,
230                    %time_since_last_state_block,
231                    "initial sync is very slow, or estimated tip is wrong. \
232                     Hint: check your network connection, \
233                     and your computer clock and time zone",
234                );
235
236                #[cfg(feature = "progress-bar")]
237                block_bar.desc(format!(
238                    "{network_upgrade}: sync is very slow, or estimated tip is wrong"
239                ));
240            } else if is_syncer_stopped && current_height <= after_checkpoint_height {
241                // We've stopped syncing blocks,
242                // but we're below the minimum height estimated from our checkpoints.
243                let min_minutes_after_checkpoint_update = div_ceil(
244                    MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE * POST_BLOSSOM_POW_TARGET_SPACING,
245                    60,
246                );
247
248                warn!(
249                    %sync_percent,
250                    ?current_height,
251                    ?network_upgrade,
252                    ?remaining_sync_blocks,
253                    ?after_checkpoint_height,
254                    %time_since_last_state_block,
255                    "initial sync is very slow, and state is below the highest checkpoint. \
256                     Hint: check your network connection, \
257                     and your computer clock and time zone. \
258                     Dev Hint: were the checkpoints updated in the last {} minutes?",
259                    min_minutes_after_checkpoint_update,
260                );
261
262                #[cfg(feature = "progress-bar")]
263                block_bar.desc(format!("{network_upgrade}: sync is very slow"));
264            } else if is_syncer_stopped {
265                // We've stayed near the tip for a while, and we've stopped syncing lots of blocks.
266                // So we're mostly using gossiped blocks now.
267                info!(
268                    %sync_percent,
269                    ?current_height,
270                    ?network_upgrade,
271                    ?remaining_sync_blocks,
272                    %time_since_last_state_block,
273                    "finished initial sync to chain tip, using gossiped blocks",
274                );
275
276                #[cfg(feature = "progress-bar")]
277                block_bar.desc(format!("{network_upgrade}: waiting for next block"));
278            } else if remaining_sync_blocks <= MAX_CLOSE_TO_TIP_BLOCKS {
279                // We estimate we're near the tip, but we have been syncing lots of blocks recently.
280                // We might also be using some gossiped blocks.
281                info!(
282                    %sync_percent,
283                    ?current_height,
284                    ?network_upgrade,
285                    ?remaining_sync_blocks,
286                    %time_since_last_state_block,
287                    "close to finishing initial sync, \
288                     confirming using syncer and gossiped blocks",
289                );
290
291                #[cfg(feature = "progress-bar")]
292                block_bar.desc(format!("{network_upgrade}: finishing initial sync"));
293            } else {
294                // We estimate we're far from the tip, and we've been syncing lots of blocks.
295                info!(
296                    %sync_percent,
297                    ?current_height,
298                    ?network_upgrade,
299                    ?remaining_sync_blocks,
300                    %time_since_last_state_block,
301                    "estimated progress to chain tip",
302                );
303
304                #[cfg(feature = "progress-bar")]
305                block_bar.desc(format!("{network_upgrade}: syncing blocks"));
306            }
307        } else {
308            let sync_percent = format!("{:.SYNC_PERCENT_FRAC_DIGITS$} %", 0.0f64,);
309            #[cfg(feature = "progress-bar")]
310            let network_upgrade = NetworkUpgrade::Genesis;
311
312            if is_syncer_stopped {
313                // We've stopped syncing blocks,
314                // but we haven't downloaded and verified the genesis block.
315                warn!(
316                    %sync_percent,
317                    current_height = %"None",
318                    "initial sync can't download and verify the genesis block. \
319                     Hint: check your network connection, \
320                     and your computer clock and time zone",
321                );
322
323                #[cfg(feature = "progress-bar")]
324                block_bar.desc(format!("{network_upgrade}: can't download genesis block"));
325            } else {
326                // We're waiting for the genesis block to be committed to the state,
327                // before we can estimate the best chain tip.
328                info!(
329                    %sync_percent,
330                    current_height = %"None",
331                    "initial sync is waiting to download the genesis block",
332                );
333
334                #[cfg(feature = "progress-bar")]
335                block_bar.desc(format!(
336                    "{network_upgrade}: waiting to download genesis block"
337                ));
338            }
339        }
340
341        tokio::time::sleep(min(LOG_INTERVAL, PROGRESS_BAR_INTERVAL)).await;
342    }
343}