zebrad/components/sync/
progress.rs

1//! Progress tracking for blockchain syncing.
2
3use std::{
4    cmp::min,
5    ops::Add,
6    time::{Duration, Instant},
7};
8
9use chrono::Utc;
10use num_integer::div_ceil;
11
12use zebra_chain::{
13    block::{Height, HeightDiff},
14    chain_sync_status::ChainSyncStatus,
15    chain_tip::ChainTip,
16    fmt::humantime_seconds,
17    parameters::{Network, NetworkUpgrade, POST_BLOSSOM_POW_TARGET_SPACING},
18};
19use zebra_consensus::ParameterCheckpoint as _;
20use zebra_state::MAX_BLOCK_REORG_HEIGHT;
21
22use crate::components::sync::SyncStatus;
23
24/// The amount of time between progress logs.
25const LOG_INTERVAL: Duration = Duration::from_secs(60);
26
27/// The amount of time between progress bar updates.
28const PROGRESS_BAR_INTERVAL: Duration = Duration::from_secs(5);
29
30/// The number of blocks we consider to be close to the tip.
31///
32/// Most chain forks are 1-7 blocks long.
33const MAX_CLOSE_TO_TIP_BLOCKS: HeightDiff = 1;
34
35/// Skip slow sync warnings when we are this close to the tip.
36///
37/// In testing, we've seen warnings around 30 blocks.
38///
39/// TODO: replace with `MAX_CLOSE_TO_TIP_BLOCKS` after fixing slow syncing near tip (#3375)
40const MIN_SYNC_WARNING_BLOCKS: HeightDiff = 60;
41
42/// The number of fractional digits in sync percentages.
43const SYNC_PERCENT_FRAC_DIGITS: usize = 3;
44
45/// The minimum number of extra blocks mined between updating a checkpoint list,
46/// and running an automated test that depends on that list.
47///
48/// Makes sure that the block finalization code always runs in sync tests,
49/// even if the miner or test node clock is wrong by a few minutes.
50///
51/// This is an estimate based on the time it takes to:
52/// - get the tip height from `zcashd`,
53/// - run `zebra-checkpoints` to update the checkpoint list,
54/// - submit a pull request, and
55/// - run a CI test that logs progress based on the new checkpoint height.
56///
57/// We might add tests that sync from a cached tip state,
58/// so we only allow a few extra blocks here.
59//
60// TODO: change to HeightDiff?
61const MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE: u32 = 10;
62
63/// Logs Zebra's estimated progress towards the chain tip every minute or so, and
64/// updates a terminal progress bar every few seconds.
65///
66/// TODO:
67/// - log progress towards, remaining blocks before, and remaining time to next network upgrade
68/// - add some progress info to the metrics
69pub async fn show_block_chain_progress(
70    network: Network,
71    latest_chain_tip: impl ChainTip,
72    sync_status: SyncStatus,
73) -> ! {
74    // The minimum number of extra blocks after the highest checkpoint, based on:
75    // - the non-finalized state limit, and
76    // - the minimum number of extra blocks mined between a checkpoint update,
77    //   and the automated tests for that update.
78    let min_after_checkpoint_blocks =
79        MAX_BLOCK_REORG_HEIGHT + MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE;
80    let min_after_checkpoint_blocks: HeightDiff = min_after_checkpoint_blocks.into();
81
82    // The minimum height of the valid best chain, based on:
83    // - the hard-coded checkpoint height,
84    // - the minimum number of blocks after the highest checkpoint.
85    let after_checkpoint_height = network
86        .checkpoint_list()
87        .max_height()
88        .add(min_after_checkpoint_blocks)
89        .expect("hard-coded checkpoint height is far below Height::MAX");
90
91    let target_block_spacing = NetworkUpgrade::target_spacing_for_height(&network, Height::MAX);
92    let max_block_spacing =
93        NetworkUpgrade::minimum_difficulty_spacing_for_height(&network, Height::MAX);
94
95    // We expect the state height to increase at least once in this interval.
96    //
97    // Most chain forks are 1-7 blocks long.
98    //
99    // TODO: remove the target_block_spacing multiplier,
100    //       after fixing slow syncing near tip (#3375)
101    let min_state_block_interval = max_block_spacing.unwrap_or(target_block_spacing * 4) * 2;
102
103    // Formatted strings for logging.
104    let target_block_spacing = humantime_seconds(
105        target_block_spacing
106            .to_std()
107            .expect("constant fits in std::Duration"),
108    );
109    let max_block_spacing = max_block_spacing
110        .map(|duration| {
111            humantime_seconds(duration.to_std().expect("constant fits in std::Duration"))
112        })
113        .unwrap_or_else(|| "None".to_string());
114
115    // The last time we downloaded and verified at least one block.
116    //
117    // Initialized to the start time to simplify the code.
118    let mut last_state_change_time = Utc::now();
119
120    // The state tip height, when we last downloaded and verified at least one block.
121    //
122    // Initialized to the genesis height to simplify the code.
123    let mut last_state_change_height = Height(0);
124
125    // The last time we logged an update.
126    let mut last_log_time = Instant::now();
127
128    #[cfg(feature = "progress-bar")]
129    let block_bar = howudoin::new().label("Blocks");
130
131    loop {
132        let now = Utc::now();
133        let instant_now = Instant::now();
134
135        let is_syncer_stopped = sync_status.is_close_to_tip();
136
137        if let Some(estimated_height) =
138            latest_chain_tip.estimate_network_chain_tip_height(&network, now)
139        {
140            // The estimate/actual race doesn't matter here,
141            // because we're only using it for metrics and logging.
142            let current_height = latest_chain_tip
143                .best_tip_height()
144                .expect("unexpected empty state: estimate requires a block height");
145            let network_upgrade = NetworkUpgrade::current(&network, current_height);
146
147            // Send progress reports for block height
148            //
149            // TODO: split the progress bar height update into its own function.
150            #[cfg(feature = "progress-bar")]
151            if matches!(howudoin::cancelled(), Some(true)) {
152                block_bar.close();
153            } else {
154                block_bar
155                    .set_pos(current_height.0)
156                    .set_len(u64::from(estimated_height.0));
157            }
158
159            // Skip logging and status updates if it isn't time for them yet.
160            let elapsed_since_log = instant_now.saturating_duration_since(last_log_time);
161            if elapsed_since_log < LOG_INTERVAL {
162                tokio::time::sleep(PROGRESS_BAR_INTERVAL).await;
163                continue;
164            } else {
165                last_log_time = instant_now;
166            }
167
168            // TODO: split logging / status updates into their own function.
169
170            // Work out the sync progress towards the estimated tip.
171            let sync_progress = f64::from(current_height.0) / f64::from(estimated_height.0);
172            let sync_percent = format!(
173                "{:.frac$}%",
174                sync_progress * 100.0,
175                frac = SYNC_PERCENT_FRAC_DIGITS,
176            );
177
178            let mut remaining_sync_blocks = estimated_height - current_height;
179            if remaining_sync_blocks < 0 {
180                remaining_sync_blocks = 0;
181            }
182
183            // Work out how long it has been since the state height has increased.
184            //
185            // Non-finalized forks can decrease the height, we only want to track increases.
186            if current_height > last_state_change_height {
187                last_state_change_height = current_height;
188                last_state_change_time = now;
189            }
190
191            let time_since_last_state_block_chrono =
192                now.signed_duration_since(last_state_change_time);
193            let time_since_last_state_block = humantime_seconds(
194                time_since_last_state_block_chrono
195                    .to_std()
196                    .unwrap_or_default(),
197            );
198
199            if time_since_last_state_block_chrono > min_state_block_interval {
200                // The state tip height hasn't increased for a long time.
201                //
202                // Block verification can fail if the local node's clock is wrong.
203                warn!(
204                    %sync_percent,
205                    ?current_height,
206                    ?network_upgrade,
207                    %time_since_last_state_block,
208                    %target_block_spacing,
209                    %max_block_spacing,
210                    ?is_syncer_stopped,
211                    "chain updates have stalled, \
212                     state height has not increased for {} minutes. \
213                     Hint: check your network connection, \
214                     and your computer clock and time zone",
215                    time_since_last_state_block_chrono.num_minutes(),
216                );
217
218                // TODO: use add_warn(), but only add each warning once
219                #[cfg(feature = "progress-bar")]
220                block_bar.desc(format!("{}: sync has stalled", network_upgrade));
221            } else if is_syncer_stopped && remaining_sync_blocks > MIN_SYNC_WARNING_BLOCKS {
222                // We've stopped syncing blocks, but we estimate we're a long way from the tip.
223                //
224                // TODO: warn after fixing slow syncing near tip (#3375)
225                info!(
226                    %sync_percent,
227                    ?current_height,
228                    ?network_upgrade,
229                    ?remaining_sync_blocks,
230                    ?after_checkpoint_height,
231                    %time_since_last_state_block,
232                    "initial sync is very slow, or estimated tip is wrong. \
233                     Hint: check your network connection, \
234                     and your computer clock and time zone",
235                );
236
237                #[cfg(feature = "progress-bar")]
238                block_bar.desc(format!(
239                    "{}: sync is very slow, or estimated tip is wrong",
240                    network_upgrade
241                ));
242            } else if is_syncer_stopped && current_height <= after_checkpoint_height {
243                // We've stopped syncing blocks,
244                // but we're below the minimum height estimated from our checkpoints.
245                let min_minutes_after_checkpoint_update = div_ceil(
246                    MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE * POST_BLOSSOM_POW_TARGET_SPACING,
247                    60,
248                );
249
250                warn!(
251                    %sync_percent,
252                    ?current_height,
253                    ?network_upgrade,
254                    ?remaining_sync_blocks,
255                    ?after_checkpoint_height,
256                    %time_since_last_state_block,
257                    "initial sync is very slow, and state is below the highest checkpoint. \
258                     Hint: check your network connection, \
259                     and your computer clock and time zone. \
260                     Dev Hint: were the checkpoints updated in the last {} minutes?",
261                    min_minutes_after_checkpoint_update,
262                );
263
264                #[cfg(feature = "progress-bar")]
265                block_bar.desc(format!("{}: sync is very slow", network_upgrade));
266            } else if is_syncer_stopped {
267                // We've stayed near the tip for a while, and we've stopped syncing lots of blocks.
268                // So we're mostly using gossiped blocks now.
269                info!(
270                    %sync_percent,
271                    ?current_height,
272                    ?network_upgrade,
273                    ?remaining_sync_blocks,
274                    %time_since_last_state_block,
275                    "finished initial sync to chain tip, using gossiped blocks",
276                );
277
278                #[cfg(feature = "progress-bar")]
279                block_bar.desc(format!("{}: waiting for next block", network_upgrade));
280            } else if remaining_sync_blocks <= MAX_CLOSE_TO_TIP_BLOCKS {
281                // We estimate we're near the tip, but we have been syncing lots of blocks recently.
282                // We might also be using some gossiped blocks.
283                info!(
284                    %sync_percent,
285                    ?current_height,
286                    ?network_upgrade,
287                    ?remaining_sync_blocks,
288                    %time_since_last_state_block,
289                    "close to finishing initial sync, \
290                     confirming using syncer and gossiped blocks",
291                );
292
293                #[cfg(feature = "progress-bar")]
294                block_bar.desc(format!("{}: finishing initial sync", network_upgrade));
295            } else {
296                // We estimate we're far from the tip, and we've been syncing lots of blocks.
297                info!(
298                    %sync_percent,
299                    ?current_height,
300                    ?network_upgrade,
301                    ?remaining_sync_blocks,
302                    %time_since_last_state_block,
303                    "estimated progress to chain tip",
304                );
305
306                #[cfg(feature = "progress-bar")]
307                block_bar.desc(format!("{}: syncing blocks", network_upgrade));
308            }
309        } else {
310            let sync_percent = format!("{:.SYNC_PERCENT_FRAC_DIGITS$} %", 0.0f64,);
311            #[cfg(feature = "progress-bar")]
312            let network_upgrade = NetworkUpgrade::Genesis;
313
314            if is_syncer_stopped {
315                // We've stopped syncing blocks,
316                // but we haven't downloaded and verified the genesis block.
317                warn!(
318                    %sync_percent,
319                    current_height = %"None",
320                    "initial sync can't download and verify the genesis block. \
321                     Hint: check your network connection, \
322                     and your computer clock and time zone",
323                );
324
325                #[cfg(feature = "progress-bar")]
326                block_bar.desc(format!("{}: can't download genesis block", network_upgrade));
327            } else {
328                // We're waiting for the genesis block to be committed to the state,
329                // before we can estimate the best chain tip.
330                info!(
331                    %sync_percent,
332                    current_height = %"None",
333                    "initial sync is waiting to download the genesis block",
334                );
335
336                #[cfg(feature = "progress-bar")]
337                block_bar.desc(format!(
338                    "{}: waiting to download genesis block",
339                    network_upgrade
340                ));
341            }
342        }
343
344        tokio::time::sleep(min(LOG_INTERVAL, PROGRESS_BAR_INTERVAL)).await;
345    }
346}