zebrad/components/sync/progress.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
//! Progress tracking for blockchain syncing.
use std::{
cmp::min,
ops::Add,
time::{Duration, Instant},
};
use chrono::Utc;
use num_integer::div_ceil;
use zebra_chain::{
block::{Height, HeightDiff},
chain_sync_status::ChainSyncStatus,
chain_tip::ChainTip,
fmt::humantime_seconds,
parameters::{Network, NetworkUpgrade, POST_BLOSSOM_POW_TARGET_SPACING},
};
use zebra_consensus::ParameterCheckpoint as _;
use zebra_state::MAX_BLOCK_REORG_HEIGHT;
use crate::components::sync::SyncStatus;
/// The amount of time between progress logs.
const LOG_INTERVAL: Duration = Duration::from_secs(60);
/// The amount of time between progress bar updates.
const PROGRESS_BAR_INTERVAL: Duration = Duration::from_secs(5);
/// The number of blocks we consider to be close to the tip.
///
/// Most chain forks are 1-7 blocks long.
const MAX_CLOSE_TO_TIP_BLOCKS: HeightDiff = 1;
/// Skip slow sync warnings when we are this close to the tip.
///
/// In testing, we've seen warnings around 30 blocks.
///
/// TODO: replace with `MAX_CLOSE_TO_TIP_BLOCKS` after fixing slow syncing near tip (#3375)
const MIN_SYNC_WARNING_BLOCKS: HeightDiff = 60;
/// The number of fractional digits in sync percentages.
const SYNC_PERCENT_FRAC_DIGITS: usize = 3;
/// The minimum number of extra blocks mined between updating a checkpoint list,
/// and running an automated test that depends on that list.
///
/// Makes sure that the block finalization code always runs in sync tests,
/// even if the miner or test node clock is wrong by a few minutes.
///
/// This is an estimate based on the time it takes to:
/// - get the tip height from `zcashd`,
/// - run `zebra-checkpoints` to update the checkpoint list,
/// - submit a pull request, and
/// - run a CI test that logs progress based on the new checkpoint height.
///
/// We might add tests that sync from a cached tip state,
/// so we only allow a few extra blocks here.
//
// TODO: change to HeightDiff?
const MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE: u32 = 10;
/// Logs Zebra's estimated progress towards the chain tip every minute or so, and
/// updates a terminal progress bar every few seconds.
///
/// TODO:
/// - log progress towards, remaining blocks before, and remaining time to next network upgrade
/// - add some progress info to the metrics
pub async fn show_block_chain_progress(
network: Network,
latest_chain_tip: impl ChainTip,
sync_status: SyncStatus,
) -> ! {
// The minimum number of extra blocks after the highest checkpoint, based on:
// - the non-finalized state limit, and
// - the minimum number of extra blocks mined between a checkpoint update,
// and the automated tests for that update.
let min_after_checkpoint_blocks =
MAX_BLOCK_REORG_HEIGHT + MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE;
let min_after_checkpoint_blocks: HeightDiff = min_after_checkpoint_blocks.into();
// The minimum height of the valid best chain, based on:
// - the hard-coded checkpoint height,
// - the minimum number of blocks after the highest checkpoint.
let after_checkpoint_height = network
.checkpoint_list()
.max_height()
.add(min_after_checkpoint_blocks)
.expect("hard-coded checkpoint height is far below Height::MAX");
let target_block_spacing = NetworkUpgrade::target_spacing_for_height(&network, Height::MAX);
let max_block_spacing =
NetworkUpgrade::minimum_difficulty_spacing_for_height(&network, Height::MAX);
// We expect the state height to increase at least once in this interval.
//
// Most chain forks are 1-7 blocks long.
//
// TODO: remove the target_block_spacing multiplier,
// after fixing slow syncing near tip (#3375)
let min_state_block_interval = max_block_spacing.unwrap_or(target_block_spacing * 4) * 2;
// Formatted strings for logging.
let target_block_spacing = humantime_seconds(
target_block_spacing
.to_std()
.expect("constant fits in std::Duration"),
);
let max_block_spacing = max_block_spacing
.map(|duration| {
humantime_seconds(duration.to_std().expect("constant fits in std::Duration"))
})
.unwrap_or_else(|| "None".to_string());
// The last time we downloaded and verified at least one block.
//
// Initialized to the start time to simplify the code.
let mut last_state_change_time = Utc::now();
// The state tip height, when we last downloaded and verified at least one block.
//
// Initialized to the genesis height to simplify the code.
let mut last_state_change_height = Height(0);
// The last time we logged an update.
let mut last_log_time = Instant::now();
#[cfg(feature = "progress-bar")]
let block_bar = howudoin::new().label("Blocks");
loop {
let now = Utc::now();
let instant_now = Instant::now();
let is_syncer_stopped = sync_status.is_close_to_tip();
if let Some(estimated_height) =
latest_chain_tip.estimate_network_chain_tip_height(&network, now)
{
// The estimate/actual race doesn't matter here,
// because we're only using it for metrics and logging.
let current_height = latest_chain_tip
.best_tip_height()
.expect("unexpected empty state: estimate requires a block height");
let network_upgrade = NetworkUpgrade::current(&network, current_height);
// Send progress reports for block height
//
// TODO: split the progress bar height update into its own function.
#[cfg(feature = "progress-bar")]
if matches!(howudoin::cancelled(), Some(true)) {
block_bar.close();
} else {
block_bar
.set_pos(current_height.0)
.set_len(u64::from(estimated_height.0));
}
// Skip logging and status updates if it isn't time for them yet.
let elapsed_since_log = instant_now.saturating_duration_since(last_log_time);
if elapsed_since_log < LOG_INTERVAL {
tokio::time::sleep(PROGRESS_BAR_INTERVAL).await;
continue;
} else {
last_log_time = instant_now;
}
// TODO: split logging / status updates into their own function.
// Work out the sync progress towards the estimated tip.
let sync_progress = f64::from(current_height.0) / f64::from(estimated_height.0);
let sync_percent = format!(
"{:.frac$}%",
sync_progress * 100.0,
frac = SYNC_PERCENT_FRAC_DIGITS,
);
let mut remaining_sync_blocks = estimated_height - current_height;
if remaining_sync_blocks < 0 {
remaining_sync_blocks = 0;
}
// Work out how long it has been since the state height has increased.
//
// Non-finalized forks can decrease the height, we only want to track increases.
if current_height > last_state_change_height {
last_state_change_height = current_height;
last_state_change_time = now;
}
let time_since_last_state_block_chrono =
now.signed_duration_since(last_state_change_time);
let time_since_last_state_block = humantime_seconds(
time_since_last_state_block_chrono
.to_std()
.unwrap_or_default(),
);
if time_since_last_state_block_chrono > min_state_block_interval {
// The state tip height hasn't increased for a long time.
//
// Block verification can fail if the local node's clock is wrong.
warn!(
%sync_percent,
?current_height,
?network_upgrade,
%time_since_last_state_block,
%target_block_spacing,
%max_block_spacing,
?is_syncer_stopped,
"chain updates have stalled, \
state height has not increased for {} minutes. \
Hint: check your network connection, \
and your computer clock and time zone",
time_since_last_state_block_chrono.num_minutes(),
);
// TODO: use add_warn(), but only add each warning once
#[cfg(feature = "progress-bar")]
block_bar.desc(format!("{}: sync has stalled", network_upgrade));
} else if is_syncer_stopped && remaining_sync_blocks > MIN_SYNC_WARNING_BLOCKS {
// We've stopped syncing blocks, but we estimate we're a long way from the tip.
//
// TODO: warn after fixing slow syncing near tip (#3375)
info!(
%sync_percent,
?current_height,
?network_upgrade,
?remaining_sync_blocks,
?after_checkpoint_height,
%time_since_last_state_block,
"initial sync is very slow, or estimated tip is wrong. \
Hint: check your network connection, \
and your computer clock and time zone",
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!(
"{}: sync is very slow, or estimated tip is wrong",
network_upgrade
));
} else if is_syncer_stopped && current_height <= after_checkpoint_height {
// We've stopped syncing blocks,
// but we're below the minimum height estimated from our checkpoints.
let min_minutes_after_checkpoint_update = div_ceil(
MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE * POST_BLOSSOM_POW_TARGET_SPACING,
60,
);
warn!(
%sync_percent,
?current_height,
?network_upgrade,
?remaining_sync_blocks,
?after_checkpoint_height,
%time_since_last_state_block,
"initial sync is very slow, and state is below the highest checkpoint. \
Hint: check your network connection, \
and your computer clock and time zone. \
Dev Hint: were the checkpoints updated in the last {} minutes?",
min_minutes_after_checkpoint_update,
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!("{}: sync is very slow", network_upgrade));
} else if is_syncer_stopped {
// We've stayed near the tip for a while, and we've stopped syncing lots of blocks.
// So we're mostly using gossiped blocks now.
info!(
%sync_percent,
?current_height,
?network_upgrade,
?remaining_sync_blocks,
%time_since_last_state_block,
"finished initial sync to chain tip, using gossiped blocks",
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!("{}: waiting for next block", network_upgrade));
} else if remaining_sync_blocks <= MAX_CLOSE_TO_TIP_BLOCKS {
// We estimate we're near the tip, but we have been syncing lots of blocks recently.
// We might also be using some gossiped blocks.
info!(
%sync_percent,
?current_height,
?network_upgrade,
?remaining_sync_blocks,
%time_since_last_state_block,
"close to finishing initial sync, \
confirming using syncer and gossiped blocks",
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!("{}: finishing initial sync", network_upgrade));
} else {
// We estimate we're far from the tip, and we've been syncing lots of blocks.
info!(
%sync_percent,
?current_height,
?network_upgrade,
?remaining_sync_blocks,
%time_since_last_state_block,
"estimated progress to chain tip",
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!("{}: syncing blocks", network_upgrade));
}
} else {
let sync_percent = format!("{:.SYNC_PERCENT_FRAC_DIGITS$} %", 0.0f64,);
#[cfg(feature = "progress-bar")]
let network_upgrade = NetworkUpgrade::Genesis;
if is_syncer_stopped {
// We've stopped syncing blocks,
// but we haven't downloaded and verified the genesis block.
warn!(
%sync_percent,
current_height = %"None",
"initial sync can't download and verify the genesis block. \
Hint: check your network connection, \
and your computer clock and time zone",
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!("{}: can't download genesis block", network_upgrade));
} else {
// We're waiting for the genesis block to be committed to the state,
// before we can estimate the best chain tip.
info!(
%sync_percent,
current_height = %"None",
"initial sync is waiting to download the genesis block",
);
#[cfg(feature = "progress-bar")]
block_bar.desc(format!(
"{}: waiting to download genesis block",
network_upgrade
));
}
}
tokio::time::sleep(min(LOG_INTERVAL, PROGRESS_BAR_INTERVAL)).await;
}
}