zebra_state/service/read/
find.rs

1//! Finding and reading block hashes and headers, in response to peer requests.
2//!
3//! In the functions in this module:
4//!
5//! The block write task commits blocks to the finalized state before updating
6//! `chain` with a cached copy of the best non-finalized chain from
7//! `NonFinalizedState.chain_set`. Then the block commit task can commit additional blocks to
8//! the finalized state after we've cloned the `chain`.
9//!
10//! This means that some blocks can be in both:
11//! - the cached [`Chain`], and
12//! - the shared finalized [`ZebraDb`] reference.
13
14use std::{
15    iter,
16    ops::{RangeBounds, RangeInclusive},
17    sync::Arc,
18};
19
20use chrono::{DateTime, Utc};
21use zebra_chain::{
22    amount::NonNegative,
23    block::{self, Block, Height},
24    serialization::DateTime32,
25    value_balance::ValueBalance,
26};
27
28use crate::{
29    constants,
30    service::{
31        block_iter::any_ancestor_blocks,
32        check::{difficulty::POW_MEDIAN_BLOCK_SPAN, AdjustedDifficulty},
33        finalized_state::ZebraDb,
34        non_finalized_state::{Chain, NonFinalizedState},
35        read::{self, block::block_header, FINALIZED_STATE_QUERY_RETRIES},
36    },
37    BoxError, KnownBlock,
38};
39
40#[cfg(test)]
41mod tests;
42
43/// Returns the tip of the best chain in the non-finalized or finalized state.
44pub fn best_tip(
45    non_finalized_state: &NonFinalizedState,
46    db: &ZebraDb,
47) -> Option<(block::Height, block::Hash)> {
48    tip(non_finalized_state.best_chain(), db)
49}
50
51/// Returns the tip of `chain`.
52/// If there is no chain, returns the tip of `db`.
53pub fn tip<C>(chain: Option<C>, db: &ZebraDb) -> Option<(Height, block::Hash)>
54where
55    C: AsRef<Chain>,
56{
57    // # Correctness
58    //
59    // If there is an overlap between the non-finalized and finalized states,
60    // where the finalized tip is above the non-finalized tip,
61    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
62    // so it is acceptable to return either tip.
63    chain
64        .map(|chain| chain.as_ref().non_finalized_tip())
65        .or_else(|| db.tip())
66}
67
68/// Returns the tip [`Height`] of `chain`.
69/// If there is no chain, returns the tip of `db`.
70pub fn tip_height<C>(chain: Option<C>, db: &ZebraDb) -> Option<Height>
71where
72    C: AsRef<Chain>,
73{
74    tip(chain, db).map(|(height, _hash)| height)
75}
76
77/// Returns the tip [`block::Hash`] of `chain`.
78/// If there is no chain, returns the tip of `db`.
79#[allow(dead_code)]
80pub fn tip_hash<C>(chain: Option<C>, db: &ZebraDb) -> Option<block::Hash>
81where
82    C: AsRef<Chain>,
83{
84    tip(chain, db).map(|(_height, hash)| hash)
85}
86
87/// Returns the tip of `chain` with its [`ValueBalance`].
88/// If there is no chain, returns the tip of `db`.
89pub fn tip_with_value_balance<C>(
90    chain: Option<C>,
91    db: &ZebraDb,
92) -> Result<Option<(Height, block::Hash, ValueBalance<NonNegative>)>, BoxError>
93where
94    C: AsRef<Chain>,
95{
96    match chain.map(|chain| chain.as_ref().non_finalized_tip_with_value_balance()) {
97        tip_with_value_balance @ Some(_) => Ok(tip_with_value_balance),
98        None => {
99            // Retry the finalized state query if it was interrupted by a finalizing block.
100            //
101            // TODO: refactor this into a generic retry(finalized_closure, process_and_check_closure) fn
102            for _ in 0..=FINALIZED_STATE_QUERY_RETRIES {
103                let tip @ Some((tip_height, tip_hash)) = db.tip() else {
104                    return Ok(None);
105                };
106
107                let value_balance = db.finalized_value_pool();
108
109                if tip == db.tip() {
110                    return Ok(Some((tip_height, tip_hash, value_balance)));
111                }
112            }
113
114            Err("Zebra is committing too many blocks to the state, \
115                    wait until it syncs to the chain tip"
116                .into())
117        }
118    }
119}
120
121/// Return the depth of block `hash` from the chain tip.
122/// Searches `chain` for `hash`, then searches `db`.
123pub fn depth<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> Option<u32>
124where
125    C: AsRef<Chain>,
126{
127    let chain = chain.as_ref();
128
129    // # Correctness
130    //
131    // It is ok to do this lookup in two different calls. Finalized state updates
132    // can only add overlapping blocks, and hashes are unique.
133
134    let tip = tip_height(chain, db)?;
135    let height = height_by_hash(chain, db, hash)?;
136
137    Some(tip.0 - height.0)
138}
139
140/// Returns the location of the block if present in the non-finalized state.
141/// Returns None if the block hash is not found in the non-finalized state.
142pub fn non_finalized_state_contains_block_hash(
143    non_finalized_state: &NonFinalizedState,
144    hash: block::Hash,
145) -> Option<KnownBlock> {
146    let mut chains_iter = non_finalized_state.chain_iter();
147    let is_hash_in_chain = |chain: &Arc<Chain>| chain.contains_block_hash(hash);
148
149    // Equivalent to `chain_set.iter().next_back()` in `NonFinalizedState.best_chain()` method.
150    let best_chain = chains_iter.next();
151
152    match best_chain.map(is_hash_in_chain) {
153        Some(true) => Some(KnownBlock::BestChain),
154        Some(false) if chains_iter.any(is_hash_in_chain) => Some(KnownBlock::SideChain),
155        Some(false) | None => None,
156    }
157}
158
159/// Returns the location of the block if present in the finalized state.
160/// Returns None if the block hash is not found in the finalized state.
161pub fn finalized_state_contains_block_hash(db: &ZebraDb, hash: block::Hash) -> Option<KnownBlock> {
162    db.contains_hash(hash).then_some(KnownBlock::BestChain)
163}
164
165/// Return the height for the block at `hash`, if `hash` is in `chain` or `db`.
166pub fn height_by_hash<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> Option<Height>
167where
168    C: AsRef<Chain>,
169{
170    // # Correctness
171    //
172    // Finalized state updates can only add overlapping blocks, and hashes are unique.
173
174    chain
175        .and_then(|chain| chain.as_ref().height_by_hash(hash))
176        .or_else(|| db.height(hash))
177}
178
179/// Return the hash for the block at `height`, if `height` is in `chain` or `db`.
180pub fn hash_by_height<C>(chain: Option<C>, db: &ZebraDb, height: Height) -> Option<block::Hash>
181where
182    C: AsRef<Chain>,
183{
184    // # Correctness
185    //
186    // Finalized state updates can only add overlapping blocks, and heights are unique
187    // in the current `chain`.
188    //
189    // If there is an overlap between the non-finalized and finalized states,
190    // where the finalized tip is above the non-finalized tip,
191    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
192    // so it is acceptable to return hashes from either chain.
193
194    chain
195        .and_then(|chain| chain.as_ref().hash_by_height(height))
196        .or_else(|| db.hash(height))
197}
198
199/// Return true if `hash` is in `chain` or `db`.
200pub fn chain_contains_hash<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> bool
201where
202    C: AsRef<Chain>,
203{
204    // # Correctness
205    //
206    // Finalized state updates can only add overlapping blocks, and hashes are unique.
207    //
208    // If there is an overlap between the non-finalized and finalized states,
209    // where the finalized tip is above the non-finalized tip,
210    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
211    // so it is acceptable to return hashes from either chain.
212
213    chain
214        .map(|chain| chain.as_ref().height_by_hash.contains_key(&hash))
215        .unwrap_or(false)
216        || db.contains_hash(hash)
217}
218
219/// Create a block locator from `chain` and `db`.
220///
221/// A block locator is used to efficiently find an intersection of two node's chains.
222/// It contains a list of block hashes at decreasing heights, skipping some blocks,
223/// so that any intersection can be located, no matter how long or different the chains are.
224pub fn block_locator<C>(chain: Option<C>, db: &ZebraDb) -> Option<Vec<block::Hash>>
225where
226    C: AsRef<Chain>,
227{
228    let chain = chain.as_ref();
229
230    // # Correctness
231    //
232    // It is ok to do these lookups using multiple database calls. Finalized state updates
233    // can only add overlapping blocks, and hashes are unique.
234    //
235    // If there is an overlap between the non-finalized and finalized states,
236    // where the finalized tip is above the non-finalized tip,
237    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
238    // so it is acceptable to return a set of hashes from multiple chains.
239    //
240    // Multiple heights can not map to the same hash, even in different chains,
241    // because the block height is covered by the block hash,
242    // via the transaction merkle tree commitments.
243    let tip_height = tip_height(chain, db)?;
244
245    let heights = block_locator_heights(tip_height);
246    let mut hashes = Vec::with_capacity(heights.len());
247
248    for height in heights {
249        if let Some(hash) = hash_by_height(chain, db, height) {
250            hashes.push(hash);
251        }
252    }
253
254    Some(hashes)
255}
256
257/// Get the heights of the blocks for constructing a block_locator list.
258///
259/// Zebra uses a decreasing list of block heights, starting at the tip, and skipping some heights.
260/// See [`block_locator()`] for details.
261pub fn block_locator_heights(tip_height: block::Height) -> Vec<block::Height> {
262    // The initial height in the returned `vec` is the tip height,
263    // and the final height is `MAX_BLOCK_REORG_HEIGHT` below the tip.
264    //
265    // The initial distance between heights is 1, and it doubles between each subsequent height.
266    // So the number of returned heights is approximately `log_2(MAX_BLOCK_REORG_HEIGHT)`.
267
268    // Limit the maximum locator depth.
269    let min_locator_height = tip_height
270        .0
271        .saturating_sub(constants::MAX_BLOCK_REORG_HEIGHT);
272
273    // Create an exponentially decreasing set of heights.
274    let exponential_locators = iter::successors(Some(1u32), |h| h.checked_mul(2))
275        .flat_map(move |step| tip_height.0.checked_sub(step));
276
277    // Start at the tip, add decreasing heights, and end MAX_BLOCK_REORG_HEIGHT below the tip.
278    let locators = iter::once(tip_height.0)
279        .chain(exponential_locators)
280        .take_while(move |&height| height > min_locator_height)
281        .chain(iter::once(min_locator_height))
282        .map(block::Height)
283        .collect();
284
285    tracing::debug!(
286        ?tip_height,
287        ?min_locator_height,
288        ?locators,
289        "created block locator"
290    );
291
292    locators
293}
294
295/// Find the first hash that's in the peer's `known_blocks`, and in `chain` or `db`.
296///
297/// Returns `None` if:
298///   * there is no matching hash in the chain, or
299///   * the state is empty.
300fn find_chain_intersection<C>(
301    chain: Option<C>,
302    db: &ZebraDb,
303    known_blocks: Vec<block::Hash>,
304) -> Option<block::Hash>
305where
306    C: AsRef<Chain>,
307{
308    // We can get a block locator request before we have downloaded the genesis block
309    if chain.is_none() && db.is_empty() {
310        return None;
311    }
312
313    let chain = chain.as_ref();
314
315    known_blocks
316        .iter()
317        .find(|&&hash| chain_contains_hash(chain, db, hash))
318        .cloned()
319}
320
321/// Returns a range of [`Height`]s in the chain,
322/// starting after the `intersection` hash on the chain.
323///
324/// See [`find_chain_hashes()`] for details.
325fn find_chain_height_range<C>(
326    chain: Option<C>,
327    db: &ZebraDb,
328    intersection: Option<block::Hash>,
329    stop: Option<block::Hash>,
330    max_len: u32,
331) -> impl RangeBounds<u32> + Iterator<Item = u32>
332where
333    C: AsRef<Chain>,
334{
335    #[allow(clippy::reversed_empty_ranges)]
336    const EMPTY_RANGE: RangeInclusive<u32> = 1..=0;
337
338    assert!(max_len > 0, "max_len must be at least 1");
339
340    let chain = chain.as_ref();
341
342    // We can get a block locator request before we have downloaded the genesis block
343    let chain_tip_height = if let Some(height) = tip_height(chain, db) {
344        height
345    } else {
346        tracing::debug!(
347            response_len = ?0,
348            "responding to peer GetBlocks or GetHeaders with empty state",
349        );
350
351        return EMPTY_RANGE;
352    };
353
354    // Find the intersection height
355    let intersection_height = match intersection {
356        Some(intersection_hash) => match height_by_hash(chain, db, intersection_hash) {
357            Some(intersection_height) => Some(intersection_height),
358
359            // A recently committed block dropped the intersection we previously found
360            None => {
361                info!(
362                    ?intersection,
363                    ?stop,
364                    ?max_len,
365                    "state found intersection but then dropped it, ignoring request",
366                );
367                return EMPTY_RANGE;
368            }
369        },
370        // There is no intersection
371        None => None,
372    };
373
374    // Now find the start and maximum heights
375    let (start_height, max_height) = match intersection_height {
376        // start after the intersection_height, and return max_len hashes or headers
377        Some(intersection_height) => (
378            Height(intersection_height.0 + 1),
379            Height(intersection_height.0 + max_len),
380        ),
381        // start at genesis, and return max_len hashes or headers
382        None => (Height(0), Height(max_len - 1)),
383    };
384
385    let stop_height = stop.and_then(|hash| height_by_hash(chain, db, hash));
386
387    // Compute the final height, making sure it is:
388    //   * at or below our chain tip, and
389    //   * at or below the height of the stop hash.
390    let final_height = std::cmp::min(max_height, chain_tip_height);
391    let final_height = stop_height
392        .map(|stop_height| std::cmp::min(final_height, stop_height))
393        .unwrap_or(final_height);
394
395    // TODO: implement Step for Height, when Step stabilises
396    //       https://github.com/rust-lang/rust/issues/42168
397    let height_range = start_height.0..=final_height.0;
398    let response_len = height_range.clone().count();
399
400    tracing::debug!(
401        ?start_height,
402        ?final_height,
403        ?response_len,
404        ?chain_tip_height,
405        ?stop_height,
406        ?intersection_height,
407        ?intersection,
408        ?stop,
409        ?max_len,
410        "responding to peer GetBlocks or GetHeaders",
411    );
412
413    // Check the function implements the Find protocol
414    assert!(
415        response_len <= max_len.try_into().expect("fits in usize"),
416        "a Find response must not exceed the maximum response length",
417    );
418
419    height_range
420}
421
422/// Returns a list of [`block::Hash`]es in the chain,
423/// following the `intersection` with the chain.
424///
425/// See [`find_chain_hashes()`] for details.
426fn collect_chain_hashes<C>(
427    chain: Option<C>,
428    db: &ZebraDb,
429    intersection: Option<block::Hash>,
430    stop: Option<block::Hash>,
431    max_len: u32,
432) -> Vec<block::Hash>
433where
434    C: AsRef<Chain>,
435{
436    let chain = chain.as_ref();
437
438    let height_range = find_chain_height_range(chain, db, intersection, stop, max_len);
439
440    // All the hashes should be in the chain.
441    // If they are not, we don't want to return them.
442    let hashes: Vec<block::Hash> = height_range.into_iter().map_while(|height| {
443        let hash = hash_by_height(chain, db, Height(height));
444
445        // A recently committed block dropped the intersection we previously found.
446        if hash.is_none() {
447            info!(
448                ?intersection,
449                ?stop,
450                ?max_len,
451                "state found height range, but then partially dropped it, returning partial response",
452            );
453        }
454
455        tracing::trace!(
456            ?hash,
457            ?height,
458            ?intersection,
459            ?stop,
460            ?max_len,
461            "adding hash to peer Find response",
462        );
463
464        hash
465    }).collect();
466
467    // Check the function implements the Find protocol
468    assert!(
469        intersection
470            .map(|hash| !hashes.contains(&hash))
471            .unwrap_or(true),
472        "the list must not contain the intersection hash",
473    );
474
475    if let (Some(stop), Some((_, hashes_except_last))) = (stop, hashes.split_last()) {
476        assert!(
477            !hashes_except_last.contains(&stop),
478            "if the stop hash is in the list, it must be the final hash",
479        );
480    }
481
482    hashes
483}
484
485/// Returns a list of [`block::Header`]s in the chain,
486/// following the `intersection` with the chain.
487///
488/// See [`find_chain_hashes()`] for details.
489fn collect_chain_headers<C>(
490    chain: Option<C>,
491    db: &ZebraDb,
492    intersection: Option<block::Hash>,
493    stop: Option<block::Hash>,
494    max_len: u32,
495) -> Vec<Arc<block::Header>>
496where
497    C: AsRef<Chain>,
498{
499    let chain = chain.as_ref();
500
501    let height_range = find_chain_height_range(chain, db, intersection, stop, max_len);
502
503    // We don't check that this function implements the Find protocol,
504    // because fetching extra hashes (or re-calculating hashes) is expensive.
505    // (This was one of the most expensive and longest-running functions in the state.)
506
507    // All the headers should be in the chain.
508    // If they are not, we don't want to return them.
509    height_range.into_iter().map_while(|height| {
510        let header = block_header(chain, db, Height(height).into());
511
512        // A recently committed block dropped the intersection we previously found
513        if header.is_none() {
514            info!(
515                ?intersection,
516                ?stop,
517                ?max_len,
518                "state found height range, but then partially dropped it, returning partial response",
519            );
520        }
521
522        tracing::trace!(
523            ?height,
524            ?intersection,
525            ?stop,
526            ?max_len,
527            "adding header to peer Find response",
528        );
529
530        header
531    }).collect()
532}
533
534/// Finds the first hash that's in the peer's `known_blocks` and the chain.
535/// Returns a list of hashes that follow that intersection, from the chain.
536///
537/// Starts from the first matching hash in the chain, ignoring all other hashes in
538/// `known_blocks`. If there is no matching hash in the chain, starts from the genesis
539/// hash.
540///
541/// Includes finalized and non-finalized blocks.
542///
543/// Stops the list of hashes after:
544///   * adding the tip,
545///   * adding the `stop` hash to the list, if it is in the chain, or
546///   * adding `max_len` hashes to the list.
547///
548/// Returns an empty list if the state is empty,
549/// and a partial or empty list if the found heights are concurrently modified.
550pub fn find_chain_hashes<C>(
551    chain: Option<C>,
552    db: &ZebraDb,
553    known_blocks: Vec<block::Hash>,
554    stop: Option<block::Hash>,
555    max_len: u32,
556) -> Vec<block::Hash>
557where
558    C: AsRef<Chain>,
559{
560    // # Correctness
561    //
562    // See the note in `block_locator()`.
563
564    let chain = chain.as_ref();
565    let intersection = find_chain_intersection(chain, db, known_blocks);
566
567    collect_chain_hashes(chain, db, intersection, stop, max_len)
568}
569
570/// Finds the first hash that's in the peer's `known_blocks` and the chain.
571/// Returns a list of headers that follow that intersection, from the chain.
572///
573/// See [`find_chain_hashes()`] for details.
574pub fn find_chain_headers<C>(
575    chain: Option<C>,
576    db: &ZebraDb,
577    known_blocks: Vec<block::Hash>,
578    stop: Option<block::Hash>,
579    max_len: u32,
580) -> Vec<Arc<block::Header>>
581where
582    C: AsRef<Chain>,
583{
584    // # Correctness
585    //
586    // Headers are looked up by their hashes using a unique mapping,
587    // so it is not possible for multiple hashes to look up the same header,
588    // even across different chains.
589    //
590    // See also the note in `block_locator()`.
591
592    let chain = chain.as_ref();
593    let intersection = find_chain_intersection(chain, db, known_blocks);
594
595    collect_chain_headers(chain, db, intersection, stop, max_len)
596}
597
598/// Returns the median-time-past of the *next* block to be added to the best chain in
599/// `non_finalized_state` or `db`.
600///
601/// # Panics
602///
603/// - If we don't have enough blocks in the state.
604pub fn next_median_time_past(
605    non_finalized_state: &NonFinalizedState,
606    db: &ZebraDb,
607) -> Result<DateTime32, BoxError> {
608    let mut best_relevant_chain_result = best_relevant_chain(non_finalized_state, db);
609
610    // Retry the finalized state query if it was interrupted by a finalizing block.
611    //
612    // TODO: refactor this into a generic retry(finalized_closure, process_and_check_closure) fn
613    for _ in 0..FINALIZED_STATE_QUERY_RETRIES {
614        if best_relevant_chain_result.is_ok() {
615            break;
616        }
617
618        best_relevant_chain_result = best_relevant_chain(non_finalized_state, db);
619    }
620
621    Ok(calculate_median_time_past(best_relevant_chain_result?))
622}
623
624/// Do a consistency check by checking the finalized tip before and after all other database queries.
625///
626/// Returns recent blocks in reverse height order from the tip.
627/// Returns an error if the tip obtained before and after is not the same.
628///
629/// # Panics
630///
631/// - If we don't have enough blocks in the state.
632fn best_relevant_chain(
633    non_finalized_state: &NonFinalizedState,
634    db: &ZebraDb,
635) -> Result<Vec<Arc<Block>>, BoxError> {
636    let state_tip_before_queries = read::best_tip(non_finalized_state, db).ok_or_else(|| {
637        BoxError::from("Zebra's state is empty, wait until it syncs to the chain tip")
638    })?;
639
640    let best_relevant_chain =
641        any_ancestor_blocks(non_finalized_state, db, state_tip_before_queries.1);
642    let best_relevant_chain: Vec<_> = best_relevant_chain
643        .into_iter()
644        .take(POW_MEDIAN_BLOCK_SPAN)
645        .collect();
646
647    if best_relevant_chain.is_empty() {
648        return Err("missing genesis block, wait until it is committed".into());
649    };
650
651    let state_tip_after_queries =
652        read::best_tip(non_finalized_state, db).expect("already checked for an empty tip");
653
654    if state_tip_before_queries != state_tip_after_queries {
655        return Err("Zebra is committing too many blocks to the state, \
656                    wait until it syncs to the chain tip"
657            .into());
658    }
659
660    Ok(best_relevant_chain)
661}
662
663/// Returns the median-time-past for the provided `relevant_chain`.
664///
665/// The `relevant_chain` has blocks in reverse height order.
666///
667/// See [`next_median_time_past()`] for details.
668pub(crate) fn calculate_median_time_past(relevant_chain: Vec<Arc<Block>>) -> DateTime32 {
669    let relevant_data: Vec<DateTime<Utc>> = relevant_chain
670        .iter()
671        .map(|block| block.header.time)
672        .collect();
673
674    // > Define the median-time-past of a block to be the median of the nTime fields of the
675    // > preceding PoWMedianBlockSpan blocks (or all preceding blocks if there are fewer than
676    // > PoWMedianBlockSpan). The median-time-past of a genesis block is not defined.
677    // https://zips.z.cash/protocol/protocol.pdf#blockheader
678    let median_time_past = AdjustedDifficulty::median_time(relevant_data);
679
680    DateTime32::try_from(median_time_past).expect("valid blocks have in-range times")
681}