zebra_scan/storage/db/
sapling.rs

1//! Sapling-specific database reading and writing.
2//!
3//! The sapling scanner database has the following format:
4//!
5//! | name               | Reading & Writing Key/Values                    |
6//! |--------------------|-------------------------------------------------|
7//! | [`SAPLING_TX_IDS`] | [`SaplingTxIdsCf`] & [`WriteSaplingTxIdsBatch`] |
8//!
9//! And types:
10//! `SaplingScannedResult`: same as `transaction::Hash`, but with bytes in display order.
11//! `None` is stored as a zero-length array of bytes.
12//!
13//! `SaplingScannedDatabaseIndex` = `SaplingScanningKey` | `TransactionLocation`
14//! `TransactionLocation` = `Height` | `TransactionIndex`
15//!
16//! This format allows us to efficiently find all the results for each key, and the latest height
17//! for each key.
18//!
19//! If there are no results for a height, we store `None` as the result for the coinbase
20//! transaction. This allows is to scan each key from the next height after we restart. We also use
21//! this mechanism to store key birthday heights, by storing the height before the birthday as the
22//! "last scanned" block.
23
24use std::{
25    collections::{BTreeMap, HashMap},
26    ops::RangeBounds,
27};
28
29use itertools::Itertools;
30
31use zebra_chain::block::Height;
32use zebra_state::{
33    DiskWriteBatch, SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
34    SaplingScanningKey, TransactionIndex, TransactionLocation, TypedColumnFamily, WriteTypedBatch,
35};
36
37use crate::storage::{Storage, INSERT_CONTROL_INTERVAL};
38
39/// The name of the sapling transaction IDs result column family.
40///
41/// This constant should be used so the compiler can detect typos.
42pub const SAPLING_TX_IDS: &str = "sapling_tx_ids";
43
44/// The type for reading sapling transaction IDs results from the database.
45///
46/// This constant should be used so the compiler can detect incorrectly typed accesses to the
47/// column family.
48pub type SaplingTxIdsCf<'cf> =
49    TypedColumnFamily<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>>;
50
51/// The type for writing sapling transaction IDs results from the database.
52///
53/// This constant should be used so the compiler can detect incorrectly typed accesses to the
54/// column family.
55pub type WriteSaplingTxIdsBatch<'cf> =
56    WriteTypedBatch<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>, DiskWriteBatch>;
57
58impl Storage {
59    // Reading Sapling database entries
60
61    /// Returns the result for a specific database index (key, block height, transaction index).
62    /// Returns `None` if the result is missing or an empty marker for a birthday or progress
63    /// height.
64    pub fn sapling_result_for_index(
65        &self,
66        index: &SaplingScannedDatabaseIndex,
67    ) -> Option<SaplingScannedResult> {
68        self.sapling_tx_ids_cf().zs_get(index).flatten()
69    }
70
71    /// Returns the results for a specific key and block height.
72    pub fn sapling_results_for_key_and_height(
73        &self,
74        sapling_key: &SaplingScanningKey,
75        height: Height,
76    ) -> BTreeMap<TransactionIndex, Option<SaplingScannedResult>> {
77        let kh_min = SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, height);
78        let kh_max = SaplingScannedDatabaseIndex::max_for_key_and_height(sapling_key, height);
79
80        self.sapling_results_in_range(kh_min..=kh_max)
81            .into_iter()
82            .map(|(result_index, txid)| (result_index.tx_loc.index, txid))
83            .collect()
84    }
85
86    /// Returns all the results for a specific key, indexed by height.
87    pub fn sapling_results_for_key(
88        &self,
89        sapling_key: &SaplingScanningKey,
90    ) -> BTreeMap<Height, Vec<SaplingScannedResult>> {
91        let k_min = SaplingScannedDatabaseIndex::min_for_key(sapling_key);
92        let k_max = SaplingScannedDatabaseIndex::max_for_key(sapling_key);
93
94        // Get an iterator of individual transaction results, and turn it into a HashMap by height
95        let results: HashMap<Height, Vec<Option<SaplingScannedResult>>> = self
96            .sapling_results_in_range(k_min..=k_max)
97            .into_iter()
98            .map(|(index, result)| (index.tx_loc.height, result))
99            .into_group_map();
100
101        // But we want Vec<SaplingScannedResult>, with empty Vecs instead of [None, None, ...]
102        results
103            .into_iter()
104            .map(|(index, vector)| -> (Height, Vec<SaplingScannedResult>) {
105                (index, vector.into_iter().flatten().collect())
106            })
107            .collect()
108    }
109
110    /// Returns all the keys and their last scanned heights.
111    pub fn sapling_keys_and_last_scanned_heights(&self) -> HashMap<SaplingScanningKey, Height> {
112        let sapling_tx_ids = self.sapling_tx_ids_cf();
113        let mut keys = HashMap::new();
114
115        let mut last_stored_record = sapling_tx_ids.zs_last_key_value();
116
117        while let Some((last_stored_record_index, _result)) = last_stored_record {
118            let sapling_key = last_stored_record_index.sapling_key.clone();
119            let height = last_stored_record_index.tx_loc.height;
120
121            let prev_height = keys.insert(sapling_key.clone(), height);
122            assert_eq!(
123                prev_height, None,
124                "unexpected duplicate key: keys must only be inserted once \
125                 last_stored_record_index: {last_stored_record_index:?}",
126            );
127
128            // Skip all the results until the next key.
129            last_stored_record = sapling_tx_ids.zs_prev_key_value_strictly_before(
130                &SaplingScannedDatabaseIndex::min_for_key(&sapling_key),
131            );
132        }
133
134        keys
135    }
136
137    /// Returns the Sapling indexes and results in the supplied range.
138    ///
139    /// Convenience method for accessing raw data with the correct types.
140    fn sapling_results_in_range(
141        &self,
142        range: impl RangeBounds<SaplingScannedDatabaseIndex>,
143    ) -> BTreeMap<SaplingScannedDatabaseIndex, Option<SaplingScannedResult>> {
144        self.sapling_tx_ids_cf().zs_items_in_range_ordered(range)
145    }
146
147    // Column family convenience methods
148
149    /// Returns a typed handle to the `sapling_tx_ids` column family.
150    pub(crate) fn sapling_tx_ids_cf(&self) -> SaplingTxIdsCf {
151        SaplingTxIdsCf::new(&self.db, SAPLING_TX_IDS)
152            .expect("column family was created when database was created")
153    }
154
155    // Writing database entries
156    //
157    // To avoid exposing internal types, and accidentally forgetting to write a batch,
158    // each pub(crate) write method should write an entire batch.
159
160    /// Inserts a batch of scanned sapling result for a key and height.
161    /// If a result already exists for that key, height, and index, it is replaced.
162    pub fn insert_sapling_results(
163        &mut self,
164        sapling_key: &SaplingScanningKey,
165        height: Height,
166        sapling_results: BTreeMap<TransactionIndex, SaplingScannedResult>,
167    ) {
168        // We skip key heights that have one or more results, so the results for each key height
169        // must be in a single batch.
170        let mut batch = self.sapling_tx_ids_cf().new_batch_for_writing();
171
172        // Every `INSERT_CONTROL_INTERVAL` we add a new entry to the scanner database for each key
173        // so we can track progress made in the last interval even if no transaction was yet found.
174        let needs_control_entry =
175            height.0 % INSERT_CONTROL_INTERVAL == 0 && sapling_results.is_empty();
176
177        // Add scanner progress tracking entry for key.
178        // Defensive programming: add the tracking entry first, so that we don't accidentally
179        // overwrite real results with it. (This is currently prevented by the empty check.)
180        if needs_control_entry {
181            batch = batch.insert_sapling_height(sapling_key, height);
182        }
183
184        for (index, sapling_result) in sapling_results {
185            let index = SaplingScannedDatabaseIndex {
186                sapling_key: sapling_key.clone(),
187                tx_loc: TransactionLocation::from_parts(height, index),
188            };
189
190            let entry = SaplingScannedDatabaseEntry {
191                index,
192                value: Some(sapling_result),
193            };
194
195            batch = batch.zs_insert(&entry.index, &entry.value);
196        }
197
198        batch
199            .write_batch()
200            .expect("unexpected database write failure");
201    }
202
203    /// Insert a sapling scanning `key`, and mark all heights before `birthday_height` so they
204    /// won't be scanned.
205    ///
206    /// If a result already exists for the coinbase transaction at the height before the birthday,
207    /// it is replaced with an empty result. This can happen if the user increases the birthday
208    /// height.
209    ///
210    /// TODO: ignore incorrect changes to birthday heights
211    pub(crate) fn insert_sapling_key(
212        &mut self,
213        sapling_key: &SaplingScanningKey,
214        birthday_height: Option<Height>,
215    ) {
216        let min_birthday_height = self.network().sapling_activation_height();
217
218        // The birthday height must be at least the minimum height for that pool.
219        let birthday_height = birthday_height
220            .unwrap_or(min_birthday_height)
221            .max(min_birthday_height);
222        // And we want to skip up to the height before it.
223        let skip_up_to_height = birthday_height.previous().unwrap_or(Height::MIN);
224
225        // It's ok to write some keys and not others during shutdown, so each key can get its own
226        // batch. (They will be re-written on startup anyway.)
227        //
228        // TODO: ignore incorrect changes to birthday heights,
229        //       and redundant birthday heights
230        self.sapling_tx_ids_cf()
231            .new_batch_for_writing()
232            .insert_sapling_height(sapling_key, skip_up_to_height)
233            .write_batch()
234            .expect("unexpected database write failure");
235    }
236
237    /// Delete the sapling keys and their results, if they exist,
238    pub fn delete_sapling_keys(&mut self, keys: Vec<SaplingScanningKey>) {
239        self.sapling_tx_ids_cf()
240            .new_batch_for_writing()
241            .delete_sapling_keys(keys)
242            .write_batch()
243            .expect("unexpected database write failure");
244    }
245
246    /// Delete the results of sapling scanning `keys`, if they exist
247    pub(crate) fn delete_sapling_results(&mut self, keys: Vec<SaplingScanningKey>) {
248        let mut batch = self
249            .sapling_tx_ids_cf()
250            .new_batch_for_writing()
251            .delete_sapling_keys(keys.clone());
252
253        for key in &keys {
254            batch = batch.insert_sapling_height(key, Height::MIN);
255        }
256
257        batch
258            .write_batch()
259            .expect("unexpected database write failure");
260    }
261}
262
263/// Utility trait for inserting sapling heights into a WriteSaplingTxIdsBatch.
264trait InsertSaplingHeight {
265    fn insert_sapling_height(self, sapling_key: &SaplingScanningKey, height: Height) -> Self;
266}
267
268impl InsertSaplingHeight for WriteSaplingTxIdsBatch<'_> {
269    /// Insert sapling height with no results.
270    ///
271    /// If a result already exists for the coinbase transaction at that height,
272    /// it is replaced with an empty result. This should never happen.
273    fn insert_sapling_height(self, sapling_key: &SaplingScanningKey, height: Height) -> Self {
274        let index = SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, height);
275
276        // TODO: assert that we don't overwrite any entries here.
277        self.zs_insert(&index, &None)
278    }
279}
280
281/// Utility trait for deleting sapling keys in a WriteSaplingTxIdsBatch.
282trait DeleteSaplingKeys {
283    fn delete_sapling_keys(self, sapling_key: Vec<SaplingScanningKey>) -> Self;
284}
285
286impl DeleteSaplingKeys for WriteSaplingTxIdsBatch<'_> {
287    /// Delete sapling keys and their results.
288    fn delete_sapling_keys(mut self, sapling_keys: Vec<SaplingScanningKey>) -> Self {
289        for key in &sapling_keys {
290            let from_index = SaplingScannedDatabaseIndex::min_for_key(key);
291            let until_strictly_before_index = SaplingScannedDatabaseIndex::max_for_key(key);
292
293            self = self
294                .zs_delete_range(&from_index, &until_strictly_before_index)
295                // TODO: convert zs_delete_range() to take std::ops::RangeBounds
296                .zs_delete(&until_strictly_before_index);
297        }
298
299        self
300    }
301}