zebra_chain/block/
merkle.rs

1//! The Bitcoin-inherited Merkle tree of transactions.
2
3use std::{fmt, io::Write};
4
5use hex::{FromHex, ToHex};
6
7use crate::{
8    serialization::sha256d,
9    transaction::{self, Transaction, UnminedTx, UnminedTxId, VerifiedUnminedTx},
10};
11
12#[cfg(any(test, feature = "proptest-impl"))]
13use proptest_derive::Arbitrary;
14
15/// The root of the Bitcoin-inherited transaction Merkle tree, binding the
16/// block header to the transactions in the block.
17///
18/// Note: for V5-onward transactions it does not bind to authorizing data
19/// (signature and proofs) which makes it non-malleable [ZIP-244].
20///
21/// Note that because of a flaw in Bitcoin's design, the `merkle_root` does
22/// not always precisely bind the contents of the block (CVE-2012-2459). It
23/// is sometimes possible for an attacker to create multiple distinct sets of
24/// transactions with the same Merkle root, although only one set will be
25/// valid.
26///
27/// # Malleability
28///
29/// The Bitcoin source code contains the following note:
30///
31/// > WARNING! If you're reading this because you're learning about crypto
32/// > and/or designing a new system that will use merkle trees, keep in mind
33/// > that the following merkle tree algorithm has a serious flaw related to
34/// > duplicate txids, resulting in a vulnerability (CVE-2012-2459).
35/// > The reason is that if the number of hashes in the list at a given time
36/// > is odd, the last one is duplicated before computing the next level (which
37/// > is unusual in Merkle trees). This results in certain sequences of
38/// > transactions leading to the same merkle root. For example, these two
39/// > trees:
40/// >
41/// > ```ascii
42/// >              A               A
43/// >            /  \            /   \
44/// >          B     C         B       C
45/// >         / \    |        / \     / \
46/// >        D   E   F       D   E   F   F
47/// >       / \ / \ / \     / \ / \ / \ / \
48/// >       1 2 3 4 5 6     1 2 3 4 5 6 5 6
49/// > ```
50/// >
51/// > for transaction lists \[1,2,3,4,5,6\] and \[1,2,3,4,5,6,5,6\] (where 5 and
52/// > 6 are repeated) result in the same root hash A (because the hash of both
53/// > of (F) and (F,F) is C).
54/// >
55/// > The vulnerability results from being able to send a block with such a
56/// > transaction list, with the same merkle root, and the same block hash as
57/// > the original without duplication, resulting in failed validation. If the
58/// > receiving node proceeds to mark that block as permanently invalid
59/// > however, it will fail to accept further unmodified (and thus potentially
60/// > valid) versions of the same block. We defend against this by detecting
61/// > the case where we would hash two identical hashes at the end of the list
62/// > together, and treating that identically to the block having an invalid
63/// > merkle root. Assuming no double-SHA256 collisions, this will detect all
64/// > known ways of changing the transactions without affecting the merkle
65/// > root.
66///
67/// This vulnerability does not apply to Zebra, because it does not store invalid
68/// data on disk, and because it does not permanently fail blocks or use an
69/// aggressive anti-DoS mechanism.
70///
71/// [ZIP-244]: https://zips.z.cash/zip-0244
72#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
73#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary, Default))]
74pub struct Root(pub [u8; 32]);
75
76impl fmt::Debug for Root {
77    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
78        f.debug_tuple("Root").field(&hex::encode(self.0)).finish()
79    }
80}
81
82impl From<[u8; 32]> for Root {
83    fn from(hash: [u8; 32]) -> Self {
84        Root(hash)
85    }
86}
87
88impl From<Root> for [u8; 32] {
89    fn from(hash: Root) -> Self {
90        hash.0
91    }
92}
93
94impl Root {
95    /// Return the hash bytes in big-endian byte-order suitable for printing out byte by byte.
96    ///
97    /// Zebra displays transaction and block hashes in big-endian byte-order,
98    /// following the u256 convention set by Bitcoin and zcashd.
99    pub fn bytes_in_display_order(&self) -> [u8; 32] {
100        let mut reversed_bytes = self.0;
101        reversed_bytes.reverse();
102        reversed_bytes
103    }
104
105    /// Convert bytes in big-endian byte-order into a [`merkle::Root`](crate::block::merkle::Root).
106    ///
107    /// Zebra displays transaction and block hashes in big-endian byte-order,
108    /// following the u256 convention set by Bitcoin and zcashd.
109    pub fn from_bytes_in_display_order(bytes_in_display_order: &[u8; 32]) -> Root {
110        let mut internal_byte_order = *bytes_in_display_order;
111        internal_byte_order.reverse();
112
113        Root(internal_byte_order)
114    }
115}
116
117impl ToHex for &Root {
118    fn encode_hex<T: FromIterator<char>>(&self) -> T {
119        self.bytes_in_display_order().encode_hex()
120    }
121
122    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
123        self.bytes_in_display_order().encode_hex_upper()
124    }
125}
126
127impl ToHex for Root {
128    fn encode_hex<T: FromIterator<char>>(&self) -> T {
129        (&self).encode_hex()
130    }
131
132    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
133        (&self).encode_hex_upper()
134    }
135}
136
137impl FromHex for Root {
138    type Error = <[u8; 32] as FromHex>::Error;
139
140    fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error> {
141        let mut hash = <[u8; 32]>::from_hex(hex)?;
142        hash.reverse();
143
144        Ok(hash.into())
145    }
146}
147
148fn hash(h1: &[u8; 32], h2: &[u8; 32]) -> [u8; 32] {
149    let mut w = sha256d::Writer::default();
150    w.write_all(h1).unwrap();
151    w.write_all(h2).unwrap();
152    w.finish()
153}
154
155fn auth_data_hash(h1: &[u8; 32], h2: &[u8; 32]) -> [u8; 32] {
156    // > Non-leaf hashes in this tree are BLAKE2b-256 hashes personalized by
157    // > the string "ZcashAuthDatHash".
158    // https://zips.z.cash/zip-0244#block-header-changes
159    blake2b_simd::Params::new()
160        .hash_length(32)
161        .personal(b"ZcashAuthDatHash")
162        .to_state()
163        .update(h1)
164        .update(h2)
165        .finalize()
166        .as_bytes()
167        .try_into()
168        .expect("32 byte array")
169}
170
171impl<T> std::iter::FromIterator<T> for Root
172where
173    T: std::convert::AsRef<Transaction>,
174{
175    fn from_iter<I>(transactions: I) -> Self
176    where
177        I: IntoIterator<Item = T>,
178    {
179        transactions
180            .into_iter()
181            .map(|tx| tx.as_ref().hash())
182            .collect()
183    }
184}
185
186impl std::iter::FromIterator<UnminedTx> for Root {
187    fn from_iter<I>(transactions: I) -> Self
188    where
189        I: IntoIterator<Item = UnminedTx>,
190    {
191        transactions
192            .into_iter()
193            .map(|tx| tx.id.mined_id())
194            .collect()
195    }
196}
197
198impl std::iter::FromIterator<UnminedTxId> for Root {
199    fn from_iter<I>(tx_ids: I) -> Self
200    where
201        I: IntoIterator<Item = UnminedTxId>,
202    {
203        tx_ids.into_iter().map(|tx_id| tx_id.mined_id()).collect()
204    }
205}
206
207impl std::iter::FromIterator<VerifiedUnminedTx> for Root {
208    fn from_iter<I>(transactions: I) -> Self
209    where
210        I: IntoIterator<Item = VerifiedUnminedTx>,
211    {
212        transactions
213            .into_iter()
214            .map(|tx| tx.transaction.id.mined_id())
215            .collect()
216    }
217}
218
219impl std::iter::FromIterator<transaction::Hash> for Root {
220    /// # Panics
221    ///
222    /// When there are no transactions in the iterator.
223    /// This is impossible, because every block must have a coinbase transaction.
224    fn from_iter<I>(hashes: I) -> Self
225    where
226        I: IntoIterator<Item = transaction::Hash>,
227    {
228        let mut hashes = hashes.into_iter().map(|hash| hash.0).collect::<Vec<_>>();
229        while hashes.len() > 1 {
230            hashes = hashes
231                .chunks(2)
232                .map(|chunk| match chunk {
233                    [h1, h2] => hash(h1, h2),
234                    [h1] => hash(h1, h1),
235                    _ => unreachable!("chunks(2)"),
236                })
237                .collect();
238        }
239        Self(hashes[0])
240    }
241}
242
243/// The root of the authorizing data Merkle tree, binding the
244/// block header to the authorizing data of the block (signatures, proofs)
245/// as defined in [ZIP-244].
246///
247/// See [`Root`] for an important disclaimer.
248///
249/// [ZIP-244]: https://zips.z.cash/zip-0244
250#[derive(Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
251#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary))]
252pub struct AuthDataRoot(pub(crate) [u8; 32]);
253
254impl fmt::Debug for AuthDataRoot {
255    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
256        f.debug_tuple("AuthRoot")
257            .field(&hex::encode(self.0))
258            .finish()
259    }
260}
261
262impl From<[u8; 32]> for AuthDataRoot {
263    fn from(hash: [u8; 32]) -> Self {
264        AuthDataRoot(hash)
265    }
266}
267
268impl From<AuthDataRoot> for [u8; 32] {
269    fn from(hash: AuthDataRoot) -> Self {
270        hash.0
271    }
272}
273
274impl AuthDataRoot {
275    /// Return the hash bytes in big-endian byte-order suitable for printing out byte by byte.
276    ///
277    /// Zebra displays transaction and block hashes in big-endian byte-order,
278    /// following the u256 convention set by Bitcoin and zcashd.
279    pub fn bytes_in_display_order(&self) -> [u8; 32] {
280        let mut reversed_bytes = self.0;
281        reversed_bytes.reverse();
282        reversed_bytes
283    }
284
285    /// Convert bytes in big-endian byte-order into a [`merkle::AuthDataRoot`](crate::block::merkle::AuthDataRoot).
286    ///
287    /// Zebra displays transaction and block hashes in big-endian byte-order,
288    /// following the u256 convention set by Bitcoin and zcashd.
289    pub fn from_bytes_in_display_order(bytes_in_display_order: &[u8; 32]) -> AuthDataRoot {
290        let mut internal_byte_order = *bytes_in_display_order;
291        internal_byte_order.reverse();
292
293        AuthDataRoot(internal_byte_order)
294    }
295}
296
297impl ToHex for &AuthDataRoot {
298    fn encode_hex<T: FromIterator<char>>(&self) -> T {
299        self.bytes_in_display_order().encode_hex()
300    }
301
302    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
303        self.bytes_in_display_order().encode_hex_upper()
304    }
305}
306
307impl ToHex for AuthDataRoot {
308    fn encode_hex<T: FromIterator<char>>(&self) -> T {
309        (&self).encode_hex()
310    }
311
312    fn encode_hex_upper<T: FromIterator<char>>(&self) -> T {
313        (&self).encode_hex_upper()
314    }
315}
316
317impl FromHex for AuthDataRoot {
318    type Error = <[u8; 32] as FromHex>::Error;
319
320    fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error> {
321        let mut hash = <[u8; 32]>::from_hex(hex)?;
322        hash.reverse();
323
324        Ok(hash.into())
325    }
326}
327
328/// The placeholder used for the [`AuthDigest`](transaction::AuthDigest) of pre-v5 transactions.
329///
330/// # Consensus
331///
332/// > For transaction versions before v5, a placeholder value consisting
333/// > of 32 bytes of 0xFF is used in place of the authorizing data commitment.
334/// > This is only used in the tree committed to by hashAuthDataRoot.
335///
336/// <https://zips.z.cash/zip-0244#authorizing-data-commitment>
337pub const AUTH_DIGEST_PLACEHOLDER: transaction::AuthDigest = transaction::AuthDigest([0xFF; 32]);
338
339impl<T> std::iter::FromIterator<T> for AuthDataRoot
340where
341    T: std::convert::AsRef<Transaction>,
342{
343    fn from_iter<I>(transactions: I) -> Self
344    where
345        I: IntoIterator<Item = T>,
346    {
347        transactions
348            .into_iter()
349            .map(|tx| tx.as_ref().auth_digest().unwrap_or(AUTH_DIGEST_PLACEHOLDER))
350            .collect()
351    }
352}
353
354impl std::iter::FromIterator<UnminedTx> for AuthDataRoot {
355    fn from_iter<I>(transactions: I) -> Self
356    where
357        I: IntoIterator<Item = UnminedTx>,
358    {
359        transactions
360            .into_iter()
361            .map(|tx| tx.id.auth_digest().unwrap_or(AUTH_DIGEST_PLACEHOLDER))
362            .collect()
363    }
364}
365
366impl std::iter::FromIterator<VerifiedUnminedTx> for AuthDataRoot {
367    fn from_iter<I>(transactions: I) -> Self
368    where
369        I: IntoIterator<Item = VerifiedUnminedTx>,
370    {
371        transactions
372            .into_iter()
373            .map(|tx| {
374                tx.transaction
375                    .id
376                    .auth_digest()
377                    .unwrap_or(AUTH_DIGEST_PLACEHOLDER)
378            })
379            .collect()
380    }
381}
382
383impl std::iter::FromIterator<UnminedTxId> for AuthDataRoot {
384    fn from_iter<I>(tx_ids: I) -> Self
385    where
386        I: IntoIterator<Item = UnminedTxId>,
387    {
388        tx_ids
389            .into_iter()
390            .map(|tx_id| tx_id.auth_digest().unwrap_or(AUTH_DIGEST_PLACEHOLDER))
391            .collect()
392    }
393}
394
395impl std::iter::FromIterator<transaction::AuthDigest> for AuthDataRoot {
396    fn from_iter<I>(hashes: I) -> Self
397    where
398        I: IntoIterator<Item = transaction::AuthDigest>,
399    {
400        let mut hashes = hashes.into_iter().map(|hash| hash.0).collect::<Vec<_>>();
401        // > This new commitment is named hashAuthDataRoot and is the root of a
402        // > binary Merkle tree of transaction authorizing data commitments [...]
403        // > padded with leaves having the "null" hash value [0u8; 32].
404        // https://zips.z.cash/zip-0244#block-header-changes
405        // Pad with enough leaves to make the tree full (a power of 2).
406        let pad_count = hashes.len().next_power_of_two() - hashes.len();
407        hashes.extend(std::iter::repeat_n([0u8; 32], pad_count));
408        assert!(hashes.len().is_power_of_two());
409
410        while hashes.len() > 1 {
411            hashes = hashes
412                .chunks(2)
413                .map(|chunk| match chunk {
414                    [h1, h2] => auth_data_hash(h1, h2),
415                    _ => unreachable!("number of nodes is always even since tree is full"),
416                })
417                .collect();
418        }
419
420        Self(hashes[0])
421    }
422}
423
424#[cfg(test)]
425mod tests {
426    use super::*;
427
428    use crate::{block::Block, serialization::ZcashDeserialize, transaction::AuthDigest};
429
430    #[test]
431    fn block_test_vectors() {
432        for block_bytes in zebra_test::vectors::BLOCKS.iter() {
433            let block = Block::zcash_deserialize(&**block_bytes).unwrap();
434            let merkle_root = block.transactions.iter().collect::<Root>();
435            assert_eq!(
436                merkle_root,
437                block.header.merkle_root,
438                "block: {:?} {:?} transaction hashes: {:?}",
439                block.coinbase_height().unwrap(),
440                block.hash(),
441                block
442                    .transactions
443                    .iter()
444                    .map(|tx| tx.hash())
445                    .collect::<Vec<_>>()
446            );
447        }
448    }
449
450    #[test]
451    fn auth_digest() {
452        for block_bytes in zebra_test::vectors::BLOCKS.iter() {
453            let block = Block::zcash_deserialize(&**block_bytes).unwrap();
454            let _auth_root = block.transactions.iter().collect::<AuthDataRoot>();
455            // No test vectors for now, so just check it computes without panicking
456        }
457    }
458
459    #[test]
460    fn auth_data_padding() {
461        // Compute the root of a 3-leaf tree with arbitrary leaves
462        let mut v = vec![
463            AuthDigest([0x42; 32]),
464            AuthDigest([0xAA; 32]),
465            AuthDigest([0x77; 32]),
466        ];
467        let root_3 = v.iter().copied().collect::<AuthDataRoot>();
468
469        // Compute the root a 4-leaf tree with the same leaves as before and
470        // an additional all-zeroes leaf.
471        // Since this is the same leaf used as padding in the previous tree,
472        // then both trees must have the same root.
473        v.push(AuthDigest([0x00; 32]));
474        let root_4 = v.iter().copied().collect::<AuthDataRoot>();
475
476        assert_eq!(root_3, root_4);
477    }
478
479    #[test]
480    fn auth_data_pre_v5() {
481        // Compute the AuthDataRoot for a single transaction of an arbitrary pre-V5 block
482        let block =
483            Block::zcash_deserialize(&**zebra_test::vectors::BLOCK_MAINNET_1046400_BYTES).unwrap();
484        let auth_root = block.transactions.iter().take(1).collect::<AuthDataRoot>();
485
486        // Compute the AuthDataRoot with a single [0xFF; 32] digest.
487        // Since ZIP-244 specifies that this value must be used as the auth digest of
488        // pre-V5 transactions, then the roots must match.
489        let expect_auth_root = [AuthDigest([0xFF; 32])]
490            .iter()
491            .copied()
492            .collect::<AuthDataRoot>();
493
494        assert_eq!(auth_root, expect_auth_root);
495    }
496}