1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
//! Substrings proofs based on commitments.

use crate::{
    commitment::{
        Commitment, CommitmentId, CommitmentInfo, CommitmentKind, CommitmentOpening,
        TranscriptCommitments,
    },
    merkle::MerkleProof,
    transcript::get_value_ids,
    Direction, EncodingId, RedactedTranscript, SessionHeader, Transcript, TranscriptSlice,
    MAX_TOTAL_COMMITTED_DATA,
};
use mpz_circuits::types::ValueType;
use mpz_garble_core::Encoder;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use utils::range::{RangeDisjoint, RangeSet, RangeUnion, ToRangeSet};

/// An error for [`SubstringsProofBuilder`]
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum SubstringsProofBuilderError {
    /// Invalid commitment id.
    #[error("invalid commitment id: {0:?}")]
    InvalidCommitmentId(CommitmentId),
    /// Missing commitment.
    #[error("missing commitment")]
    MissingCommitment,
    /// Invalid commitment type.
    #[error("commitment {0:?} is not a substrings commitment")]
    InvalidCommitmentType(CommitmentId),
    /// Attempted to add a commitment with a duplicate id.
    #[error("commitment with id {0:?} already exists")]
    DuplicateCommitmentId(CommitmentId),
}

/// A builder for [`SubstringsProof`]
pub struct SubstringsProofBuilder<'a> {
    commitments: &'a TranscriptCommitments,
    transcript_tx: &'a Transcript,
    transcript_rx: &'a Transcript,
    openings: HashMap<CommitmentId, (CommitmentInfo, CommitmentOpening)>,
}

opaque_debug::implement!(SubstringsProofBuilder<'_>);

impl<'a> SubstringsProofBuilder<'a> {
    /// Creates a new builder.
    pub fn new(
        commitments: &'a TranscriptCommitments,
        transcript_tx: &'a Transcript,
        transcript_rx: &'a Transcript,
    ) -> Self {
        Self {
            commitments,
            transcript_tx,
            transcript_rx,
            openings: HashMap::default(),
        }
    }

    /// Returns a reference to the commitments.
    pub fn commitments(&self) -> &TranscriptCommitments {
        self.commitments
    }

    /// Reveals data corresponding to the provided ranges in the sent direction.
    pub fn reveal_sent(
        &mut self,
        ranges: &dyn ToRangeSet<usize>,
        commitment_kind: CommitmentKind,
    ) -> Result<&mut Self, SubstringsProofBuilderError> {
        self.reveal(ranges, Direction::Sent, commitment_kind)
    }

    /// Reveals data corresponding to the provided transcript subsequence in the received direction.
    pub fn reveal_recv(
        &mut self,
        ranges: &dyn ToRangeSet<usize>,
        commitment_kind: CommitmentKind,
    ) -> Result<&mut Self, SubstringsProofBuilderError> {
        self.reveal(ranges, Direction::Received, commitment_kind)
    }

    /// Reveals data corresponding to the provided ranges and direction.
    pub fn reveal(
        &mut self,
        ranges: &dyn ToRangeSet<usize>,
        direction: Direction,
        commitment_kind: CommitmentKind,
    ) -> Result<&mut Self, SubstringsProofBuilderError> {
        let com = self
            .commitments
            .get_id_by_info(commitment_kind, &ranges.to_range_set(), direction)
            .ok_or(SubstringsProofBuilderError::MissingCommitment)?;

        self.reveal_by_id(com)
    }

    /// Reveals data corresponding to the provided commitment id
    pub fn reveal_by_id(
        &mut self,
        id: CommitmentId,
    ) -> Result<&mut Self, SubstringsProofBuilderError> {
        let commitment = self
            .commitments()
            .get(&id)
            .ok_or(SubstringsProofBuilderError::InvalidCommitmentId(id))?;

        let info = self
            .commitments()
            .get_info(&id)
            .expect("info exists if commitment exists");

        #[allow(irrefutable_let_patterns)]
        let Commitment::Blake3(commitment) = commitment
        else {
            return Err(SubstringsProofBuilderError::InvalidCommitmentType(id));
        };

        let transcript = match info.direction() {
            Direction::Sent => self.transcript_tx,
            Direction::Received => self.transcript_rx,
        };

        let data = transcript.get_bytes_in_ranges(info.ranges());

        // add commitment to openings and return an error if it is already present
        if self
            .openings
            .insert(id, (info.clone(), commitment.open(data).into()))
            .is_some()
        {
            return Err(SubstringsProofBuilderError::DuplicateCommitmentId(id));
        }

        Ok(self)
    }

    /// Builds the [`SubstringsProof`]
    pub fn build(self) -> Result<SubstringsProof, SubstringsProofBuilderError> {
        let Self {
            commitments,
            openings,
            ..
        } = self;

        let mut indices = openings
            .keys()
            .map(|id| id.to_inner() as usize)
            .collect::<Vec<_>>();
        indices.sort();

        let inclusion_proof = commitments.merkle_tree().proof(&indices);

        Ok(SubstringsProof {
            openings,
            inclusion_proof,
        })
    }
}

/// An error relating to [`SubstringsProof`]
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum SubstringsProofError {
    /// The proof contains more data than the maximum allowed.
    #[error(
        "substrings proof opens more data than the maximum allowed: {0} > {}",
        MAX_TOTAL_COMMITTED_DATA
    )]
    MaxDataExceeded(usize),
    /// The proof contains duplicate transcript data.
    #[error("proof contains duplicate transcript data")]
    DuplicateData(Direction, RangeSet<usize>),
    /// Range of the opening is out of bounds.
    #[error("range of opening {0:?} is out of bounds: {1}")]
    RangeOutOfBounds(CommitmentId, usize),
    /// The proof contains an invalid commitment opening.
    #[error("invalid opening for commitment id: {0:?}")]
    InvalidOpening(CommitmentId),
    /// The proof contains an invalid inclusion proof.
    #[error("invalid inclusion proof: {0}")]
    InvalidInclusionProof(String),
}

/// A substring proof using commitments
///
/// This substring proof contains the commitment openings and a proof
/// that the corresponding commitments are present in the merkle tree.
#[derive(Serialize, Deserialize)]
pub struct SubstringsProof {
    openings: HashMap<CommitmentId, (CommitmentInfo, CommitmentOpening)>,
    inclusion_proof: MerkleProof,
}

opaque_debug::implement!(SubstringsProof);

impl SubstringsProof {
    /// Verifies this proof and, if successful, returns the redacted sent and received transcripts.
    ///
    /// # Arguments
    ///
    /// * `header` - The session header.
    pub fn verify(
        self,
        header: &SessionHeader,
    ) -> Result<(RedactedTranscript, RedactedTranscript), SubstringsProofError> {
        let Self {
            openings,
            inclusion_proof,
        } = self;

        let mut indices = Vec::with_capacity(openings.len());
        let mut expected_hashes = Vec::with_capacity(openings.len());
        let mut sent = vec![0u8; header.sent_len()];
        let mut recv = vec![0u8; header.recv_len()];
        let mut sent_ranges = RangeSet::default();
        let mut recv_ranges = RangeSet::default();
        let mut total_opened = 0u128;
        for (id, (info, opening)) in openings {
            let CommitmentInfo {
                ranges, direction, ..
            } = info;

            let opened_len = ranges.len();

            // Make sure the amount of data being proved is bounded.
            total_opened += opened_len as u128;
            if total_opened > MAX_TOTAL_COMMITTED_DATA as u128 {
                return Err(SubstringsProofError::MaxDataExceeded(total_opened as usize));
            }

            // Make sure the opening length matches the ranges length.
            if opening.data().len() != opened_len {
                return Err(SubstringsProofError::InvalidOpening(id));
            }

            // Make sure duplicate data is not opened.
            match direction {
                Direction::Sent => {
                    if !sent_ranges.is_disjoint(&ranges) {
                        return Err(SubstringsProofError::DuplicateData(direction, ranges));
                    }
                    sent_ranges = sent_ranges.union(&ranges);
                }
                Direction::Received => {
                    if !recv_ranges.is_disjoint(&ranges) {
                        return Err(SubstringsProofError::DuplicateData(direction, ranges));
                    }
                    recv_ranges = recv_ranges.union(&ranges);
                }
            }

            // Make sure the ranges are within the bounds of the transcript
            let max = ranges
                .max()
                .ok_or(SubstringsProofError::InvalidOpening(id))?;
            let transcript_len = match direction {
                Direction::Sent => header.sent_len(),
                Direction::Received => header.recv_len(),
            };

            if max > transcript_len {
                return Err(SubstringsProofError::RangeOutOfBounds(id, max));
            }

            // Generate the expected encodings for the purported data in the opening.
            let encodings = get_value_ids(&ranges, direction)
                .map(|id| {
                    header
                        .encoder()
                        .encode_by_type(EncodingId::new(&id).to_inner(), &ValueType::U8)
                })
                .collect::<Vec<_>>();

            // Compute the expected hash of the commitment to make sure it is
            // present in the merkle tree.
            indices.push(id.to_inner() as usize);
            expected_hashes.push(opening.recover(&encodings).hash());

            // Make sure the length of data from the opening matches the commitment.
            let mut data = opening.into_data();
            if data.len() != ranges.len() {
                return Err(SubstringsProofError::InvalidOpening(id));
            }

            let dest = match direction {
                Direction::Sent => &mut sent,
                Direction::Received => &mut recv,
            };

            // Iterate over the ranges backwards, copying the data from the opening
            // then truncating it.
            for range in ranges.iter_ranges().rev() {
                let start = data.len() - range.len();
                dest[range].copy_from_slice(&data[start..]);
                data.truncate(start);
            }
        }

        // Verify that the expected hashes are present in the merkle tree.
        //
        // This proves the Prover committed to the purported data prior to the encoder
        // seed being revealed.
        inclusion_proof
            .verify(header.merkle_root(), &indices, &expected_hashes)
            .map_err(|e| SubstringsProofError::InvalidInclusionProof(e.to_string()))?;

        // Iterate over the unioned ranges and create TranscriptSlices for each.
        // This ensures that the slices are sorted and disjoint.
        let sent_slices = sent_ranges
            .iter_ranges()
            .map(|range| TranscriptSlice::new(range.clone(), sent[range].to_vec()))
            .collect();
        let recv_slices = recv_ranges
            .iter_ranges()
            .map(|range| TranscriptSlice::new(range.clone(), recv[range].to_vec()))
            .collect();

        Ok((
            RedactedTranscript::new(header.sent_len(), sent_slices),
            RedactedTranscript::new(header.recv_len(), recv_slices),
        ))
    }
}