alpm_mtree/
parser.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
use std::path::PathBuf;

use alpm_types::{Md5Checksum, Sha256Checksum};
use winnow::{
    ModalResult,
    Parser as WinnowParser,
    ascii::{digit1, line_ending, space0},
    combinator::{
        alt,
        cut_err,
        eof,
        fail,
        preceded,
        repeat_till,
        separated,
        separated_pair,
        terminated,
    },
    error::{StrContext, StrContextValue},
    stream::AsChar,
    token::{take_until, take_while},
};

use crate::path_decoder::decode_utf8_chars;

/// A single parsed line of a `.MTREE` file.
///
/// Every line of the input is turned into exactly one `Statement`, including lines that carry
/// no information (see [`Statement::Ignored`]).
#[derive(Debug, Clone)]
pub enum Statement<'a> {
    /// All lines that are irrelevant and don't contribute anything to the actual mtree file.
    ///
    /// Includes the following:
    /// - Empty lines
    /// - Lines that start with `#` (e.g. `#mtree` line and comments)
    Ignored,
    /// A `/set` command followed by some properties.
    Set(Vec<SetProperty<'a>>),
    /// An `/unset` command followed by the names of the properties to unset.
    Unset(Vec<UnsetProperty>),
    /// Any path statement followed by some properties.
    Path {
        /// The path the line starts with, after its escape sequences have been decoded.
        path: PathBuf,
        /// All `key=value` properties that follow the path on the same line.
        properties: Vec<PathProperty<'a>>,
    },
}

/// Represents the properties that may be set in `/set` lines.
///
/// Each variant corresponds to one `key=value` pair on such a line.
#[derive(Debug, Clone)]
pub enum SetProperty<'a> {
    /// `uid=<digits>`: a numeric user id.
    Uid(usize),
    /// `gid=<digits>`: a numeric group id.
    Gid(usize),
    /// `mode=<octal digits>`: the mode as the raw octal digit string borrowed from the input.
    Mode(&'a str),
    /// `type=<dir|file|link>`: the kind of path.
    Type(PathType),
}

/// Represents the properties that can be unset by `/unset` lines.
///
/// Unlike [`SetProperty`], only the property name is given on an `/unset` line, so the
/// variants carry no value.
///
/// The type is comparable (`PartialEq`/`Eq`) so that parsed statements can be checked against
/// expected values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UnsetProperty {
    /// The `uid` keyword.
    Uid,
    /// The `gid` keyword.
    Gid,
    /// The `mode` keyword.
    Mode,
    /// The `type` keyword.
    Type,
}

/// This type is used in a path line to define properties for that path.
///
/// Each variant corresponds to one `key=value` pair that follows the path.
#[derive(Debug, Clone)]
pub enum PathProperty<'a> {
    /// `uid=<digits>`: a numeric user id.
    Uid(usize),
    /// `gid=<digits>`: a numeric group id.
    Gid(usize),
    /// `mode=<octal digits>`: the mode as the raw octal digit string borrowed from the input.
    Mode(&'a str),
    /// `type=<dir|file|link>`: the kind of path.
    Type(PathType),
    /// `size=<integer>`: the size of the file.
    Size(usize),
    /// `link=<target>`: the link target, after its escape sequences have been decoded.
    Link(PathBuf),
    /// `md5digest=<hex>`: the MD5 checksum.
    Md5Digest(Md5Checksum),
    /// `sha256digest=<hex>`: the SHA-256 checksum.
    Sha256Digest(Sha256Checksum),
    /// `time=<seconds>.<fraction>`: only the integer part in front of the `.` is kept.
    Time(usize),
}

/// All allowed kinds of path types.
///
/// These are the values accepted for the `type=` property.
///
/// The type is comparable (`PartialEq`/`Eq`) so that parsed properties can be checked against
/// expected values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathType {
    /// `type=dir`
    Dir,
    /// `type=file`
    File,
    /// `type=link`
    Link,
}

/// Parse one `key=value` pair of a `/set` line into a [`SetProperty`].
///
/// The key has to be one of `uid`, `gid`, `type` or `mode`; anything else is a hard
/// (non-backtracking) error. The value is then parsed by the parser that belongs to the key.
fn set_property<'s>(input: &mut &'s str) -> ModalResult<SetProperty<'s>> {
    // The key decides which value parser is run further down.
    let key = cut_err(alt(("uid", "gid", "type", "mode")))
        .context(StrContext::Label("property"))
        .context(StrContext::Expected(StrContextValue::Description(
            "'uid', 'gid' or 'type', 'mode'",
        )))
        .parse_next(input)?;

    // Key and value are joined by a single `=`.
    let _ = "=".parse_next(input)?;

    match key {
        "uid" => system_id("user id", input).map(SetProperty::Uid),
        "gid" => system_id("group id", input).map(SetProperty::Gid),
        "mode" => mode(input).map(SetProperty::Mode),
        "type" => {
            let kind = alt(("dir", "file", "link"))
                .context(StrContext::Label("property file type"))
                .context(StrContext::Expected(StrContextValue::Description(
                    "'dir', 'file' or 'link'",
                )))
                .parse_next(input)?;

            let path_type = match kind {
                "dir" => PathType::Dir,
                "file" => PathType::File,
                "link" => PathType::Link,
                // `alt` above only ever yields one of the three literals.
                _ => unreachable!(),
            };

            Ok(SetProperty::Type(path_type))
        }
        // The key parser only ever yields one of the four literals.
        _ => unreachable!(),
    }
}

/// Parse one property name of an `/unset` line into an [`UnsetProperty`].
///
/// Only the bare keyword (`uid`, `gid`, `type` or `mode`) is expected; anything else is a hard
/// (non-backtracking) error.
fn unset_property(input: &mut &str) -> ModalResult<UnsetProperty> {
    cut_err(alt(("uid", "gid", "type", "mode")))
        .context(StrContext::Label("property"))
        .context(StrContext::Expected(StrContextValue::Description(
            "'uid', 'gid' or 'type', 'mode'",
        )))
        // Translate the matched keyword into its enum variant.
        .map(|name: &str| match name {
            "uid" => UnsetProperty::Uid,
            "gid" => UnsetProperty::Gid,
            "mode" => UnsetProperty::Mode,
            "type" => UnsetProperty::Type,
            // `alt` above only ever yields one of the four literals.
            _ => unreachable!(),
        })
        .parse_next(input)
}

/// Parse a run of ASCII digits as a numeric system id.
///
/// `id_type` is only used as the label in the error message (e.g. `"user id"`).
/// Failing to parse is a hard (non-backtracking) error.
fn system_id(id_type: &'static str, input: &mut &str) -> ModalResult<usize> {
    let label = StrContext::Label(id_type);
    let expected = StrContext::Expected(StrContextValue::Description("a system id."));

    let mut id = cut_err(digit1.parse_to()).context(label).context(expected);
    id.parse_next(input)
}

/// Parse a Unix timestamp written in float notation (`<seconds>.<fraction>`).
///
/// mtree writes the timestamp with a decimal part. Both sides of the `.` must consist of at
/// least one digit, but only the integer part in front of the `.` is returned.
fn timestamp(input: &mut &str) -> ModalResult<usize> {
    cut_err(separated_pair(digit1.parse_to(), '.', digit1))
        .context(StrContext::Label("unix epoch"))
        .context(StrContext::Expected(StrContextValue::Description(
            "A unix epoch in float notation.",
        )))
        // Drop the fractional digits, they are not needed.
        .map(|(seconds, _fraction): (usize, &str)| seconds)
        .parse_next(input)
}

/// Parse a filesystem mode.
///
/// Should be between 3-5 octal numbers **without** a `0o` prefix.
/// The digits are returned unparsed, as a slice of the input.
///
/// Failing to find at least three octal digits is a hard (non-backtracking) error.
fn mode<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
    // winnow treats `a..b` as a half-open range, so `3..5` would stop after four digits.
    // The inclusive range is needed to accept up to five digits, as documented and as the
    // error message below promises.
    cut_err(take_while(3..=5, AsChar::is_oct_digit))
        .context(StrContext::Label("file mode"))
        .context(StrContext::Expected(StrContextValue::Description(
            "octal string of length 3-5.",
        )))
        .parse_next(input)
}

/// Parse a SHA-256 checksum given as a hexadecimal string.
///
/// At least 64 hex digits are consumed and handed to the `FromStr` implementation of
/// [`Sha256Checksum`]. Any failure is a hard (non-backtracking) error.
fn sha256(input: &mut &str) -> ModalResult<Sha256Checksum> {
    let expected =
        StrContext::Expected(StrContextValue::Description("64 char long hexadecimal string"));
    let hex_digits = take_while(64.., AsChar::is_hex_digit);

    cut_err(hex_digits.parse_to())
        .context(StrContext::Label("sha256 hash"))
        .context(expected)
        .parse_next(input)
}

/// Parse an MD5 checksum given as a hexadecimal string.
///
/// At least 32 hex digits are consumed and handed to the `FromStr` implementation of
/// [`Md5Checksum`]. Any failure is a hard (non-backtracking) error.
fn md5(input: &mut &str) -> ModalResult<Md5Checksum> {
    let expected =
        StrContext::Expected(StrContextValue::Description("32 char long hexadecimal string"));
    let hex_digits = take_while(32.., AsChar::is_hex_digit);

    cut_err(hex_digits.parse_to())
        .context(StrContext::Label("md5 hash"))
        .context(expected)
        .parse_next(input)
}

/// Parse the target of a `link=` property.
///
/// Everything up to (but excluding) the next space or newline is taken and then run through
/// [`decode_utf8_chars`], which takes care of the escape sequences used for special chars in
/// paths.
fn link(input: &mut &str) -> ModalResult<String> {
    let raw_target = take_while(0.., |c: char| !matches!(c, ' ' | '\n'));

    raw_target.and_then(decode_utf8_chars).parse_next(input)
}

/// Parse the value of a `size=` property as an integer.
///
/// Everything up to (but excluding) the next space or newline is taken and parsed as `usize`.
/// Failing to do so is a hard (non-backtracking) error.
fn size(input: &mut &str) -> ModalResult<usize> {
    let raw_value = take_while(0.., |c: char| !matches!(c, ' ' | '\n'));

    cut_err(raw_value.parse_to())
        .context(StrContext::Label("file size"))
        .context(StrContext::Expected(StrContextValue::Description(
            "a positive integer representing the file's size.",
        )))
        .parse_next(input)
}

/// Parse a single property.
fn property<'s>(input: &mut &'s str) -> ModalResult<PathProperty<'s>> {
    // First off, get the type of the property.
    let property_type = cut_err(alt((
        "type",
        "uid",
        "gid",
        "mode",
        "size",
        "link",
        "md5digest",
        "sha256digest",
        "time",
    )))
    .context(StrContext::Label("file property type"))
    .context(StrContext::Expected(StrContextValue::Description(
        "'type', 'uid', 'gid', 'mode', 'size', 'link', 'md5digest', 'sha256digest' or 'time'",
    )))
    .parse_next(input)?;

    // Expect the `=` separator between the key-value pair
    let _ = "=".parse_next(input)?;

    // Now we continue parsing based on the type of the property.
    let property = match property_type {
        "type" => alt(("dir", "file", "link"))
            .map(|value| match value {
                "dir" => PathProperty::Type(PathType::Dir),
                "file" => PathProperty::Type(PathType::File),
                "link" => PathProperty::Type(PathType::Link),
                _ => unreachable!(),
            })
            .context(StrContext::Label("property file type"))
            .context(StrContext::Expected(StrContextValue::Description(
                "'dir', 'file' or 'link'",
            )))
            .parse_next(input)?,
        "uid" => PathProperty::Uid(system_id("user id", input)?),
        "gid" => PathProperty::Gid(system_id("group id", input)?),
        "mode" => PathProperty::Mode(mode(input)?),
        "size" => PathProperty::Size(size.parse_next(input)?),
        "link" => PathProperty::Link(PathBuf::from(link.parse_next(input)?)),
        "md5digest" => PathProperty::Md5Digest(md5(input)?),
        "sha256digest" => PathProperty::Sha256Digest(sha256(input)?),
        "time" => PathProperty::Time(timestamp(input)?),
        _ => unreachable!(),
    };

    Ok(property)
}

/// Parse the space separated properties that follow a path, up to and including the line ending.
///
/// An example without all possible properties:
/// E.g. `./some_path uid=0 gid=0 type=file`
///                   ↑                   ↑
///                         This part
///
/// The list may be empty. A missing line ending is a hard (non-backtracking) error.
fn properties<'s>(input: &mut &'s str) -> ModalResult<Vec<PathProperty<'s>>> {
    let property_list = separated(0.., property, " ");

    cut_err(terminated(property_list, line_ending)).parse_next(input)
}

/// Parse the space separated properties of a `/set` command, up to and including the line
/// ending.
///
/// E.g. `/set uid=0 gid=0`
///            ↑         ↑
///             This part
///
/// The list may be empty. A missing line ending is a hard (non-backtracking) error.
fn set_properties<'s>(input: &mut &'s str) -> ModalResult<Vec<SetProperty<'s>>> {
    let property_list = separated(0.., set_property, " ");

    cut_err(terminated(property_list, line_ending)).parse_next(input)
}

/// Parse the space separated property names of an `/unset` command, up to and including the
/// line ending.
///
/// E.g. `/unset uid gid`
///              ↑     ↑
///             This part
///
/// The list may be empty. A missing line ending is a hard (non-backtracking) error.
fn unset_properties(input: &mut &str) -> ModalResult<Vec<UnsetProperty>> {
    let property_list = separated(0.., unset_property, " ");

    cut_err(terminated(property_list, line_ending)).parse_next(input)
}

/// Parse the next statement in the file.
fn statement<'s>(input: &mut &'s str) -> ModalResult<Statement<'s>> {
    // First, we figure out what kind of line we're looking at.
    let statement_type: String = alt((
        // A Path statement line
        //
        // Path statements may be preceded with whitespaces.
        // Otherwise read the line until terminated by the first space or newline.
        // Whitespace characters are encoded as `\s' (space), `\t' (tab), and `\n' (new line)
        // which is why we can simply ignore those while parsing the path.
        preceded(
            space0,
            terminated((".", take_until(0.., " ")).take(), alt((' ', '\n'))),
        ).and_then(decode_utf8_chars),
        terminated("/set", " ").map(|s: &str| s.to_string()),
        terminated("/unset", " ").map(|s: &str| s.to_string()),
        // A comment line that starts with `#`.
        preceded(("#", take_until(0.., "\n")), line_ending).map(|s: &str| s.to_string()),
        // An empty line that possibly contains spaces.
        preceded(space0, line_ending).map(|s: &str| s.to_string()),
        // If none of the above match, fail hard with a correct error message.
        fail.context(StrContext::Label("statement"))
        .context(StrContext::Expected(StrContextValue::Description(
            "'/set', '/unset', or a relative local path (./some/path) followed by their respective properties.",
        )))
    ))
    .parse_next(input)?;

    // Ignore comments and empty lines.
    if statement_type.trim().is_empty() {
        return Ok(Statement::Ignored);
    }

    // Now parse the properties based on the statement type until the end of line.
    let statement = match statement_type.as_str() {
        "/set" => Statement::Set(set_properties.parse_next(input)?),
        "/unset" => Statement::Unset(unset_properties.parse_next(input)?),
        path => Statement::Path {
            path: PathBuf::from(path),
            properties: properties.parse_next(input)?,
        },
    };

    Ok(statement)
}

/// Parse the content of a `.MTREE` file into a list of [`Statement`]s.
///
/// Statements are parsed one after another until the end of the input is reached.
///
/// Empty lines and comment lines are kept as `Statement::Ignored`.
/// That way the result is a line-based representation of the file, which allows to provide
/// proper context in error messages during the later interpretation step.
///
/// # Errors
///
/// - `Error::ParseError` if a malformed MTREE file is encountered.
pub fn mtree<'s>(input: &mut &'s str) -> ModalResult<Vec<Statement<'s>>> {
    let mut file = repeat_till(0.., statement, eof);

    // The second element is the output of `eof`, which is of no use.
    let parsed: (Vec<Statement<'s>>, _) = file.parse_next(input)?;

    Ok(parsed.0)
}