alpm_mtree/
parser.rs

1use std::path::PathBuf;
2
3use alpm_parsers::iter_str_context;
4use alpm_types::{Md5Checksum, Sha256Checksum};
5use winnow::{
6    ModalResult,
7    Parser as WinnowParser,
8    ascii::{digit1, line_ending, space0},
9    combinator::{
10        alt,
11        cut_err,
12        eof,
13        fail,
14        preceded,
15        repeat_till,
16        separated,
17        separated_pair,
18        terminated,
19    },
20    error::{StrContext, StrContextValue},
21    stream::AsChar,
22    token::{take_until, take_while},
23};
24
25use crate::path_decoder::decode_utf8_chars;
26
27/// Each line represents a line in a .MTREE file.
28#[derive(Clone, Debug)]
29pub enum Statement<'a> {
30    /// All lines that're irrelevant and don't contribute anything to the actual mtree file.
31    ///
32    /// Includes the following:
33    /// - Empty lines
34    /// - Lines that start with `#` (e.g. `#mtree` line and comments)
35    Ignored,
36    /// A `/set` command followed by some properties.
37    Set(Vec<SetProperty<'a>>),
38    /// A `/unset` command followed by some properties.
39    Unset(Vec<UnsetProperty>),
40    /// Any path statement followed by some properties.
41    Path {
42        /// The path.
43        path: PathBuf,
44        /// The properties of the path.
45        properties: Vec<PathProperty<'a>>,
46    },
47}
48
49/// Represents the properties that may be set in `/set` lines.
50#[derive(Clone, Debug)]
51pub enum SetProperty<'a> {
52    /// A user ID.
53    Uid(u32),
54    /// A group ID.
55    Gid(u32),
56    /// A file mode.
57    Mode(&'a str),
58    /// A path type.
59    Type(PathType),
60}
61
/// A property that can be cleared by an `/unset` line.
///
/// Only the property name is relevant here, which is why no variant carries a value.
#[derive(Clone, Debug)]
pub enum UnsetProperty {
    /// The user ID.
    Uid,
    /// The group ID.
    Gid,
    /// The file mode.
    Mode,
    /// The type of path.
    Type,
}
74
75/// This type is used in a path line to define properties for that path.
76#[derive(Clone, Debug)]
77pub enum PathProperty<'a> {
78    /// A user ID.
79    Uid(u32),
80    /// A group ID.
81    Gid(u32),
82    /// A file mode.
83    Mode(&'a str),
84    /// A path type.
85    Type(PathType),
86    /// A file size.
87    Size(u64),
88    /// The target of a symlink.
89    Link(PathBuf),
90    /// An MD-5 hash digest.
91    Md5Digest(Md5Checksum),
92    /// A SHA-256 hash digest.
93    Sha256Digest(Sha256Checksum),
94    /// A point in time in seconds since the epoch.
95    Time(i64),
96}
97
/// The kinds of path that a `type` property can name.
#[derive(Clone, Copy, Debug)]
pub enum PathType {
    /// The path is a directory.
    Dir,
    /// The path is a regular file.
    File,
    /// The path is a symlink.
    Link,
}
108
109/// Parse a single `/set` property.
110fn set_property<'s>(input: &mut &'s str) -> ModalResult<SetProperty<'s>> {
111    // First off, get the type of the property.
112    let keywords = ["uid", "gid", "type", "mode"];
113    let property_type = cut_err(alt(keywords))
114        .context(StrContext::Label("property"))
115        .context_with(iter_str_context!([keywords]))
116        .parse_next(input)?;
117
118    // Expect the `=` separator between the key-value pair
119    let _ = "=".parse_next(input)?;
120
121    // Now we continue parsing based on the type of the property.
122    let property = match property_type {
123        "type" => {
124            let path_types = ["dir", "file", "link"];
125            alt(path_types)
126                .map(|value| match value {
127                    "dir" => SetProperty::Type(PathType::Dir),
128                    "file" => SetProperty::Type(PathType::File),
129                    "link" => SetProperty::Type(PathType::Link),
130                    _ => unreachable!(),
131                })
132                .context(StrContext::Label("property file type"))
133                .context_with(iter_str_context!([path_types]))
134                .parse_next(input)?
135        }
136        "uid" => SetProperty::Uid(system_id("user id", input)?),
137        "gid" => SetProperty::Gid(system_id("group id", input)?),
138        "mode" => SetProperty::Mode(mode(input)?),
139        _ => unreachable!(),
140    };
141
142    Ok(property)
143}
144
145/// Parse a single `/unset` property.
146fn unset_property(input: &mut &str) -> ModalResult<UnsetProperty> {
147    // First off, get the type of the property.
148    let keywords = ["uid", "gid", "type", "mode"];
149    let property_type = cut_err(alt(keywords))
150        .context(StrContext::Label("property"))
151        .context_with(iter_str_context!([keywords]))
152        .parse_next(input)?;
153
154    // Map the parsed property type to the correct enum variant.
155    let property = match property_type {
156        "type" => UnsetProperty::Type,
157        "uid" => UnsetProperty::Uid,
158        "gid" => UnsetProperty::Gid,
159        "mode" => UnsetProperty::Mode,
160        _ => unreachable!(),
161    };
162
163    Ok(property)
164}
165
166/// Parse a simple system id as usize.
167fn system_id(id_type: &'static str, input: &mut &str) -> ModalResult<u32> {
168    cut_err(digit1.parse_to())
169        .context(StrContext::Label(id_type))
170        .context(StrContext::Expected(StrContextValue::Description(
171            "a system id.",
172        )))
173        .parse_next(input)
174}
175
176/// Parse a Unix timestamp.
177///
178/// In mtree, this is a float for some reason, even though the decimal place is always a `0`.
179fn timestamp(input: &mut &str) -> ModalResult<i64> {
180    let (timestamp, _) = cut_err(separated_pair(digit1.parse_to(), '.', digit1))
181        .context(StrContext::Label("unix epoch"))
182        .context(StrContext::Expected(StrContextValue::Description(
183            "A unix epoch in float notation.",
184        )))
185        .parse_next(input)?;
186
187    Ok(timestamp)
188}
189
190/// Parse a filesystem mode.
191///
192/// Should be between 3-5 octal numbers **without** a `0o` prefix.
193fn mode<'s>(input: &mut &'s str) -> ModalResult<&'s str> {
194    cut_err(take_while(3..5, AsChar::is_oct_digit))
195        .context(StrContext::Label("file mode"))
196        .context(StrContext::Expected(StrContextValue::Description(
197            "octal string of length 3-5.",
198        )))
199        .parse_next(input)
200}
201
202/// Parse a SHA-256 hash.
203fn sha256(input: &mut &str) -> ModalResult<Sha256Checksum> {
204    cut_err(take_while(64.., AsChar::is_hex_digit).parse_to())
205        .context(StrContext::Label("sha256 hash"))
206        .context(StrContext::Expected(StrContextValue::Description(
207            "64 char long hexadecimal string",
208        )))
209        .parse_next(input)
210}
211
212/// Parse an MD5 hash.
213fn md5(input: &mut &str) -> ModalResult<Md5Checksum> {
214    cut_err(take_while(32.., AsChar::is_hex_digit).parse_to())
215        .context(StrContext::Label("md5 hash"))
216        .context(StrContext::Expected(StrContextValue::Description(
217            "32 char long hexadecimal string",
218        )))
219        .parse_next(input)
220}
221
222/// Consume all chars of a link until a newline or space is hit.
223///
224/// Check [`decode_utf8_chars`] for more info on how special chars in paths are escaped.
225fn link(input: &mut &str) -> ModalResult<String> {
226    take_while(0.., |c| c != ' ' && c != '\n')
227        .and_then(decode_utf8_chars)
228        .parse_next(input)
229}
230
231/// Get a string representing a size by consuming all integers.
232fn size(input: &mut &str) -> ModalResult<u64> {
233    cut_err(take_while(0.., |c| c != ' ' && c != '\n').parse_to())
234        .context(StrContext::Label("file size"))
235        .context(StrContext::Expected(StrContextValue::Description(
236            "a positive integer representing the file's size.",
237        )))
238        .parse_next(input)
239}
240
241/// Parse a single property.
242fn property<'s>(input: &mut &'s str) -> ModalResult<PathProperty<'s>> {
243    // First off, get the type of the property.
244    let keywords = [
245        "type",
246        "uid",
247        "gid",
248        "mode",
249        "size",
250        "link",
251        "md5digest",
252        "sha256digest",
253        "time",
254    ];
255    let property_type = cut_err(alt(keywords))
256        .context(StrContext::Label("file property type"))
257        .context_with(iter_str_context!([keywords]))
258        .parse_next(input)?;
259
260    // Expect the `=` separator between the key-value pair
261    let _ = "=".parse_next(input)?;
262
263    // Now we continue parsing based on the type of the property.
264    let property = match property_type {
265        "type" => alt(("dir", "file", "link"))
266            .map(|value| match value {
267                "dir" => PathProperty::Type(PathType::Dir),
268                "file" => PathProperty::Type(PathType::File),
269                "link" => PathProperty::Type(PathType::Link),
270                _ => unreachable!(),
271            })
272            .context(StrContext::Label("property file type"))
273            .context(StrContext::Expected(StrContextValue::Description(
274                "'dir', 'file' or 'link'",
275            )))
276            .parse_next(input)?,
277        "uid" => PathProperty::Uid(system_id("user id", input)?),
278        "gid" => PathProperty::Gid(system_id("group id", input)?),
279        "mode" => PathProperty::Mode(mode(input)?),
280        "size" => PathProperty::Size(size.parse_next(input)?),
281        "link" => PathProperty::Link(PathBuf::from(link.parse_next(input)?)),
282        "md5digest" => PathProperty::Md5Digest(md5(input)?),
283        "sha256digest" => PathProperty::Sha256Digest(sha256(input)?),
284        "time" => PathProperty::Time(timestamp(input)?),
285        _ => unreachable!(),
286    };
287
288    Ok(property)
289}
290
291/// Parse all path related properties that follow after a path declaration.
292///
293/// An example without all possible properties:
294/// E.g. `./some_path uid=0 gid=0 type=file`
295///                   ↑                   ↑
296///                         This part
297fn properties<'s>(input: &mut &'s str) -> ModalResult<Vec<PathProperty<'s>>> {
298    cut_err(terminated(separated(0.., property, " "), line_ending)).parse_next(input)
299}
300
301/// Parse all properties that follow a `/set` command.
302///
303/// E.g. `/set uid=0 gid=0`
304///            ↑         ↑
305///             This part
306fn set_properties<'s>(input: &mut &'s str) -> ModalResult<Vec<SetProperty<'s>>> {
307    cut_err(terminated(separated(0.., set_property, " "), line_ending)).parse_next(input)
308}
309
310/// Parse all properties that follow an `/unset` command.
311//////
312/// E.g. `/unset uid gid`
313///              ↑     ↑
314///             This part
315fn unset_properties(input: &mut &str) -> ModalResult<Vec<UnsetProperty>> {
316    cut_err(terminated(separated(0.., unset_property, " "), line_ending)).parse_next(input)
317}
318
319/// Parse the next statement in the file.
320fn statement<'s>(input: &mut &'s str) -> ModalResult<Statement<'s>> {
321    // First, we figure out what kind of line we're looking at.
322    let statement_type: String = alt((
323        // A Path statement line
324        //
325        // Path statements may be preceded with whitespaces.
326        // Otherwise read the line until terminated by the first space or newline.
327        // Whitespace characters are encoded as `\s' (space), `\t' (tab), and `\n' (new line)
328        // which is why we can simply ignore those while parsing the path.
329        preceded(
330            space0,
331            terminated((".", take_until(0.., " ")).take(), alt((' ', '\n'))),
332        ).and_then(decode_utf8_chars),
333        terminated("/set", " ").map(|s: &str| s.to_string()),
334        terminated("/unset", " ").map(|s: &str| s.to_string()),
335        // A comment line that starts with `#`.
336        preceded(("#", take_until(0.., "\n")), line_ending).map(|s: &str| s.to_string()),
337        // An empty line that possibly contains spaces.
338        preceded(space0, line_ending).map(|s: &str| s.to_string()),
339        // If none of the above match, fail hard with a correct error message.
340        fail.context(StrContext::Label("statement"))
341        .context(StrContext::Expected(StrContextValue::Description(
342            "'/set', '/unset', or a relative local path (./some/path) followed by their respective properties.",
343        )))
344    ))
345    .parse_next(input)?;
346
347    // Ignore comments and empty lines.
348    if statement_type.trim().is_empty() {
349        return Ok(Statement::Ignored);
350    }
351
352    // Now parse the properties based on the statement type until the end of line.
353    let statement = match statement_type.as_str() {
354        "/set" => Statement::Set(set_properties.parse_next(input)?),
355        "/unset" => Statement::Unset(unset_properties.parse_next(input)?),
356        path => Statement::Path {
357            path: PathBuf::from(path),
358            properties: properties.parse_next(input)?,
359        },
360    };
361
362    Ok(statement)
363}
364
365/// Parse a given .MTREE file.
366///
367/// Empty lines and comment lines are returned as `Statement::Ignored`.
368/// This is to provide a proper line-based representation of the file, so we can later on provide
369/// proper context in error messages during the interpretation step.
370///
371/// # Errors
372///
373/// - `Error::ParseError` if a malformed MTREE file is encountered.
374pub fn mtree<'s>(input: &mut &'s str) -> ModalResult<Vec<Statement<'s>>> {
375    let (statements, _eof): (Vec<Statement<'s>>, _) =
376        repeat_till(0.., statement, eof).parse_next(input)?;
377
378    Ok(statements)
379}