alpm_repo_db/files/
v1.rs

1//! The representation of [alpm-repo-files] files (version 1).
2//!
3//! [alpm-repo-files]: https://alpm.archlinux.page/specifications/alpm-repo-files.5.html
4
5use std::{collections::HashSet, fmt::Display, path::PathBuf, str::FromStr};
6
7use alpm_common::relative_files;
8use alpm_types::RelativePath;
9use fluent_i18n::t;
10use winnow::{
11    ModalResult,
12    Parser,
13    ascii::{line_ending, multispace0, space1, till_line_ending},
14    combinator::{alt, cut_err, eof, fail, not, opt, repeat, terminated},
15    error::{StrContext, StrContextValue},
16};
17
18use crate::files::Error;
19
20/// The raw data section in [alpm-repo-files] data.
21///
22/// [alpm-repo-files]: https://alpm.archlinux.page/specifications/alpm-repo-files.5.html
23#[derive(Debug)]
24pub(crate) struct FilesSection(Vec<RelativePath>);
25
26impl FilesSection {
27    /// The section keyword ("%FILES%").
28    pub(crate) const SECTION_KEYWORD: &str = "%FILES%";
29
30    /// Recognizes a [`RelativePath`] in a single line.
31    ///
32    /// # Note
33    ///
34    /// This parser only consumes till the end of a line and attempts to parse a [`RelativePath`]
35    /// from it. Trailing line endings and EOF are handled.
36    ///
37    /// # Errors
38    ///
39    /// Returns an error if a [`RelativePath`] cannot be created from the line, or something other
40    /// than a line ending or EOF is encountered afterwards.
41    fn parse_path(input: &mut &str) -> ModalResult<RelativePath> {
42        // Parse until the end of the line and attempt conversion to RelativePath.
43        // Make sure that the string is not empty!
44        alt((
45            (space1, line_ending)
46                .take()
47                .and_then(cut_err(fail))
48                .context(StrContext::Expected(StrContextValue::Description(
49                    "relative path not consisting of whitespaces and/or tabs",
50                ))),
51            till_line_ending,
52        ))
53        .verify(|s: &str| !s.is_empty())
54        .context(StrContext::Label("relative path"))
55        .parse_to()
56        .parse_next(input)
57    }
58
59    /// Recognizes [alpm-repo-files] data in a string slice.
60    ///
61    /// # Errors
62    ///
63    /// Returns an error, if
64    ///
65    /// - the first line does not contain the required section header "%FILES%",
66    /// - or there are lines following the section header, but they cannot be parsed as a [`Vec`] of
67    ///   [`RelativePath`].
68    ///
69    /// [alpm-repo-files]: https://alpm.archlinux.page/specifications/alpm-repo-files.5.html
70    pub(crate) fn parser(input: &mut &str) -> ModalResult<Self> {
71        // Consume the required section header "%FILES%".
72        // Optionally consume one following line ending.
73        cut_err(terminated(Self::SECTION_KEYWORD, alt((line_ending, eof))))
74            .context(StrContext::Label("alpm-repo-files section header"))
75            .context(StrContext::Expected(StrContextValue::Description(
76                Self::SECTION_KEYWORD,
77            )))
78            .parse_next(input)?;
79
80        // Return early if there is only the section header.
81        if input.is_empty() {
82            return Ok(Self(Vec::new()));
83        }
84
85        // Consider all following lines as paths.
86        // Optionally consume one following line ending.
87        let paths: Vec<RelativePath> =
88            repeat(0.., terminated(Self::parse_path, alt((line_ending, eof)))).parse_next(input)?;
89
90        // Consume any trailing whitespaces or new lines.
91        multispace0.parse_next(input)?;
92
93        // Fail if there are any further non-whitespace characters.
94        let _opt: Option<&str> =
95            opt(not(eof)
96                .take()
97                .and_then(cut_err(fail).context(StrContext::Expected(
98                    StrContextValue::Description("no further path after newline"),
99                ))))
100            .parse_next(input)?;
101
102        Ok(Self(paths))
103    }
104
105    /// Returns the paths.
106    pub fn paths(self) -> Vec<PathBuf> {
107        self.0.into_iter().map(RelativePath::into_inner).collect()
108    }
109}
110
111/// A collection of paths that are invalid in the context of a [`RepoFilesV1`].
112///
113/// A [`RepoFilesV1`] must not contain duplicate paths or (non top-level) paths that do not have a
114/// parent in the same set of paths.
115#[derive(Clone, Debug, Eq, PartialEq)]
116pub(crate) struct RepoFilesV1PathErrors {
117    pub(crate) absolute: HashSet<PathBuf>,
118    pub(crate) without_parent: HashSet<PathBuf>,
119    pub(crate) duplicate: HashSet<PathBuf>,
120}
121
122impl RepoFilesV1PathErrors {
123    /// Creates a new [`RepoFilesV1PathErrors`].
124    pub(crate) fn new() -> Self {
125        Self {
126            absolute: HashSet::new(),
127            without_parent: HashSet::new(),
128            duplicate: HashSet::new(),
129        }
130    }
131
132    /// Adds a new absolute path.
133    pub(crate) fn add_absolute(&mut self, path: PathBuf) -> bool {
134        self.absolute.insert(path)
135    }
136
137    /// Adds a new (non top-level) path that does not have a parent.
138    pub(crate) fn add_without_parent(&mut self, path: PathBuf) -> bool {
139        self.without_parent.insert(path)
140    }
141
142    /// Adds a new duplicate path.
143    pub(crate) fn add_duplicate(&mut self, path: PathBuf) -> bool {
144        self.duplicate.insert(path)
145    }
146
147    /// Fails if `self` tracks any invalid paths.
148    pub(crate) fn fail(&self) -> Result<(), Error> {
149        if !(self.absolute.is_empty()
150            && self.without_parent.is_empty()
151            && self.duplicate.is_empty())
152        {
153            Err(Error::InvalidFilesPaths {
154                message: self.to_string(),
155            })
156        } else {
157            Ok(())
158        }
159    }
160}
161
162impl Display for RepoFilesV1PathErrors {
163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164        fn write_invalid_set(
165            f: &mut std::fmt::Formatter<'_>,
166            message: String,
167            set: &HashSet<PathBuf>,
168        ) -> std::fmt::Result {
169            if !set.is_empty() {
170                writeln!(f, "{message}:")?;
171                let mut set = set.iter().collect::<Vec<_>>();
172                set.sort();
173                for path in set.iter() {
174                    writeln!(f, "{}", path.as_path().display())?;
175                }
176            }
177            Ok(())
178        }
179
180        write_invalid_set(f, t!("filesv1-path-errors-absolute-paths"), &self.absolute)?;
181        write_invalid_set(
182            f,
183            t!("filesv1-path-errors-paths-without-a-parent"),
184            &self.without_parent,
185        )?;
186        write_invalid_set(
187            f,
188            t!("filesv1-path-errors-duplicate-paths"),
189            &self.duplicate,
190        )?;
191
192        Ok(())
193    }
194}
195
196/// The representation of [alpm-repo-files] data (version 1).
197///
198/// [alpm-repo-files]: https://alpm.archlinux.page/specifications/alpm-repo-files.5.html
199#[derive(Clone, Debug, serde::Serialize)]
200pub struct RepoFilesV1(Vec<PathBuf>);
201
202impl AsRef<[PathBuf]> for RepoFilesV1 {
203    /// Returns a reference to the inner [`Vec`] of [`PathBuf`]s.
204    fn as_ref(&self) -> &[PathBuf] {
205        &self.0
206    }
207}
208
209impl Display for RepoFilesV1 {
210    /// Returns the [`String`] representation of the [`RepoFilesV1`].
211    ///
212    /// # Examples
213    ///
214    /// ```
215    /// use std::path::PathBuf;
216    ///
217    /// use alpm_repo_db::files::RepoFilesV1;
218    ///
219    /// # fn main() -> Result<(), alpm_repo_db::files::Error> {
220    /// // An empty alpm-repo-files.
221    /// let expected = "%FILES%\n";
222    /// let files = RepoFilesV1::try_from(Vec::new())?;
223    /// assert_eq!(files.to_string(), expected);
224    ///
225    /// // An alpm-repo-files with entries.
226    /// let expected = r#"%FILES%
227    /// usr/
228    /// usr/bin/
229    /// usr/bin/foo
230    /// "#;
231    /// let files = RepoFilesV1::try_from(vec![
232    ///     PathBuf::from("usr/"),
233    ///     PathBuf::from("usr/bin/"),
234    ///     PathBuf::from("usr/bin/foo"),
235    /// ])?;
236    /// assert_eq!(files.to_string(), expected);
237    /// # Ok(())
238    /// # }
239    /// ```
240    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241        let mut output = String::new();
242
243        output.push_str(FilesSection::SECTION_KEYWORD);
244        output.push('\n');
245
246        for path in self.0.iter() {
247            output.push_str(&format!("{}", path.to_string_lossy()));
248            output.push('\n');
249        }
250
251        write!(f, "{output}")
252    }
253}
254
255impl FromStr for RepoFilesV1 {
256    type Err = Error;
257
258    /// Creates a new [`RepoFilesV1`] from a string slice.
259    ///
260    /// # Note
261    ///
262    /// Delegates to the [`TryFrom`] [`Vec`] of [`PathBuf`] implementation, after the string slice
263    /// has been parsed as a [`Vec`] of [`PathBuf`].
264    ///
265    /// # Errors
266    ///
267    /// Returns an error, if
268    ///
269    /// - the first line does not contain the section header ("%FILES%"),
270    /// - there are lines following the section header, but they cannot be parsed as a [`Vec`] of
271    ///   [`PathBuf`],
272    /// - or [`Self::try_from`] [`Vec`] of [`PathBuf`] fails.
273    ///
274    /// # Examples
275    ///
276    /// ```
277    /// use std::{path::PathBuf, str::FromStr};
278    ///
279    /// use alpm_repo_db::files::RepoFilesV1;
280    ///
281    /// # fn main() -> Result<(), alpm_repo_db::files::Error> {
282    /// // The section header is required; empty input is invalid.
283    /// let data = "";
284    /// assert!(RepoFilesV1::from_str(data).is_err());
285    ///
286    /// # let expected: Vec<PathBuf> = Vec::new();
287    /// // No files according to alpm-repo-files.
288    /// let data = "%FILES%";
289    /// let files = RepoFilesV1::from_str(data)?;
290    /// # assert_eq!(files.as_ref(), expected);
291    /// let data = "%FILES%\n";
292    /// let files = RepoFilesV1::from_str(data)?;
293    /// # assert_eq!(files.as_ref(), expected);
294    ///
295    /// # let expected: Vec<PathBuf> = vec![
296    /// #     PathBuf::from("usr/"),
297    /// #     PathBuf::from("usr/bin/"),
298    /// #     PathBuf::from("usr/bin/foo"),
299    /// # ];
300    /// // Files according to alpm-repo-files.
301    /// let data = r#"%FILES%
302    /// usr/
303    /// usr/bin/
304    /// usr/bin/foo"#;
305    /// let files = RepoFilesV1::from_str(data)?;
306    /// # assert_eq!(files.as_ref(), expected);
307    ///
308    /// // Files according to alpm-repo-files.
309    /// let data = r#"%FILES%
310    /// usr/
311    /// usr/bin/
312    /// usr/bin/foo
313    /// "#;
314    /// let files = RepoFilesV1::from_str(data)?;
315    /// # assert_eq!(files.as_ref(), expected.as_slice());
316    /// # Ok(())
317    /// # }
318    /// ```
319    fn from_str(s: &str) -> Result<Self, Self::Err> {
320        let files_section = FilesSection::parser.parse(s)?;
321        RepoFilesV1::try_from(files_section.paths())
322    }
323}
324
325impl TryFrom<PathBuf> for RepoFilesV1 {
326    type Error = Error;
327
328    /// Creates a new [`RepoFilesV1`] from all files and directories in a directory.
329    ///
330    /// # Note
331    ///
332    /// Delegates to [`alpm_common::relative_files`] to get a sorted list of all files and
333    /// directories in the directory `value` (relative to `value`).
334    /// Afterwards, tries to construct a [`RepoFilesV1`] from this list.
335    ///
336    /// # Errors
337    ///
338    /// Returns an error if
339    ///
340    /// - [`alpm_common::relative_files`] fails,
341    /// - or [`TryFrom`] [`Vec`] of [`PathBuf`] for [`RepoFilesV1`] fails.
342    ///
343    /// # Examples
344    ///
345    /// ```
346    /// use std::{
347    ///     fs::{File, create_dir_all},
348    ///     path::PathBuf,
349    /// };
350    ///
351    /// use alpm_repo_db::files::RepoFilesV1;
352    /// use tempfile::tempdir;
353    ///
354    /// # fn main() -> testresult::TestResult {
355    /// let temp_dir = tempdir()?;
356    /// let path = temp_dir.path();
357    /// create_dir_all(path.join("usr/bin/"))?;
358    /// File::create(path.join("usr/bin/foo"))?;
359    ///
360    /// let files = RepoFilesV1::try_from(path.to_path_buf())?;
361    /// assert_eq!(
362    ///     files.as_ref(),
363    ///     vec![
364    ///         PathBuf::from("usr/"),
365    ///         PathBuf::from("usr/bin/"),
366    ///         PathBuf::from("usr/bin/foo")
367    ///     ]
368    /// );
369    /// # Ok(())
370    /// # }
371    /// ```
372    fn try_from(value: PathBuf) -> Result<Self, Self::Error> {
373        RepoFilesV1::try_from(relative_files(value, &[])?)
374    }
375}
376
377impl TryFrom<Vec<PathBuf>> for RepoFilesV1 {
378    type Error = Error;
379
380    /// Creates a new [`RepoFilesV1`] from a [`Vec`] of [`PathBuf`].
381    ///
382    /// The provided `value` is sorted and checked for non top-level paths without a parent, as well
383    /// as any duplicate paths.
384    ///
385    /// # Errors
386    ///
387    /// Returns an error if
388    ///
389    /// - `value` contains absolute paths,
390    /// - `value` contains (non top-level) paths without a parent directory present in `value`,
391    /// - or `value` contains duplicate paths.
392    ///
393    /// # Examples
394    ///
395    /// ```
396    /// use std::path::PathBuf;
397    ///
398    /// use alpm_repo_db::files::RepoFilesV1;
399    ///
400    /// # fn main() -> Result<(), alpm_repo_db::files::Error> {
401    /// let paths: Vec<PathBuf> = vec![
402    ///     PathBuf::from("usr/"),
403    ///     PathBuf::from("usr/bin/"),
404    ///     PathBuf::from("usr/bin/foo"),
405    /// ];
406    /// let files = RepoFilesV1::try_from(paths)?;
407    ///
408    /// // Absolute paths are not allowed.
409    /// let paths: Vec<PathBuf> = vec![
410    ///     PathBuf::from("/usr/"),
411    ///     PathBuf::from("/usr/bin/"),
412    ///     PathBuf::from("/usr/bin/foo"),
413    /// ];
414    /// assert!(RepoFilesV1::try_from(paths).is_err());
415    ///
416    /// // Every path (excluding top-level paths) must have a parent.
417    /// let paths: Vec<PathBuf> = vec![PathBuf::from("usr/bin/"), PathBuf::from("usr/bin/foo")];
418    /// assert!(RepoFilesV1::try_from(paths).is_err());
419    ///
420    /// // Every path must be unique.
421    /// let paths: Vec<PathBuf> = vec![
422    ///     PathBuf::from("usr/"),
423    ///     PathBuf::from("usr/"),
424    ///     PathBuf::from("usr/bin/"),
425    ///     PathBuf::from("usr/bin/foo"),
426    /// ];
427    /// assert!(RepoFilesV1::try_from(paths).is_err());
428    /// # Ok(())
429    /// # }
430    /// ```
431    fn try_from(value: Vec<PathBuf>) -> Result<Self, Self::Error> {
432        let mut paths = value;
433        paths.sort_unstable();
434
435        let mut errors = RepoFilesV1PathErrors::new();
436        let mut path_set = HashSet::new();
437        let empty_parent = PathBuf::from("");
438        let root_parent = PathBuf::from("/");
439
440        for path in paths.iter() {
441            let path = path.as_path();
442
443            // Add absolute paths as errors.
444            if path.is_absolute() {
445                errors.add_absolute(path.to_path_buf());
446            }
447
448            // Add non top-level, relative paths without a parent as errors.
449            if let Some(parent) = path.parent() {
450                if parent != empty_parent && parent != root_parent && !path_set.contains(parent) {
451                    errors.add_without_parent(path.to_path_buf());
452                }
453            }
454
455            // Add duplicates as errors.
456            if !path_set.insert(path) {
457                errors.add_duplicate(path.to_path_buf());
458            }
459        }
460
461        errors.fail()?;
462
463        Ok(Self(paths))
464    }
465}
466
#[cfg(test)]
mod tests {
    use std::{
        fs::{File, create_dir_all},
        str::FromStr,
    };

    use rstest::rstest;
    use tempfile::tempdir;
    use testresult::TestResult;

    use super::*;

    /// Ensures that a [`RepoFilesV1`] can be successfully created from a directory.
    #[test]
    fn filesv1_try_from_pathbuf_succeeds() -> TestResult {
        let root = tempdir()?;
        create_dir_all(root.path().join("usr/bin/"))?;
        File::create(root.path().join("usr/bin/foo"))?;

        let expected = vec![
            PathBuf::from("usr/"),
            PathBuf::from("usr/bin/"),
            PathBuf::from("usr/bin/foo"),
        ];
        let files = RepoFilesV1::try_from(root.path().to_path_buf())?;

        assert_eq!(files.as_ref(), expected);

        Ok(())
    }

    /// Ensures that a [`RepoFilesV1`] can be successfully created from valid lists of paths.
    #[rstest]
    #[case::dirs_and_files(
        vec![
            PathBuf::from("usr/"),
            PathBuf::from("usr/bin/"),
            PathBuf::from("usr/bin/foo"),
        ],
        3
    )]
    #[case::empty(Vec::new(), 0)]
    fn filesv1_try_from_pathbufs_succeeds(
        #[case] paths: Vec<PathBuf>,
        #[case] len: usize,
    ) -> TestResult {
        let files = RepoFilesV1::try_from(paths)?;

        assert_eq!(files.as_ref().len(), len);

        Ok(())
    }

    /// Ensures that missing section headers result in parse errors.
    #[test]
    fn filesv1_from_str_fails_without_header() {
        assert!(matches!(
            RepoFilesV1::from_str(""),
            Err(Error::ParseError(_))
        ));
    }

    /// Ensures that invalid lists of paths are rejected and that the error message lists exactly
    /// the expected invalid paths.
    #[rstest]
    #[case::absolute_paths(
        vec![
            PathBuf::from("/usr/"), PathBuf::from("/usr/bin/"), PathBuf::from("/usr/bin/foo")
        ],
        RepoFilesV1PathErrors{
            absolute: HashSet::from([
                PathBuf::from("/usr/"),
                PathBuf::from("/usr/bin/"),
                PathBuf::from("/usr/bin/foo"),
            ]),
            without_parent: HashSet::new(),
            duplicate: HashSet::new(),
        }
    )]
    #[case::without_parents(
        vec![PathBuf::from("usr/bin/"), PathBuf::from("usr/bin/foo")],
        RepoFilesV1PathErrors{
            absolute: HashSet::new(),
            without_parent: HashSet::from([
                PathBuf::from("usr/bin/"),
            ]),
            duplicate: HashSet::new(),
        }
    )]
    #[case::duplicates(
        vec![PathBuf::from("usr/"), PathBuf::from("usr/")],
        RepoFilesV1PathErrors{
            absolute: HashSet::new(),
            without_parent: HashSet::new(),
            duplicate: HashSet::from([
                PathBuf::from("usr/"),
            ]),
        }
    )]
    fn filesv1_try_from_pathbufs_fails(
        #[case] paths: Vec<PathBuf>,
        #[case] expected_errors: RepoFilesV1PathErrors,
    ) -> TestResult {
        // Only an Error::InvalidFilesPaths is acceptable; extract its message for comparison.
        let errors = match RepoFilesV1::try_from(paths) {
            Err(Error::InvalidFilesPaths { message }) => message,
            Err(error) => panic!("Expected an Error::InvalidFilesPaths, but got: {error}"),
            Ok(files) => panic!(
                "Should have failed with an Error::InvalidFilesPaths, but succeeded to create a RepoFilesV1: {files:?}"
            ),
        };

        eprintln!("{errors}");
        assert_eq!(errors, expected_errors.to_string());

        Ok(())
    }

    /// Ensures that absolute paths in the input are already rejected while parsing.
    #[test]
    fn filesv1_from_str_rejects_absolute_paths() -> TestResult {
        let outcome = RepoFilesV1::from_str("%FILES%\n/usr/bin/foo\n");

        match outcome {
            Ok(files) => panic!("expected parse failure, got {files:?}"),
            Err(Error::ParseError(_)) => Ok(()),
            Err(error) => panic!("expected ParseError, got {error}"),
        }
    }
}