alpm_compress/tarball/
reader.rs

1//! Reading tarballs.
2
3use std::{
4    fmt,
5    fmt::Debug,
6    fs::File,
7    io::Read,
8    path::{Path, PathBuf},
9};
10
11use tar::{Archive, Entries, Entry, EntryType};
12
13use crate::{
14    Error,
15    decompression::{CompressionDecoder, DecompressionSettings},
16};
17
18/// A generic tarball reader that can be used to read both compressed tarballs
19/// `.tar.*` and uncompressed tar archives `.tar`.
20///
21/// Can be created from a [`Path`] or [`PathBuf`],
22/// which will automatically detect the optional compression algorithm based on the file extension.
23///
24/// # Note
25///
26/// The lifetime `'c` is for [`CompressionDecoder`] of the underlying [`Archive`].
27pub struct TarballReader<'c> {
28    archive: Archive<CompressionDecoder<'c>>,
29}
30
31impl Debug for TarballReader<'_> {
32    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33        f.debug_struct("TarballReader")
34            .field("archive", &"Archive<CompressionDecoder>")
35            .finish()
36    }
37}
38
39impl<'c> TarballReader<'c> {
40    /// Creates a new [`TarballReader`] that reads from a [`CompressionDecoder`].
41    pub fn new(decoder: CompressionDecoder<'c>) -> Self {
42        Self {
43            archive: Archive::new(decoder),
44        }
45    }
46
47    /// Returns an iterator over the entries in the tarball.
48    ///
49    /// # Errors
50    ///
51    /// Returns an error if [`Archive::entries`] fails.
52    pub fn entries<'a>(&'a mut self) -> Result<TarballEntries<'a, 'c>, Error> {
53        let raw_entries = self.archive.entries().map_err(|source| Error::IoRead {
54            context: "reading archive entries",
55            source,
56        })?;
57        Ok(raw_entries.into())
58    }
59
60    /// Reads a specific [`TarballEntry`] by its path.
61    ///
62    /// Returns [`None`] if the entry does not exist.
63    ///
64    /// # Errors
65    ///
66    /// Returns an error if [`Self::entries`] or reading an entry fails.
67    pub fn read_entry<'a, P: AsRef<Path>>(
68        &'a mut self,
69        path: P,
70    ) -> Result<Option<TarballEntry<'a, 'c>>, Error> {
71        for entry in self.entries()? {
72            let entry = entry?;
73            if entry.path() == path.as_ref() {
74                return Ok(Some(entry));
75            }
76        }
77        Ok(None)
78    }
79}
80
81impl TryFrom<&Path> for TarballReader<'_> {
82    type Error = Error;
83
84    /// Creates a new [`TarballReader`] from a [`Path`].
85    ///
86    /// # Errors
87    ///
88    /// Returns an error if
89    ///
90    /// - the file at `path` cannot be opened for reading,
91    /// - the file extension is neither a `.tar` nor an extension of a supported compression
92    ///   algorithm,
93    /// - a [`CompressionDecoder`] cannot be created from the file and [`DecompressionSettings`].
94    fn try_from(path: &Path) -> Result<Self, Self::Error> {
95        let file = File::open(path).map_err(|source| Error::IoRead {
96            context: "opening archive for reading",
97            source,
98        })?;
99        let settings = match DecompressionSettings::try_from(path) {
100            Ok(settings) => settings,
101            Err(err) => {
102                // Check if it's a plain .tar archive without compression.
103                if let Some(extension) = path.extension()
104                    && extension.eq_ignore_ascii_case("tar")
105                {
106                    DecompressionSettings::None
107                } else {
108                    return Err(err);
109                }
110            }
111        };
112        let decoder = CompressionDecoder::new(file, settings)?;
113        Ok(Self::new(decoder))
114    }
115}
116
117impl TryFrom<PathBuf> for TarballReader<'_> {
118    type Error = Error;
119
120    /// Creates a [`TarballReader`] from [`PathBuf`].
121    ///
122    /// Delegates to the [`TryFrom`] implementation for [`Path`].
123    ///
124    /// # Errors
125    ///
126    /// Returns an error if the [`TryFrom`] implementation for [`Path`] fails.
127    fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
128        Self::try_from(path.as_path())
129    }
130}
131
132/// An entry in a tarball.
133pub struct TarballEntry<'a, 'c> {
134    /// The path of the entry in the archive.
135    path: PathBuf,
136    /// The raw tar entry.
137    entry: Entry<'a, CompressionDecoder<'c>>,
138}
139
140impl Debug for TarballEntry<'_, '_> {
141    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
142        f.debug_struct("TarballEntry")
143            .field("path", &self.path)
144            .field("entry", &"tar::Entry<CompressionDecoder>")
145            .finish()
146    }
147}
148
149impl<'a, 'c> TarballEntry<'a, 'c> {
150    /// Returns the path of the entry in the archive.
151    pub fn path(&self) -> &Path {
152        &self.path
153    }
154
155    /// Returns the content of the entry.
156    ///
157    /// # Errors
158    ///
159    /// Returns an error if [`Entry::read_to_end`] fails.
160    pub fn content(&mut self) -> Result<Vec<u8>, Error> {
161        let mut buffer = Vec::new();
162        self.entry
163            .read_to_end(&mut buffer)
164            .map_err(|source| crate::Error::IoRead {
165                context: "reading archive entry content",
166                source,
167            })?;
168        Ok(buffer)
169    }
170
171    /// Checks whether the [`TarballEntry`] represents a directory.
172    ///
173    /// Returns `true` if the [`TarballEntry`] represents a directory, `false` otherwise.
174    ///
175    /// # Note
176    ///
177    /// This is a convenience method for comparing the [`EntryType`] of the [`Entry::header`]
178    /// contained in the [`TarballEntry`] with [`EntryType::Directory`].
179    pub fn is_dir(&self) -> bool {
180        self.entry.header().entry_type() == EntryType::Directory
181    }
182
183    /// Checks whether the [`TarballEntry`] represents a regular file.
184    ///
185    /// Returns `true` if the [`TarballEntry`] represents a regular file, `false` otherwise.
186    ///
187    /// # Note
188    ///
189    /// This is a convenience method for comparing the [`EntryType`] of the [`Entry::header`]
190    /// contained in the [`TarballEntry`] with [`EntryType::Regular`].
191    pub fn is_file(&self) -> bool {
192        self.entry.header().entry_type() == EntryType::Regular
193    }
194
195    /// Checks whether the [`TarballEntry`] represents a symlink.
196    ///
197    /// Returns `true` if the [`TarballEntry`] represents a symlink, `false` otherwise.
198    ///
199    /// # Note
200    ///
201    /// This is a convenience method for comparing the [`EntryType`] of the [`Entry::header`]
202    /// contained in the [`TarballEntry`] with [`EntryType::Symlink`].
203    pub fn is_symlink(&self) -> bool {
204        self.entry.header().entry_type() == EntryType::Symlink
205    }
206
207    /// Returns the access permissions that apply for the [`TarballEntry`].
208    ///
209    /// # Notes
210    ///
211    /// - This is a convenience method for retrieving the mode of the [`Entry::header`] contained in
212    ///   the [`TarballEntry`].
213    /// - It returns the mode masked with `0o7777` to ensure only the permission bits are returned.
214    ///
215    /// # Errors
216    ///
217    /// Returns an error if retrieving the mode from the entry's header fails.
218    pub fn permissions(&self) -> Result<u32, Error> {
219        Ok(self.entry.header().mode().map_err(|source| Error::IoRead {
220            context: "retrieving permissions of archive entry",
221            source,
222        })? & 0o7777)
223    }
224
225    /// Returns a reference to the underlying tar [`Entry`].
226    ///
227    /// This is useful for accessing metadata of the entry, such as its header or path.
228    pub fn raw(&self) -> &Entry<'a, CompressionDecoder<'c>> {
229        &self.entry
230    }
231}
232
233impl Read for TarballEntry<'_, '_> {
234    /// Reads data from the entry into the provided buffer.
235    ///
236    /// Delegates to [`Entry::read`].
237    ///
238    /// # Errors
239    ///
240    /// Returns an error if reading from the entry fails.
241    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
242        self.entry.read(buf)
243    }
244}
245
246/// An iterator over the entries in a tarball.
247///
248/// # Notes
249///
250/// Uses two lifetimes for the `inner` field:
251/// - `'a` for the internal reference of the [`Archive`] in [`Entries::fields`] (of type
252///   [`Entries`]).
253/// - `'c` for the [`CompressionDecoder`]
254pub struct TarballEntries<'a, 'c> {
255    inner: Entries<'a, CompressionDecoder<'c>>,
256}
257
258impl Debug for TarballEntries<'_, '_> {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        f.debug_struct("TarballEntries")
261            .field("inner", &"Entries<CompressionDecoder>")
262            .finish()
263    }
264}
265
266impl<'a, 'c> Iterator for TarballEntries<'a, 'c> {
267    type Item = Result<TarballEntry<'a, 'c>, Error>;
268
269    fn next(&mut self) -> Option<Self::Item> {
270        self.inner.next().map(|entry| {
271            let entry = entry.map_err(|source| Error::IoRead {
272                context: "reading archive entry",
273                source,
274            })?;
275
276            let path = entry
277                .path()
278                .map_err(|source| Error::IoRead {
279                    context: "retrieving path of archive entry",
280                    source,
281                })?
282                .to_path_buf();
283
284            Ok(TarballEntry { path, entry })
285        })
286    }
287}
288
289impl<'a, 'c> From<Entries<'a, CompressionDecoder<'c>>> for TarballEntries<'a, 'c> {
290    fn from(inner: Entries<'a, CompressionDecoder<'c>>) -> Self {
291        Self { inner }
292    }
293}
294
#[cfg(test)]
mod tests {
    use std::io::Write;

    use rstest::{fixture, rstest};
    use tempfile::NamedTempFile;
    use testresult::TestResult;

    use super::*;
    use crate::{
        compression::{
            Bzip2CompressionLevel,
            CompressionSettings,
            GzipCompressionLevel,
            XzCompressionLevel,
            ZstdCompressionLevel,
            ZstdThreads,
        },
        tarball::builder::TarballBuilder,
    };

    /// Fixture creating a temporary file with some content.
    ///
    /// Returns the file and its content for verification.
    #[fixture]
    fn test_data() -> TestResult<(NamedTempFile, [u8; 9])> {
        const TEST_CONTENT: &[u8; 9] = b"alpm4ever";
        let file = NamedTempFile::new()?;
        {
            let mut f = file.reopen()?;
            f.write_all(TEST_CONTENT)?;
            f.flush()?;
        }
        Ok((file, *TEST_CONTENT))
    }

    /// Ensures that a file added to a tarball can be read back unchanged using
    /// [`TarballReader::read_entry`], for each tested compression and for plain `.tar`.
    #[rstest]
    #[case::bzip2(".tar.bz2", CompressionSettings::Bzip2 {
        compression_level: Bzip2CompressionLevel::default()
    })]
    #[case::gzip(".tar.gz", CompressionSettings::Gzip {
        compression_level: GzipCompressionLevel::default()
    })]
    #[case::xz(".tar.xz", CompressionSettings::Xz {
        compression_level: XzCompressionLevel::default()
    })]
    #[case::zstd(".tar.zst", CompressionSettings::Zstd {
        compression_level: ZstdCompressionLevel::default(),
        threads: ZstdThreads::new(0),
    })]
    #[case::no_compression(".tar", CompressionSettings::None)]
    fn test_tarball_reader_roundtrip_read_entry(
        #[case] extension: String,
        #[case] compression_settings: CompressionSettings,
        test_data: TestResult<(NamedTempFile, [u8; 9])>,
    ) -> TestResult {
        let (test_file, test_file_content) = test_data?;
        let test_file_path = test_file.path();
        let test_file_name = test_file_path
            .file_name()
            .expect("the path of a temporary file should end in a file name");

        // Prepare archive containing the test_file
        let archive = NamedTempFile::with_suffix(extension)?;
        {
            let file = archive.reopen()?;
            let mut builder = TarballBuilder::new(file, &compression_settings)?;
            builder
                .inner_mut()
                .append_path_with_name(test_file_path, test_file_name)?;
            builder.finish()?;
        }

        // Read the archive
        let mut reader = TarballReader::try_from(archive.path())?;
        let Some(mut entry) = reader.read_entry(test_file_name)? else {
            panic!("the archive should contain an entry for {test_file_name:?}");
        };
        let content = entry.content()?;

        // Check data integrity
        assert_eq!(content, test_file_content);
        Ok(())
    }

    /// Ensures that [`TarballReader::read_entry`] returns [`None`] for a path that is not part of
    /// the archive.
    #[rstest]
    fn test_tarball_reader_read_entry_missing(
        test_data: TestResult<(NamedTempFile, [u8; 9])>,
    ) -> TestResult {
        let (test_file, _) = test_data?;
        let test_file_path = test_file.path();
        let test_file_name = test_file_path
            .file_name()
            .expect("the path of a temporary file should end in a file name");

        // Prepare an uncompressed archive containing only the test_file
        let archive = NamedTempFile::with_suffix(".tar")?;
        {
            let file = archive.reopen()?;
            let mut builder = TarballBuilder::new(file, &CompressionSettings::None)?;
            builder
                .inner_mut()
                .append_path_with_name(test_file_path, test_file_name)?;
            builder.finish()?;
        }

        // Look up a path that was never added to the archive
        let mut reader = TarballReader::try_from(archive.path())?;
        let entry = reader.read_entry("this-entry-does-not-exist")?;

        assert!(entry.is_none());
        Ok(())
    }
}