alpm_compress/tarball/
reader.rs

1//! Reading tarballs.
2
3use std::{
4    fmt,
5    fmt::Debug,
6    fs::File,
7    io::Read,
8    path::{Path, PathBuf},
9};
10
11use fluent_i18n::t;
12use tar::{Archive, Entries, Entry, EntryType};
13
14use crate::{
15    Error,
16    decompression::{CompressionDecoder, DecompressionSettings},
17};
18
19/// A generic tarball reader that can be used to read both compressed tarballs
20/// `.tar.*` and uncompressed tar archives `.tar`.
21///
22/// Can be created from a [`Path`] or [`PathBuf`],
23/// which will automatically detect the optional compression algorithm based on the file extension.
24///
25/// # Note
26///
27/// The lifetime `'c` is for [`CompressionDecoder`] of the underlying [`Archive`].
28pub struct TarballReader<'c> {
29    archive: Archive<CompressionDecoder<'c>>,
30}
31
32impl Debug for TarballReader<'_> {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        f.debug_struct("TarballReader")
35            .field("archive", &"Archive<CompressionDecoder>")
36            .finish()
37    }
38}
39
40impl<'c> TarballReader<'c> {
41    /// Creates a new [`TarballReader`] that reads from a [`CompressionDecoder`].
42    pub fn new(decoder: CompressionDecoder<'c>) -> Self {
43        Self {
44            archive: Archive::new(decoder),
45        }
46    }
47
48    /// Returns an iterator over the entries in the tarball.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error if [`Archive::entries`] fails.
53    pub fn entries<'a>(&'a mut self) -> Result<TarballEntries<'a, 'c>, Error> {
54        let raw_entries = self.archive.entries().map_err(|source| Error::IoRead {
55            context: t!("error-io-read-archive-entries"),
56            source,
57        })?;
58        Ok(raw_entries.into())
59    }
60
61    /// Reads a specific [`TarballEntry`] by its path.
62    ///
63    /// Returns [`None`] if the entry does not exist.
64    ///
65    /// # Errors
66    ///
67    /// Returns an error if [`Self::entries`] or reading an entry fails.
68    pub fn read_entry<'a, P: AsRef<Path>>(
69        &'a mut self,
70        path: P,
71    ) -> Result<Option<TarballEntry<'a, 'c>>, Error> {
72        for entry in self.entries()? {
73            let entry = entry?;
74            if entry.path() == path.as_ref() {
75                return Ok(Some(entry));
76            }
77        }
78        Ok(None)
79    }
80}
81
82impl TryFrom<&Path> for TarballReader<'_> {
83    type Error = Error;
84
85    /// Creates a new [`TarballReader`] from a [`Path`].
86    ///
87    /// # Errors
88    ///
89    /// Returns an error if
90    ///
91    /// - the file at `path` cannot be opened for reading,
92    /// - the file extension is neither a `.tar` nor an extension of a supported compression
93    ///   algorithm,
94    /// - a [`CompressionDecoder`] cannot be created from the file and [`DecompressionSettings`].
95    fn try_from(path: &Path) -> Result<Self, Self::Error> {
96        let file = File::open(path).map_err(|source| Error::IoRead {
97            context: t!("error-io-open-archive"),
98            source,
99        })?;
100        let settings = match DecompressionSettings::try_from(path) {
101            Ok(settings) => settings,
102            Err(err) => {
103                // Check if it's a plain .tar archive without compression.
104                if let Some(extension) = path.extension()
105                    && extension.eq_ignore_ascii_case("tar")
106                {
107                    DecompressionSettings::None
108                } else {
109                    return Err(err);
110                }
111            }
112        };
113        let decoder = CompressionDecoder::new(file, settings)?;
114        Ok(Self::new(decoder))
115    }
116}
117
118impl TryFrom<PathBuf> for TarballReader<'_> {
119    type Error = Error;
120
121    /// Creates a [`TarballReader`] from [`PathBuf`].
122    ///
123    /// Delegates to the [`TryFrom`] implementation for [`Path`].
124    ///
125    /// # Errors
126    ///
127    /// Returns an error if the [`TryFrom`] implementation for [`Path`] fails.
128    fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
129        Self::try_from(path.as_path())
130    }
131}
132
133/// An entry in a tarball.
134pub struct TarballEntry<'a, 'c> {
135    /// The path of the entry in the archive.
136    path: PathBuf,
137    /// The raw tar entry.
138    entry: Entry<'a, CompressionDecoder<'c>>,
139}
140
141impl Debug for TarballEntry<'_, '_> {
142    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
143        f.debug_struct("TarballEntry")
144            .field("path", &self.path)
145            .field("entry", &"tar::Entry<CompressionDecoder>")
146            .finish()
147    }
148}
149
150impl<'a, 'c> TarballEntry<'a, 'c> {
151    /// Returns the path of the entry in the archive.
152    pub fn path(&self) -> &Path {
153        &self.path
154    }
155
156    /// Returns the content of the entry.
157    ///
158    /// # Errors
159    ///
160    /// Returns an error if [`Entry::read_to_end`] fails.
161    pub fn content(&mut self) -> Result<Vec<u8>, Error> {
162        let mut buffer = Vec::new();
163        self.entry
164            .read_to_end(&mut buffer)
165            .map_err(|source| crate::Error::IoRead {
166                context: t!("error-io-read-archive-entry-content"),
167                source,
168            })?;
169        Ok(buffer)
170    }
171
172    /// Checks whether the [`TarballEntry`] represents a directory.
173    ///
174    /// Returns `true` if the [`TarballEntry`] represents a directory, `false` otherwise.
175    ///
176    /// # Note
177    ///
178    /// This is a convenience method for comparing the [`EntryType`] of the [`Entry::header`]
179    /// contained in the [`TarballEntry`] with [`EntryType::Directory`].
180    pub fn is_dir(&self) -> bool {
181        self.entry.header().entry_type() == EntryType::Directory
182    }
183
184    /// Checks whether the [`TarballEntry`] represents a regular file.
185    ///
186    /// Returns `true` if the [`TarballEntry`] represents a regular file, `false` otherwise.
187    ///
188    /// # Note
189    ///
190    /// This is a convenience method for comparing the [`EntryType`] of the [`Entry::header`]
191    /// contained in the [`TarballEntry`] with [`EntryType::Regular`].
192    pub fn is_file(&self) -> bool {
193        self.entry.header().entry_type() == EntryType::Regular
194    }
195
196    /// Checks whether the [`TarballEntry`] represents a symlink.
197    ///
198    /// Returns `true` if the [`TarballEntry`] represents a symlink, `false` otherwise.
199    ///
200    /// # Note
201    ///
202    /// This is a convenience method for comparing the [`EntryType`] of the [`Entry::header`]
203    /// contained in the [`TarballEntry`] with [`EntryType::Symlink`].
204    pub fn is_symlink(&self) -> bool {
205        self.entry.header().entry_type() == EntryType::Symlink
206    }
207
208    /// Returns the access permissions that apply for the [`TarballEntry`].
209    ///
210    /// # Notes
211    ///
212    /// - This is a convenience method for retrieving the mode of the [`Entry::header`] contained in
213    ///   the [`TarballEntry`].
214    /// - It returns the mode masked with `0o7777` to ensure only the permission bits are returned.
215    ///
216    /// # Errors
217    ///
218    /// Returns an error if retrieving the mode from the entry's header fails.
219    pub fn permissions(&self) -> Result<u32, Error> {
220        Ok(self.entry.header().mode().map_err(|source| Error::IoRead {
221            context: t!("error-io-read-archive-entry-mode"),
222            source,
223        })? & 0o7777)
224    }
225
226    /// Returns a reference to the underlying tar [`Entry`].
227    ///
228    /// This is useful for accessing metadata of the entry, such as its header or path.
229    pub fn raw(&self) -> &Entry<'a, CompressionDecoder<'c>> {
230        &self.entry
231    }
232}
233
234impl Read for TarballEntry<'_, '_> {
235    /// Reads data from the entry into the provided buffer.
236    ///
237    /// Delegates to [`Entry::read`].
238    ///
239    /// # Errors
240    ///
241    /// Returns an error if reading from the entry fails.
242    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
243        self.entry.read(buf)
244    }
245}
246
247/// An iterator over the entries in a tarball.
248///
249/// # Notes
250///
251/// Uses two lifetimes for the `inner` field:
252/// - `'a` for the internal reference of the [`Archive`] in [`Entries::fields`] (of type
253///   [`Entries`]).
254/// - `'c` for the [`CompressionDecoder`]
255pub struct TarballEntries<'a, 'c> {
256    inner: Entries<'a, CompressionDecoder<'c>>,
257}
258
259impl Debug for TarballEntries<'_, '_> {
260    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
261        f.debug_struct("TarballEntries")
262            .field("inner", &"Entries<CompressionDecoder>")
263            .finish()
264    }
265}
266
267impl<'a, 'c> Iterator for TarballEntries<'a, 'c> {
268    type Item = Result<TarballEntry<'a, 'c>, Error>;
269
270    fn next(&mut self) -> Option<Self::Item> {
271        self.inner.next().map(|entry| {
272            let entry = entry.map_err(|source| Error::IoRead {
273                context: t!("error-io-read-archive-entry"),
274                source,
275            })?;
276
277            let path = entry
278                .path()
279                .map_err(|source| Error::IoRead {
280                    context: t!("error-io-read-archive-entry-path"),
281                    source,
282                })?
283                .to_path_buf();
284
285            Ok(TarballEntry { path, entry })
286        })
287    }
288}
289
290impl<'a, 'c> From<Entries<'a, CompressionDecoder<'c>>> for TarballEntries<'a, 'c> {
291    fn from(inner: Entries<'a, CompressionDecoder<'c>>) -> Self {
292        Self { inner }
293    }
294}
295
#[cfg(test)]
mod tests {
    use std::io::Write;

    use rstest::{fixture, rstest};
    use tempfile::NamedTempFile;
    use testresult::TestResult;

    use super::*;
    use crate::{
        compression::{
            Bzip2CompressionLevel,
            CompressionSettings,
            GzipCompressionLevel,
            XzCompressionLevel,
            ZstdCompressionLevel,
            ZstdThreads,
        },
        tarball::builder::TarballBuilder,
    };

    /// Fixture providing a temporary file that has been filled with known content.
    ///
    /// Returns the file together with a copy of the content for later comparison.
    #[fixture]
    fn test_data() -> TestResult<(NamedTempFile, [u8; 9])> {
        const TEST_CONTENT: &[u8; 9] = b"alpm4ever";
        let file = NamedTempFile::new()?;

        // Write through a second handle and close it again before handing the file out.
        let mut handle = file.reopen()?;
        handle.write_all(TEST_CONTENT)?;
        handle.flush()?;
        drop(handle);

        Ok((file, *TEST_CONTENT))
    }

    /// Builds an archive with each supported compression, then reads the single entry back
    /// through [`TarballReader::read_entry`] and compares its content with the input.
    #[rstest]
    #[case::bzip2(".tar.bz2", CompressionSettings::Bzip2 {
        compression_level: Bzip2CompressionLevel::default()
    })]
    #[case::gzip(".tar.gz", CompressionSettings::Gzip {
        compression_level: GzipCompressionLevel::default()
    })]
    #[case::xz(".tar.xz", CompressionSettings::Xz {
        compression_level: XzCompressionLevel::default()
    })]
    #[case::zstd(".tar.zst", CompressionSettings::Zstd {
        compression_level: ZstdCompressionLevel::default(),
        threads: ZstdThreads::new(0),
    })]
    #[case::no_compression(".tar", CompressionSettings::None)]
    fn test_tarball_reader_roundtrip_read_entry(
        #[case] extension: String,
        #[case] compression_settings: CompressionSettings,
        test_data: TestResult<(NamedTempFile, [u8; 9])>,
    ) -> TestResult {
        let (test_file, test_file_content) = test_data?;
        let test_file_path = test_file.path();
        let test_file_name = test_file_path.file_name().unwrap();

        // Prepare an archive that contains only the test file. The builder lives in its own
        // scope so that it is gone before the archive is read back.
        let archive = NamedTempFile::with_suffix(extension)?;
        {
            let target = archive.reopen()?;
            let mut builder = TarballBuilder::new(target, &compression_settings)?;
            let tar_builder = builder.inner_mut();
            tar_builder.append_path_with_name(test_file_path, test_file_name)?;
            builder.finish()?;
        }

        // Read the archive back and look the entry up by its name.
        let mut reader = TarballReader::try_from(archive.path())?;
        let entry = reader.read_entry(test_file_name)?;
        assert!(entry.is_some());

        // Check data integrity.
        let content = entry.unwrap().content()?;
        assert_eq!(content, test_file_content);
        Ok(())
    }
}