dev_scripts/sync/mirror/
mod.rs

1//! All logic for downloading data from an Arch Linux package mirror.
2//!
3//! This includes the database files or packages.
4
5mod rsync_changes;
6
7use std::{
8    collections::HashSet,
9    fs::{create_dir_all, read_dir, remove_dir_all},
10    path::{Path, PathBuf},
11    process::Command,
12    str::FromStr,
13};
14
15use alpm_types::{INSTALL_SCRIPTLET_FILE_NAME, MetadataFileName, PackageFileName};
16use log::{debug, info, trace};
17use rayon::iter::{IntoParallelIterator, ParallelIterator};
18
19use super::{PackageRepositories, filenames_in_dir};
20use crate::{
21    CacheDir,
22    Error,
23    cmd::ensure_success,
24    consts::{DATABASES_DIR, DOWNLOAD_DIR, PACKAGES_DIR},
25    sync::mirror::rsync_changes::Report,
26    ui::get_progress_bar,
27};
28
29/// The entry point for downloading any data from package mirrors.
30#[derive(Clone, Debug)]
31pub struct MirrorDownloader {
32    /// The destination folder into which files should be downloaded.
33    pub cache_dir: CacheDir,
34    /// The mirror url from which files will be downloaded.
35    pub mirror: String,
36    /// The repositories that should be downloaded.
37    pub repositories: Vec<PackageRepositories>,
38    /// Whether to extract all packages (regardless of changes).
39    pub extract_all: bool,
40}
41
42impl MirrorDownloader {
43    /// Download all official repository file databases and unpack them.
44    /// They contain the following files:
45    ///
46    /// - `desc`
47    /// - `files`
48    pub fn sync_remote_databases(&self) -> Result<(), Error> {
49        let download_dir = self
50            .cache_dir
51            .as_ref()
52            .join(DOWNLOAD_DIR)
53            .join(DATABASES_DIR);
54        let target_dir = self.cache_dir.as_ref().join(DATABASES_DIR);
55
56        create_dir_all(&download_dir).map_err(|source| Error::IoPath {
57            path: download_dir.clone(),
58            context: "recursively creating the directory".to_string(),
59            source,
60        })?;
61
62        create_dir_all(&target_dir).map_err(|source| Error::IoPath {
63            path: target_dir.clone(),
64            context: "recursively creating the directory".to_string(),
65            source,
66        })?;
67
68        for repo in self.repositories.iter() {
69            let name = repo.to_string();
70            info!("Downloading database for repository {name}");
71
72            let filename = format!("{name}.files");
73            let file_source = format!("rsync://{}/{name}/os/x86_64/{filename}", self.mirror);
74
75            let download_dest = download_dir.join(filename);
76
77            // Download the db from the mirror
78            let mut db_sync_command = Command::new("rsync");
79            db_sync_command
80                .args([
81                    "--recursive",
82                    "--perms",
83                    "--times",
84                    // Report changes status
85                    "--itemize-changes",
86                    // Copy files instead of symlinks
87                    // Symlinks may point to files up the tree of where we're looking at,
88                    // which is why normal symlinks would be invalid.
89                    "--copy-links",
90                ])
91                .arg(file_source)
92                .arg(&download_dest);
93
94            trace!("Running command: {db_sync_command:?}");
95            let output = db_sync_command.output().map_err(|source| Error::Io {
96                context: format!("synchronizing repository database for {name}"),
97                source,
98            })?;
99
100            ensure_success(
101                &output,
102                format!("synchronizing repository database for {name}"),
103            )?;
104
105            trace!(
106                "Rsync reports: {}",
107                String::from_utf8_lossy(&output.stdout).trim()
108            );
109
110            let repo_target_dir = target_dir.join(&name);
111            if repo_target_dir.exists() {
112                if !self.extract_all
113                    && Report::parser(&output.stdout)
114                        .map_err(|source| Error::Parser(source.to_string()))?
115                        .file_content_updated()?
116                        .is_none()
117                {
118                    debug!("Database {name} is unchanged upstream, skipping extraction");
119                    continue;
120                } else {
121                    // There are old versions of the files, remove them.
122                    remove_dir_all(&repo_target_dir).map_err(|source| Error::IoPath {
123                        path: repo_target_dir.clone(),
124                        context: "recursively removing the directory".to_string(),
125                        source,
126                    })?;
127                }
128            }
129            create_dir_all(&repo_target_dir).map_err(|source| Error::IoPath {
130                path: repo_target_dir.clone(),
131                context: "recursively creating the directory".to_string(),
132                source,
133            })?;
134
135            debug!("Extracting db to {repo_target_dir:?}");
136
137            // Extract the db into the target folder.
138            let mut tar_command = Command::new("tar");
139            tar_command
140                .arg("-x")
141                .arg("-f")
142                .arg(&download_dest)
143                .arg("-C")
144                .arg(&repo_target_dir);
145
146            trace!("Running command: {tar_command:?}");
147            let output = tar_command.output().map_err(|source| Error::Io {
148                context: format!("extracting the repository database for {name}"),
149                source,
150            })?;
151            ensure_success(
152                &output,
153                format!("Extracting the repository database for {name}"),
154            )?;
155        }
156
157        Ok(())
158    }
159
160    /// Download all official repository packages and extract all files that're interesting to us.
161    /// Specifically:
162    ///
163    ///  - `.BUILDINFO`
164    ///  - `.MTREE`
165    ///  - `.PKGINFO`
166    ///  - `.INSTALL` (Optional)
167    pub fn sync_remote_packages(&self) -> Result<(), Error> {
168        let download_dir = self
169            .cache_dir
170            .as_ref()
171            .join(DOWNLOAD_DIR)
172            .join(PACKAGES_DIR);
173        let target_dir = self.cache_dir.as_ref().join(PACKAGES_DIR);
174
175        create_dir_all(&download_dir).map_err(|source| Error::IoPath {
176            path: download_dir.clone(),
177            context: "recursively creating the directory".to_string(),
178            source,
179        })?;
180
181        create_dir_all(&target_dir).map_err(|source| Error::IoPath {
182            path: target_dir.clone(),
183            context: "recursively creating the directory".to_string(),
184            source,
185        })?;
186
187        for repo in self.repositories.iter() {
188            let repo_name = repo.to_string();
189            info!("Downloading packages for repository {repo_name}");
190
191            let file_source = format!("rsync://{}/{repo_name}/os/x86_64/", self.mirror);
192            let download_dest = download_dir.join(&repo_name);
193            let changed = self.download_packages(&repo_name, file_source, &download_dest)?;
194
195            let packages: Vec<PathBuf> = if self.extract_all {
196                let files: Vec<_> = read_dir(&download_dest)
197                    .map_err(|source| Error::IoPath {
198                        path: download_dest.to_path_buf(),
199                        context: "reading entries in directory".to_string(),
200                        source,
201                    })?
202                    .map(|result| {
203                        result.map_err(|source| Error::IoPath {
204                            path: download_dest.to_path_buf(),
205                            context: "reading a directory entry".to_string(),
206                            source,
207                        })
208                    })
209                    .collect::<Result<_, Error>>()?;
210                files
211                    .into_iter()
212                    .map(|entry| entry.path().to_owned())
213                    .collect::<Vec<_>>()
214            } else {
215                changed
216                    .into_iter()
217                    .map(|pkg| download_dest.join(pkg))
218                    .collect()
219            }
220            .into_iter()
221            // Filter out any dotfiles.
222            // Those might be temporary download artifacts from previous rsync runs.
223            .filter(|entry| {
224                if let Some(path) = entry.to_str() {
225                    !path.starts_with('.')
226                } else {
227                    false
228                }
229            })
230            .collect();
231
232            info!("Extracting packages for repository {repo_name}");
233            let progress_bar = get_progress_bar(packages.len() as u64);
234            packages
235                .into_par_iter()
236                .filter(|file| {
237                    file.extension()
238                        .is_none_or(|ext| ext.to_str().is_none_or(|ext| ext != "sig"))
239                })
240                .map(|pkg| {
241                    // Extract all files that we're interested in.
242                    let result = extract_pkg_files(&pkg, &target_dir, &repo_name);
243                    progress_bar.inc(1);
244                    result
245                })
246                .collect::<Result<Vec<()>, Error>>()?;
247            // Finish the progress_bar
248            progress_bar.finish_with_message("Finished extracting files for repository {repo}.");
249        }
250
251        // Clean up package data of packages that're no longer on the mirror.
252        for repo in self.repositories.iter() {
253            let mirror_packages = filenames_in_dir(&download_dir.join(repo.to_string()))?
254                .into_iter()
255                .filter(|file| !file.ends_with(".sig"))
256                .map(remove_tarball_suffix)
257                .collect::<Result<HashSet<String>, Error>>()?;
258
259            let local_packages = filenames_in_dir(&target_dir.join(repo.to_string()))?;
260
261            // Get the packages that no longer exist on the mirror.
262            let removed_pkgs: Vec<&String> = local_packages.difference(&mirror_packages).collect();
263
264            // Delete the package data
265            if !removed_pkgs.is_empty() {
266                info!("Found {} packages for cleanup:", removed_pkgs.len());
267                for removed in removed_pkgs {
268                    debug!("Removing local package: {removed}");
269                    remove_dir_all(target_dir.join(repo.to_string()).join(removed)).map_err(
270                        |source| Error::IoPath {
271                            path: target_dir.join(repo.to_string()).join(removed),
272                            context: "recursively removing the directory".to_string(),
273                            source,
274                        },
275                    )?;
276                }
277            }
278        }
279
280        Ok(())
281    }
282
283    /// Downloads all packages and signatures of a package repository to a local directory.
284    fn download_packages(
285        &self,
286        repo_name: &str,
287        file_source: String,
288        download_dest: &PathBuf,
289    ) -> Result<Vec<PathBuf>, Error> {
290        let mut cmd = Command::new("rsync");
291        cmd.args([
292            "--recursive",
293            "--perms",
294            "--times",
295            "--delete",
296            "--hard-links",
297            // Copy actual files instead of symlinks.
298            // Most symlinks point to files up the tree of where we're looking at,
299            // which is why normal symlinks would be invalid.
300            "--copy-links",
301            // Check for deletions once everything has been transferred
302            "--delete-after",
303            // Only overwrite updated files in the very end.
304            // This allows for a somewhat "atomic" update process.
305            "--delay-updates",
306            // Print structured change information to be parsed
307            "--itemize-changes",
308        ]);
309
310        // Don't download any files related to repository sync databases.
311        for variation in [
312            ".db",
313            ".db.sig",
314            ".db.tar.*",
315            ".db.tar.*.sig",
316            ".db.tar.*.old",
317            ".db.tar.*.old.sig",
318            ".links.tar.*",
319            ".links.tar.*.sig",
320            ".files",
321            ".files.sig",
322            ".files.tar.*",
323            ".files.tar.*.sig",
324            ".files.tar.*.old",
325            ".files.tar.*.old.sig",
326        ] {
327            cmd.arg(format!("--exclude={repo_name}{variation}"));
328        }
329
330        trace!("Running command: {cmd:?}");
331        let output = cmd
332            .arg(file_source)
333            .arg(download_dest)
334            .output()
335            .map_err(|source| Error::Io {
336                context: format!(
337                    "syncing all package and signature files for repository {repo_name}"
338                ),
339                source,
340            })?;
341
342        ensure_success(
343            &output,
344            format!("Syncing all package and signature files for repository {repo_name}"),
345        )?;
346
347        let mut changed_files = Vec::new();
348
349        for line in output.stdout.split(|&b| b == b'\n') {
350            if let Some(path) = Report::parser(line)
351                .map_err(|source| Error::Parser(source.to_string()))?
352                .file_content_updated()?
353            {
354                trace!("File at {path:?} changed, marking for extraction");
355                changed_files.push(path.to_owned());
356            }
357        }
358
359        Ok(changed_files)
360    }
361}
362
363/// Get the list of all files inside a given compressed tarball.
364///
365/// This function provides data which is necessary to determine which subset of files should be
366/// extracted.
367fn get_tar_file_list(pkg: &Path) -> Result<HashSet<String>, Error> {
368    let mut tar_command = Command::new("tar");
369    tar_command.arg("-tf").arg(pkg);
370    trace!("Running command: {tar_command:?}");
371    let peek_output = tar_command.output().map_err(|source| Error::Io {
372        context: format!("list contents of tar file {pkg:?}"),
373        source,
374    })?;
375    ensure_success(
376        &peek_output,
377        format!("Listing contents of tar file {pkg:?}"),
378    )?;
379
380    Ok(String::from_utf8_lossy(&peek_output.stdout)
381        .lines()
382        .map(|line| line.to_string())
383        .collect())
384}
385
386/// Use `tar` to extract relevant package metadata and script files from packages files.
387///
388/// This function attempts to extract ".MTREE", ".BUILDINFO", ".PKGINFO" and ".INSTALL" files.
389/// Extracted files are placed in a directory structure that reflects the package's association with
390/// a package repository.
391///
392/// ## Note
393///
394/// Since some files are optional, we have to take a look at the files in that tarball to determine
395/// which of the files need to be actually extracted.
396///
397/// # Panics
398///
399/// Panics if `pkg` points to a directory.
400fn extract_pkg_files(pkg: &Path, target_dir: &Path, repo_name: &str) -> Result<(), Error> {
401    let pkg_file_name = pkg
402        .file_name()
403        .expect("got directory when expecting file")
404        .to_string_lossy()
405        .to_string();
406    let pkg_name = remove_tarball_suffix(pkg_file_name)?;
407
408    // Peek into the pkg tar to see what kind of files we need to extract.
409    let files = get_tar_file_list(pkg)?;
410
411    // Create the target directory where all the files should be extracted to.
412    let pkg_target_dir = target_dir.join(repo_name).join(pkg_name);
413    create_dir_all(&pkg_target_dir).map_err(|source| Error::IoPath {
414        path: pkg_target_dir.clone(),
415        context: "recursively creating the directory".to_string(),
416        source,
417    })?;
418
419    let mut cmd_args = vec![
420        "-C".to_string(),
421        pkg_target_dir.to_string_lossy().to_string(),
422        "-xf".to_string(),
423        pkg.to_string_lossy().to_string(),
424    ];
425
426    // Check for each of the known filetypes, whether it exists in the package.
427    // If it does, add it to the tar command for extraction.
428    for filetype in [
429        MetadataFileName::Mtree.as_ref(),
430        MetadataFileName::BuildInfo.as_ref(),
431        MetadataFileName::PackageInfo.as_ref(),
432        INSTALL_SCRIPTLET_FILE_NAME,
433    ] {
434        if files.contains(filetype) {
435            cmd_args.push(filetype.to_string());
436        }
437    }
438
439    // Run the extraction command
440    let mut tar_command = Command::new("tar");
441    tar_command.args(cmd_args);
442
443    trace!("Running command: {tar_command:?}");
444    let output = tar_command.output().map_err(|source| Error::IoPath {
445        path: pkg.to_path_buf(),
446        context: "extracting files".to_string(),
447        source,
448    })?;
449    ensure_success(&output, format!("Extracting files from tar file {pkg:?}"))?;
450
451    Ok(())
452}
453
454/// A small helper function that removes the `.pkg.tar.*` suffix of a tarball.
455/// This is necessary to get the actual package name from a packages full file name.
456pub fn remove_tarball_suffix(pkg_name: String) -> Result<String, Error> {
457    let package_file_name = PackageFileName::from_str(&pkg_name)?;
458
459    Ok(format!(
460        "{}-{}-{}",
461        package_file_name.name(),
462        package_file_name.version(),
463        package_file_name.architecture()
464    ))
465}