dev_scripts/sync/
pkgsrc.rs

1//! Handles the download of package source repositories.
2//!
3//! This requires interaction with `git` and the official Arch Linux Gitlab, where all of the
4//! package source repositories for the official packages are located.
5
6use std::{
7    collections::{HashMap, HashSet},
8    fs::{copy, create_dir_all, remove_dir_all},
9    path::Path,
10    process::Command,
11};
12
13use alpm_types::{PKGBUILD_FILE_NAME, SRCINFO_FILE_NAME};
14use log::{error, info, trace};
15use rayon::prelude::*;
16use reqwest::blocking::get;
17
18use super::filenames_in_dir;
19use crate::{
20    CacheDir,
21    Error,
22    cmd::ensure_success,
23    consts::{DOWNLOAD_DIR, PKGSRC_DIR},
24    ui::get_progress_bar,
25};
26
/// URL of the archweb endpoint that is queried for the list of all pkgbases and their
/// maintainers.
const PKGBASE_MAINTAINER_URL: &str = "https://archlinux.org/packages/pkgbase-maintainer";
/// SSH user and host that is used for the SSH warmup and as the prefix of all git clone URLs.
const SSH_HOST: &str = "git@gitlab.archlinux.org";
/// Path on the git host below which the individual package source repositories are looked up.
const REPO_BASE_URL: &str = "archlinux/packaging/packages";
30
/// Packages whose repository name differs from the name of the package.
///
/// Each entry is a `(package name, repository name)` pair.
/// There are only a few of these, so they are handled explicitly via this lookup table.
const PACKAGE_REPO_RENAMES: [(&str, &str); 3] = [
    ("gtk2+extra", "gtk2-extra"),
    ("dvd+rw-tools", "dvd-rw-tools"),
    ("tree", "unix-tree"),
];
38
39/// This struct is the entry point for downloading package source repositories from ArchLinux's
40/// Gitlab.
41///
42/// Look at [Self::download_package_source_repositories] for more information.
43#[derive(Clone, Debug)]
44pub struct PkgSrcDownloader {
45    /// The destination folder into which files should be downloaded.
46    pub cache_dir: CacheDir,
47}
48
49impl PkgSrcDownloader {
50    /// Download all official package source git repositories.
51    pub fn download_package_source_repositories(&self) -> Result<(), Error> {
52        // Query the arch web API to get a list all official active repositories
53        // The returned json is a map where the keys are the package names
54        // and the value is a list of maintainer names.
55        let repos = get(PKGBASE_MAINTAINER_URL)
56            .map_err(|source| Error::HttpQueryFailed {
57                context: "retrieving the list of pkgbases".to_string(),
58                source,
59            })?
60            .json::<HashMap<String, Vec<String>>>()
61            .map_err(|source| Error::HttpQueryFailed {
62                context: "deserializing the response as JSON".to_string(),
63                source,
64            })?;
65
66        let all_repo_names: Vec<String> = repos.keys().map(String::from).collect();
67        info!("Found {} official packages.", all_repo_names.len());
68
69        let download_dir = self.cache_dir.as_ref().join(DOWNLOAD_DIR).join(PKGSRC_DIR);
70
71        // Remove all old repos before trying to update them.
72        self.remove_old_repos(&all_repo_names, &download_dir)?;
73
74        // Copy all .SRCINFO files to the target directory
75        self.parallel_update_or_clone(&all_repo_names, &download_dir)?;
76
77        // Copy .SRCINFO and PKGBUILD files to the target directory
78        for repo in all_repo_names {
79            let download_path = download_dir.join(&repo);
80            for file in [SRCINFO_FILE_NAME, PKGBUILD_FILE_NAME] {
81                if download_path.join(file).exists() {
82                    let target_dir = self.cache_dir.as_ref().join(PKGSRC_DIR).join(&repo);
83                    create_dir_all(&target_dir).map_err(|source| Error::IoPath {
84                        path: target_dir.to_path_buf(),
85                        context: "recursively creating a directory".to_string(),
86                        source,
87                    })?;
88                    copy(download_path.join(file), target_dir.join(file)).map_err(|source| {
89                        Error::IoPath {
90                            path: download_path.join(file),
91                            context: "copying the file to the target directory".to_string(),
92                            source,
93                        }
94                    })?;
95                }
96            }
97        }
98
99        Ok(())
100    }
101
102    /// Remove all local repositories for packages that no longer exist in the official
103    /// repositories.
104    ///
105    /// Get the list of all locally available pkgsrc repositories.
106    /// If we find any that are not in the list of official packages, we remove them.
107    fn remove_old_repos(&self, repos: &[String], download_dir: &Path) -> Result<(), Error> {
108        // First up, read the names of all repositories in the local download folder.
109        let local_repositories = filenames_in_dir(download_dir)?;
110
111        // Get the list of packages that no longer exist on the mirrors (and thereby archweb).
112        let remote_pkgs: HashSet<String> = HashSet::from_iter(repos.iter().map(String::from));
113
114        // Now remove all local repositories for which there's no longer an entry in the archweb
115        // response, as those packages are no longer served by the official mirrors.
116        let removed_pkgs: Vec<&String> = local_repositories.difference(&remote_pkgs).collect();
117
118        // Delete the repositories
119        if !removed_pkgs.is_empty() {
120            info!("Found {} repositories for cleanup:", removed_pkgs.len());
121            for removed in removed_pkgs {
122                remove_dir_all(download_dir.join(removed)).map_err(|source| Error::IoPath {
123                    path: download_dir.join(removed),
124                    context: "removing the file".to_string(),
125                    source,
126                })?;
127            }
128        }
129
130        Ok(())
131    }
132
133    /// Update/clone all git repositories in parallel with rayon.
134    ///
135    /// A progress bar is added for progress indication.
136    fn parallel_update_or_clone(&self, repos: &[String], download_dir: &Path) -> Result<(), Error> {
137        let progress_bar = get_progress_bar(repos.len() as u64);
138
139        // Prepare a ssh session for better performance.
140        warmup_ssh_session()?;
141
142        // Clone/update all repositories in parallel
143        let results: Vec<Result<(), Error>> = repos
144            .par_iter()
145            .map(|repo| {
146                let target_dir = download_dir.join(repo);
147
148                // If the repo already exists, only pull it.
149                // Otherwise do a clone.
150                let result = if target_dir.exists() {
151                    update_repo(repo, &target_dir)
152                } else {
153                    clone_repo(repo.to_string(), &target_dir)
154                };
155
156                // Increment the counter
157                progress_bar.inc(1);
158                result
159            })
160            .collect();
161
162        // Finish the spinner
163        progress_bar.finish_with_message("All repositories cloned or updated.");
164
165        // Display any errors during cloning/updating to the user.
166        let mut error_iter = results.into_iter().filter_map(Result::err).peekable();
167        if error_iter.peek().is_some() {
168            error!("The command failed for the following repositories:");
169            for error in error_iter {
170                error!("{error}");
171            }
172        }
173
174        Ok(())
175    }
176}
177
178/// Create a new ssh connection that doesn't get bound to a given session.
179/// This allows that session to be reused, effectively eliminating the need to authenticate every
180/// time a git repository is cloned/pulled.
181///
182/// This is especially necessary for users that have their SSH key on a physical device, such as a
183/// NitroKey, as authentications with such devices are sequential and take quite some time.
184pub fn warmup_ssh_session() -> Result<(), Error> {
185    let mut ssh_command = Command::new("ssh");
186    ssh_command.args(vec!["-T", SSH_HOST]);
187    trace!("Running command: {ssh_command:?}");
188    let output = &ssh_command.output().map_err(|source| Error::Io {
189        context: "running the SSH warmup command".to_string(),
190        source,
191    })?;
192
193    ensure_success(output, "Failed to run ssh warmup command".to_string())
194}
195
196/// Update a local git repository to the newest state.
197/// Resets any local changes in case in each repository beforehand to prevent any conflicts.
198fn update_repo(repo: &str, target_dir: &Path) -> Result<(), Error> {
199    // Reset any possible local changes.
200    let output = Command::new("git")
201        .current_dir(target_dir)
202        .args(vec!["reset", "--hard"])
203        .output()
204        .map_err(|source| Error::Io {
205            context: format!("resetting the package source repository \"{repo}\""),
206            source,
207        })?;
208
209    ensure_success(
210        &output,
211        format!("Resetting the package source repository \"{repo}\""),
212    )?;
213
214    let output = &Command::new("git")
215        .current_dir(target_dir)
216        .args(["pull", "--force"])
217        .output()
218        .map_err(|source| Error::Io {
219            context: format!("pulling the package source repository \"{repo}\""),
220            source,
221        })?;
222
223    ensure_success(
224        output,
225        format!("Pulling the package source repository \"{repo}\""),
226    )
227}
228
229/// Clone a git repository into a target directory.
230fn clone_repo(mut repo: String, target_dir: &Path) -> Result<(), Error> {
231    // Check if this is one of the few packages that needs to be replaced.
232    for (to_replace, replace_with) in PACKAGE_REPO_RENAMES {
233        if repo == to_replace {
234            repo = replace_with.to_string();
235        }
236    }
237
238    // Arch linux replaces the literal `+` chars with spelled out `plus` equivalents in their
239    // repository urls. This is to prevent any issues with external tooling and such.
240    repo = repo.replace("+", "plus");
241
242    let ssh_url = format!("{SSH_HOST}:{REPO_BASE_URL}/{repo}.git");
243
244    let output = &Command::new("git")
245        .arg("clone")
246        .arg(&ssh_url)
247        .arg(target_dir)
248        .output()
249        .map_err(|source| Error::Io {
250            context: format!("cloning the package source repository \"{repo}\""),
251            source,
252        })?;
253
254    ensure_success(
255        output,
256        format!("Cloning the package source repository \"{repo}\""),
257    )
258}