// dev_scripts/sync/pkgsrc.rs

//! Handles the download of package source repositories.
//!
//! This requires interaction with `git` and the official Arch Linux Gitlab, where the package
//! source repositories of all official packages are located.
5
6use std::{
7    collections::{HashMap, HashSet},
8    fs::remove_dir_all,
9    path::{Path, PathBuf},
10    process::Command,
11};
12
13use anyhow::{Context, Result};
14use log::{error, info, trace};
15use rayon::prelude::*;
16use strum::Display;
17
18use super::filenames_in_dir;
19use crate::{cmd::ensure_success, ui::get_progress_bar};
20
/// The archweb endpoint that returns a JSON map of all official package names to the list of
/// their maintainers.
const PKGBASE_MAINTAINER_URL: &str = "https://archlinux.org/packages/pkgbase-maintainer";
/// The ssh user and host that is used for the ssh warmup and as the host part of all clone URLs.
const SSH_HOST: &str = "git@gitlab.archlinux.org";
/// The path on [SSH_HOST] below which the package source repositories are located.
const REPO_BASE_URL: &str = "archlinux/packaging/packages";
24
/// Lookup table for packages whose repository name differs from the package name.
///
/// Each entry is a `(package name, repository name)` pair. There are only a few of these, so
/// they are special-cased before the clone URL of a package is built.
const PACKAGE_REPO_RENAMES: [(&str, &str); 3] = [
    ("gtk2+extra", "gtk2-extra"),
    ("dvd+rw-tools", "dvd-rw-tools"),
    ("tree", "unix-tree"),
];
32
/// Entry point for downloading the package source repositories of the official packages from
/// ArchLinux's Gitlab.
///
/// See [Self::download_package_source_repositories] for more information.
#[derive(Debug, Clone)]
pub struct PkgSrcDownloader {
    /// The base folder below which the downloaded repositories and the files extracted from
    /// them are placed.
    pub dest: PathBuf,
}
42
43impl PkgSrcDownloader {
44    /// Download all official package source git repositories.
45    pub fn download_package_source_repositories(&self) -> Result<()> {
46        // Query the arch web API to get a list all official active repositories
47        // The returned json is a map where the keys are the package names
48        // and the value is a list of maintainer names.
49        let repos = reqwest::blocking::get(PKGBASE_MAINTAINER_URL)
50            .context("Failed to query pkgbase url.")?
51            .json::<HashMap<String, Vec<String>>>()
52            .context("Failed to deserialize archweb pkglist.")?;
53
54        let all_repo_names: Vec<String> = repos.keys().map(String::from).collect();
55        info!("Found {} official packages.", all_repo_names.len());
56
57        let download_dir = self.dest.join("download/pkgsrc");
58
59        // Remove all old repos before trying to update them.
60        self.remove_old_repos(&all_repo_names, &download_dir)?;
61
62        // Copy all .SRCINFO files to the target directory
63        self.parallel_update_or_clone(&all_repo_names, &download_dir)?;
64
65        // Copy .SRCINFO and PKGBUILD files to the target directory
66        for repo in all_repo_names {
67            let download_path = download_dir.join(&repo);
68            for file in [".SRCINFO", "PKGBUILD"] {
69                if download_path.join(file).exists() {
70                    let target_dir = self.dest.join("pkgsrc").join(&repo);
71                    std::fs::create_dir_all(&target_dir)?;
72                    std::fs::copy(download_path.join(file), target_dir.join(file))?;
73                }
74            }
75        }
76
77        Ok(())
78    }
79
80    /// Remove all local repositories for packages that no longer exist in the official
81    /// repositories.
82    ///
83    /// Get the list of all locally available pkgsrc repositories.
84    /// If we find any that are not in the list of official packages, we remove them.
85    fn remove_old_repos(&self, repos: &[String], download_dir: &Path) -> Result<()> {
86        // First up, read the names of all repositories in the local download folder.
87        let local_repositories = filenames_in_dir(download_dir)?;
88
89        // Get the list of packages that no longer exist on the mirrors (and thereby archweb).
90        let remote_pkgs: HashSet<String> = HashSet::from_iter(repos.iter().map(String::from));
91
92        // Now remove all local repositories for which there's no longer an entry in the archweb
93        // response, as those packages are no longer served by the official mirrors.
94        let removed_pkgs: Vec<&String> = local_repositories.difference(&remote_pkgs).collect();
95
96        // Delete the repositories
97        if !removed_pkgs.is_empty() {
98            info!("Found {} repositories for cleanup:", removed_pkgs.len());
99            for removed in removed_pkgs {
100                remove_dir_all(download_dir.join(removed))
101                    .context("Failed to remove local repository {removed}")?;
102            }
103        }
104
105        Ok(())
106    }
107
108    /// Update/clone all git repositories in parallel with rayon.
109    ///
110    /// A progress bar is added for progress indication.
111    fn parallel_update_or_clone(&self, repos: &[String], download_dir: &Path) -> Result<()> {
112        let progress_bar = get_progress_bar(repos.len() as u64);
113
114        // Prepare a ssh session for better performance.
115        warmup_ssh_session()?;
116
117        // Clone/update all repositories in parallel
118        let results: Vec<Result<(), RepoUpdateError>> = repos
119            .par_iter()
120            .map(|repo| {
121                let target_dir = download_dir.join(repo);
122
123                // If the repo already exists, only pull it.
124                // Otherwise do a clone.
125                let result = if target_dir.exists() {
126                    update_repo(repo, &target_dir)
127                } else {
128                    clone_repo(repo.to_string(), &target_dir)
129                };
130
131                // Increment the counter
132                progress_bar.inc(1);
133                result
134            })
135            .collect();
136
137        // Finish the spinner
138        progress_bar.finish_with_message("All repositories cloned or updated.");
139
140        // Display any errors during cloning/updating to the user.
141        let mut error_iter = results.into_iter().filter_map(Result::err).peekable();
142        if error_iter.peek().is_some() {
143            error!("The command failed for the following repositories:");
144            for error in error_iter {
145                error!(
146                    "{} failed for repo {} with error:\n{:?}",
147                    error.operation, error.repo, error.inner
148                );
149            }
150        }
151
152        Ok(())
153    }
154}
155
156/// Create a new ssh connection that doesn't get bound to a given session.
157/// This allows that session to be reused, effectively eliminating the need to authenticate every
158/// time a git repository is cloned/pulled.
159///
160/// This is especially necessary for users that have their SSH key on a physical device, such as a
161/// NitroKey, as authentications with such devices are sequential and take quite some time.
162pub fn warmup_ssh_session() -> Result<()> {
163    let mut ssh_command = Command::new("ssh");
164    ssh_command.args(vec!["-T", SSH_HOST]);
165    trace!("Running command: {ssh_command:?}");
166    let output = &ssh_command
167        .output()
168        .context("Failed to start ssh warmup command")?;
169
170    ensure_success(output).context("Failed to run ssh warmup command:")
171}
172
173#[derive(Display)]
174enum RepoUpdateOperation {
175    Clone,
176    Update,
177}
178
179struct RepoUpdateError {
180    repo: String,
181    operation: RepoUpdateOperation,
182    inner: anyhow::Error,
183}
184
185/// Update a local git repository to the newest state.
186/// Resets any local changes in case in each repository beforehand to prevent any conflicts.
187fn update_repo(repo: &str, target_dir: &Path) -> Result<(), RepoUpdateError> {
188    // Reset any possible local changes.
189    let output = Command::new("git")
190        .current_dir(target_dir)
191        .args(vec!["reset", "--hard"])
192        .output()
193        .map_err(|err| RepoUpdateError {
194            repo: repo.to_string(),
195            operation: RepoUpdateOperation::Update,
196            inner: err.into(),
197        })?;
198
199    ensure_success(&output).map_err(|err| RepoUpdateError {
200        repo: repo.to_string(),
201        operation: RepoUpdateOperation::Update,
202        inner: err,
203    })?;
204
205    let output = &Command::new("git")
206        .current_dir(target_dir)
207        .args(["pull", "--force"])
208        .output()
209        .map_err(|err| RepoUpdateError {
210            repo: repo.to_string(),
211            operation: RepoUpdateOperation::Update,
212            inner: err.into(),
213        })?;
214
215    ensure_success(output).map_err(|err| RepoUpdateError {
216        repo: repo.to_string(),
217        operation: RepoUpdateOperation::Update,
218        inner: err,
219    })
220}
221
222/// Clone a git repository into a target directory.
223fn clone_repo(mut repo: String, target_dir: &Path) -> Result<(), RepoUpdateError> {
224    // Check if this is one of the few packages that needs to be replaced.
225    for (to_replace, replace_with) in PACKAGE_REPO_RENAMES {
226        if repo == to_replace {
227            repo = replace_with.to_string();
228        }
229    }
230
231    // Arch linux replaces the literal `+` chars with spelled out `plus` equivalents in their
232    // repository urls. This is to prevent any issues with external tooling and such.
233    repo = repo.replace("+", "plus");
234
235    let ssh_url = format!("{SSH_HOST}:{REPO_BASE_URL}/{repo}.git");
236
237    let output = &Command::new("git")
238        .arg("clone")
239        .arg(&ssh_url)
240        .arg(target_dir)
241        .output()
242        .map_err(|err| RepoUpdateError {
243            repo: repo.to_string(),
244            operation: RepoUpdateOperation::Clone,
245            inner: err.into(),
246        })?;
247
248    ensure_success(output).map_err(|err| RepoUpdateError {
249        repo: repo.to_string(),
250        operation: RepoUpdateOperation::Clone,
251        inner: err,
252    })
253}