dev_scripts/sync/
pkgsrc.rs

1use std::{
2    collections::{HashMap, HashSet},
3    fs::remove_dir_all,
4    path::{Path, PathBuf},
5    process::Command,
6};
7
8use anyhow::{Context, Result};
9use log::{error, info};
10use rayon::prelude::*;
11use strum::Display;
12
13use super::filenames_in_dir;
14use crate::{cmd::ensure_success, ui::get_progress_bar};
15
/// Arch web endpoint that is queried for the list of official packages.
///
/// The response is deserialized as a JSON map from package base name to a list of maintainer names.
const PKGBASE_MAINTAINER_URL: &str = "https://archlinux.org/packages/pkgbase-maintainer";

/// The `user@host` part of the SSH remote that is used for all git operations.
const SSH_HOST: &str = "git@gitlab.archlinux.org";

/// The path (relative to [`SSH_HOST`]) below which all package source repositories live.
const REPO_BASE_URL: &str = "archlinux/packaging/packages";

/// Packages whose repository name differs from the package name.
///
/// There are only a few of these, so they are handled explicitly.
/// Each entry is a `(package name, repository name)` pair.
const PACKAGE_REPO_RENAMES: [(&str, &str); 3] = [
    ("gtk2+extra", "gtk2-extra"),
    ("dvd+rw-tools", "dvd-rw-tools"),
    ("tree", "unix-tree"),
];
27
/// Entry point for downloading package source repositories from Arch Linux's GitLab.
///
/// Look at [Self::download_package_source_repositories] for more information.
#[derive(Debug, Clone)]
pub struct PkgSrcDownloader {
    /// The destination folder into which files should be downloaded.
    ///
    /// Repositories are cloned into `download/pkgsrc` below this folder and the extracted
    /// `.SRCINFO` files are placed into `pkgsrc` below this folder.
    pub dest: PathBuf,
}
36
37impl PkgSrcDownloader {
38    /// Download all official package source git repositories.
39    pub fn download_package_source_repositories(&self) -> Result<()> {
40        // Query the arch web API to get a list all official active repositories
41        // The returned json is a map where the keys are the package names
42        // and the value is a list of maintainer names.
43        let repos = reqwest::blocking::get(PKGBASE_MAINTAINER_URL)
44            .context("Failed to query pkgbase url.")?
45            .json::<HashMap<String, Vec<String>>>()
46            .context("Failed to deserialize archweb pkglist.")?;
47
48        let all_repo_names: Vec<String> = repos.keys().map(String::from).collect();
49        info!("Found {} official packages.", all_repo_names.len());
50
51        let download_dir = self.dest.join("download/pkgsrc");
52        self.parallel_update_or_clone(&all_repo_names, &download_dir)?;
53
54        self.remove_old_repos(&all_repo_names, &download_dir)?;
55
56        // Copy all .SRCINFO files to the target directory
57        for repo in all_repo_names {
58            let download_path = download_dir.join(&repo);
59            if download_path.join(".SRCINFO").exists() {
60                let target_dir = self.dest.join("pkgsrc").join(&repo);
61                std::fs::create_dir_all(&target_dir)?;
62                std::fs::copy(download_path.join(".SRCINFO"), target_dir.join(".SRCINFO"))?;
63            }
64        }
65
66        Ok(())
67    }
68
69    /// Remove all local repositories for packages that no longer exist in the official
70    /// repositories.
71    ///
72    /// Get the list of all locally available pkgsrc repositories.
73    /// If we find any that are not in the list of official packages, we remove them.
74    fn remove_old_repos(&self, repos: &[String], download_dir: &Path) -> Result<()> {
75        // First up, read the names of all repositories in the local download folder.
76        let local_repositories = filenames_in_dir(download_dir)?;
77
78        // Get the list of packages that no longer exist on the mirrors (and thereby archweb).
79        let remote_pkgs: HashSet<String> = HashSet::from_iter(repos.iter().map(String::from));
80
81        // Now remove all local repositories for which there's no longer an entry in the archweb
82        // response, as those packages are no longer served by the official mirrors.
83        let removed_pkgs: Vec<&String> = local_repositories.difference(&remote_pkgs).collect();
84
85        // Delete the repositories
86        if !removed_pkgs.is_empty() {
87            info!("Found {} repositories for cleanup:", removed_pkgs.len());
88            for removed in removed_pkgs {
89                remove_dir_all(download_dir.join(removed))
90                    .context("Failed to remove local repository {removed}")?;
91            }
92        }
93
94        Ok(())
95    }
96
97    /// Update/clone all git repositories in parallel with rayon.
98    ///
99    /// A progress bar is added for progress indication.
100    fn parallel_update_or_clone(&self, repos: &[String], download_dir: &Path) -> Result<()> {
101        let progress_bar = get_progress_bar(repos.len() as u64);
102
103        // Prepare a ssh session for better performance.
104        warmup_ssh_session()?;
105
106        // Clone/update all repositories in parallel
107        let results: Vec<Result<(), RepoUpdateError>> = repos
108            .par_iter()
109            .map(|repo| {
110                let target_dir = download_dir.join(repo);
111
112                // If the repo already exists, only pull it.
113                // Otherwise do a clone.
114                let result = if target_dir.exists() {
115                    update_repo(repo, &target_dir)
116                } else {
117                    clone_repo(repo.to_string(), &target_dir)
118                };
119
120                // Increment the counter
121                progress_bar.inc(1);
122                result
123            })
124            .collect();
125
126        // Finish the spinner
127        progress_bar.finish_with_message("All repositories cloned or updated.");
128
129        // Display any errors during cloning/updating to the user.
130        let mut error_iter = results.into_iter().filter_map(Result::err).peekable();
131        if error_iter.peek().is_some() {
132            error!("The command failed for the following repositories:");
133            for error in error_iter {
134                error!(
135                    "{} failed for repo {} with error:\n{:?}",
136                    error.operation, error.repo, error.inner
137                );
138            }
139        }
140
141        Ok(())
142    }
143}
144
145/// Create a new ssh connection that doesn't get bound to a given session.
146/// This allows that session to be reused, effectively eliminating the need to authenticate every
147/// time a git repository is cloned/pulled.
148///
149/// This is especially necessary for users that have their SSH key on a physical device, such as a
150/// NitroKey, as authentications with such devices are sequential and take quite some time.
151pub fn warmup_ssh_session() -> Result<()> {
152    let output = &Command::new("ssh")
153        .args(vec!["-T", SSH_HOST])
154        .output()
155        .context("Failed to start ssh warmup command")?;
156
157    ensure_success(output).context("Failed to run ssh warmup command:")
158}
159
160#[derive(Display)]
161enum RepoUpdateOperation {
162    Clone,
163    Update,
164}
165
166struct RepoUpdateError {
167    repo: String,
168    operation: RepoUpdateOperation,
169    inner: anyhow::Error,
170}
171
172/// Update a local git repository to the newest state.
173/// Resets any local changes in case in each repository beforehand to prevent any conflicts.
174fn update_repo(repo: &str, target_dir: &Path) -> Result<(), RepoUpdateError> {
175    // Reset any possible local changes.
176    let output = &Command::new("git")
177        .current_dir(target_dir)
178        .args(vec!["reset", "--hard"])
179        .output()
180        .map_err(|err| RepoUpdateError {
181            repo: repo.to_string(),
182            operation: RepoUpdateOperation::Update,
183            inner: err.into(),
184        })?;
185
186    ensure_success(output).map_err(|err| RepoUpdateError {
187        repo: repo.to_string(),
188        operation: RepoUpdateOperation::Update,
189        inner: err,
190    })?;
191
192    let output = Command::new("git")
193        .current_dir(target_dir)
194        .args(["pull", "--force"])
195        .output()
196        .map_err(|err| RepoUpdateError {
197            repo: repo.to_string(),
198            operation: RepoUpdateOperation::Update,
199            inner: err.into(),
200        })?;
201
202    ensure_success(&output).map_err(|err| RepoUpdateError {
203        repo: repo.to_string(),
204        operation: RepoUpdateOperation::Update,
205        inner: err,
206    })
207}
208
209/// Clone a git repository into a target directory.
210fn clone_repo(mut repo: String, target_dir: &Path) -> Result<(), RepoUpdateError> {
211    // Check if this is one of the few packages that needs to be replaced.
212    for (to_replace, replace_with) in PACKAGE_REPO_RENAMES {
213        if repo == to_replace {
214            repo = replace_with.to_string();
215        }
216    }
217
218    // Arch linux replaces the literal `+` chars with spelled out `plus` equivalents in their
219    // repository urls. This is to prevent any issues with external tooling and such.
220    repo = repo.replace("+", "plus");
221
222    let ssh_url = format!("{SSH_HOST}:{REPO_BASE_URL}/{repo}.git");
223
224    let output = &Command::new("git")
225        .arg("clone")
226        .arg(&ssh_url)
227        .arg(target_dir)
228        .output()
229        .map_err(|err| RepoUpdateError {
230            repo: repo.to_string(),
231            operation: RepoUpdateOperation::Clone,
232            inner: err.into(),
233        })?;
234
235    ensure_success(output).map_err(|err| RepoUpdateError {
236        repo: repo.to_string(),
237        operation: RepoUpdateOperation::Clone,
238        inner: err,
239    })
240}