dev_scripts/sync/
pkgsrc.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
use std::{
    collections::{HashMap, HashSet},
    fs::remove_dir_all,
    path::{Path, PathBuf},
    process::Command,
};

use anyhow::{Context, Result};
use log::{error, info};
use rayon::prelude::*;
use strum::Display;

use super::filenames_in_dir;
use crate::{cmd::ensure_success, ui::get_progress_bar};

const PKGBASE_MAINTAINER_URL: &str = "https://archlinux.org/packages/pkgbase-maintainer";
const SSH_HOST: &str = "git@gitlab.archlinux.org";
const SSH_BASE_URL: &str = "git@gitlab.archlinux.org:archlinux/packaging/packages";

/// This struct is the entry point for downloading package source repositories from ArchLinux's
/// Gitlab.
///
/// Look at [Self::download_package_source_repositories] for more information.
pub struct PkgSrcDownloader {
    /// The destination folder into which files should be downloaded.
    /// Repositories are cloned into `<dest>/download/pkgsrc/` and the extracted
    /// `.SRCINFO` files end up in `<dest>/pkgsrc/`.
    pub dest: PathBuf,
}

impl PkgSrcDownloader {
    /// Download all official package source git repositories.
    pub fn download_package_source_repositories(&self) -> Result<()> {
        // Query the arch web API to get a list all official active repositories
        // The returned json is a map where the keys are the package names
        // and the value is a list of maintainer names.
        let repos = reqwest::blocking::get(PKGBASE_MAINTAINER_URL)
            .context("Failed to query pkgbase url.")?
            .json::<HashMap<String, Vec<String>>>()
            .context("Failed to deserialize archweb pkglist.")?;
        let all_repo_names: Vec<String> = repos.keys().map(String::from).collect();
        info!("Found {} official packages.", all_repo_names.len());

        let download_dir = self.dest.join("download/pkgsrc");
        self.parallel_update_or_clone(&all_repo_names, &download_dir)?;

        self.remove_old_repos(&all_repo_names, &download_dir)?;

        // Copy all .SRCINFO files to the target directory
        for repo in all_repo_names {
            let download_path = download_dir.join(&repo);
            if download_path.join(".SRCINFO").exists() {
                let target_dir = self.dest.join("pkgsrc").join(&repo);
                std::fs::create_dir_all(&target_dir)?;
                std::fs::copy(download_path.join(".SRCINFO"), target_dir.join(".SRCINFO"))?;
            }
        }

        Ok(())
    }

    /// Remove all local repositories for packages that no longer exist in the official
    /// repositories.
    ///
    /// Get the list of all locally available pkgsrc repositories.
    /// If we find any that are not in the list of official packages, we remove them.
    fn remove_old_repos(&self, repos: &[String], download_dir: &Path) -> Result<()> {
        // First up, read the names of all repositories in the local download folder.
        let local_repositories = filenames_in_dir(download_dir)?;

        // Get the list of packages that no longer exist on the mirrors (and thereby archweb).
        let remote_pkgs: HashSet<String> = HashSet::from_iter(repos.iter().map(String::from));

        // Now remove all local repositories for which there's no longer an entry in the archweb
        // response, as those packages are no longer served by the official mirrors.
        let removed_pkgs: Vec<&String> = local_repositories.difference(&remote_pkgs).collect();

        // Delete the repositories
        if !removed_pkgs.is_empty() {
            info!("Found {} repositories for cleanup:", removed_pkgs.len());
            for removed in removed_pkgs {
                remove_dir_all(download_dir.join(removed))
                    .context("Failed to remove local repository {removed}")?;
            }
        }

        Ok(())
    }

    /// Update/clone all git repositories in parallel with rayon.
    ///
    /// A progress bar is added for progress indication.
    fn parallel_update_or_clone(&self, repos: &[String], download_dir: &Path) -> Result<()> {
        let progress_bar = get_progress_bar(repos.len() as u64);

        // Prepare a ssh session for better performance.
        warmup_ssh_session()?;

        // Clone/update all repositories in parallel
        let results: Vec<Result<(), RepoUpdateError>> = repos
            .par_iter()
            .map(|repo| {
                let target_dir = download_dir.join(repo);

                // If the repo already exists, only pull it.
                // Otherwise do a clone.
                let result = if target_dir.exists() {
                    update_repo(repo, &target_dir)
                } else {
                    clone_repo(repo, &target_dir)
                };

                // Increment the counter
                progress_bar.inc(1);
                result
            })
            .collect();

        // Finish the spinner
        progress_bar.finish_with_message("All repositories cloned or updated.");

        // Display any errors during cloning/updating to the user.
        let mut error_iter = results.into_iter().filter_map(Result::err).peekable();
        if error_iter.peek().is_some() {
            error!("The command failed for the following repositories:");
            for error in error_iter {
                error!(
                    "{} failed for repo {} with error:\n{:?}",
                    error.operation, error.repo, error.inner
                );
            }
        }

        Ok(())
    }
}

/// Create a new ssh connection that doesn't get bound to a given session.
/// This allows that session to be reused, effectively eliminating the need to authenticate every
/// time a git repository is cloned/pulled.
///
/// This is especially necessary for users that have their SSH key on a physical device, such as a
/// NitroKey, as authentications with such devices are sequential and take quite some time.
pub fn warmup_ssh_session() -> Result<()> {
    // `-T` disables pseudo-terminal allocation; we only want the authentication side effect.
    let output = Command::new("ssh")
        .args(["-T", SSH_HOST])
        .output()
        .context("Failed to start ssh warmup command")?;

    ensure_success(&output).context("Failed to run ssh warmup command")
}

/// The kind of git operation that was attempted on a package repository.
///
/// The `Display` derive (from strum) is used to render the variant name
/// ("Clone"/"Update") in error output.
#[derive(Display)]
enum RepoUpdateOperation {
    Clone,
    Update,
}

/// Error context for a failed clone or update of a single package repository.
struct RepoUpdateError {
    /// Name of the package repository the operation was performed on.
    repo: String,
    /// Which git operation (clone or update) failed.
    operation: RepoUpdateOperation,
    /// The underlying error that caused the failure.
    inner: anyhow::Error,
}

/// Update a local git repository to the newest state.
/// Resets any local changes in case in each repository beforehand to prevent any conflicts.
fn update_repo(repo: &str, target_dir: &Path) -> Result<(), RepoUpdateError> {
    // Reset any possible local changes.
    let output = &Command::new("git")
        .current_dir(target_dir)
        .args(vec!["reset", "--hard"])
        .output()
        .map_err(|err| RepoUpdateError {
            repo: repo.to_string(),
            operation: RepoUpdateOperation::Update,
            inner: err.into(),
        })?;

    ensure_success(output).map_err(|err| RepoUpdateError {
        repo: repo.to_string(),
        operation: RepoUpdateOperation::Update,
        inner: err,
    })?;

    let output = Command::new("git")
        .current_dir(target_dir)
        .args(["pull", "--force"])
        .output()
        .map_err(|err| RepoUpdateError {
            repo: repo.to_string(),
            operation: RepoUpdateOperation::Update,
            inner: err.into(),
        })?;

    ensure_success(&output).map_err(|err| RepoUpdateError {
        repo: repo.to_string(),
        operation: RepoUpdateOperation::Update,
        inner: err,
    })
}

/// Clone a git repository into a target directory.
fn clone_repo(repo: &str, target_dir: &Path) -> Result<(), RepoUpdateError> {
    let ssh_url = format!("{SSH_HOST}{SSH_BASE_URL}/{repo}.git");
    let output = &Command::new("git")
        .arg("clone")
        .arg(&ssh_url)
        .arg(target_dir)
        .output()
        .map_err(|err| RepoUpdateError {
            repo: repo.to_string(),
            operation: RepoUpdateOperation::Clone,
            inner: err.into(),
        })?;

    ensure_success(output).map_err(|err| RepoUpdateError {
        repo: repo.to_string(),
        operation: RepoUpdateOperation::Clone,
        inner: err,
    })
}