dev_scripts/sync/mirror/
mod.rs1mod rsync_changes;
6
7use std::{
8 collections::HashSet,
9 fs::{create_dir_all, remove_dir_all},
10 path::{Path, PathBuf},
11 process::Command,
12};
13
14use anyhow::{Context, Result, anyhow, bail};
15use log::{debug, info, trace};
16use rayon::iter::{IntoParallelIterator, ParallelIterator};
17
18use super::{PackageRepositories, filenames_in_dir};
19use crate::{cmd::ensure_success, ui::get_progress_bar};
20
21#[derive(Clone, Debug)]
23pub struct MirrorDownloader {
24 pub dest: PathBuf,
26 pub mirror: String,
28 pub repositories: Vec<PackageRepositories>,
30 pub extract_all: bool,
32}
33
34impl MirrorDownloader {
35 pub fn sync_remote_databases(&self) -> Result<()> {
41 let download_dir = self.dest.join("download/databases/");
42 let target_dir = self.dest.join("databases");
43
44 if !download_dir.exists() {
45 create_dir_all(&download_dir).context("Failed to create download directory")?;
46 }
47
48 if !target_dir.exists() {
49 create_dir_all(&target_dir)
50 .context("Failed to create pacman cache target directory")?;
51 }
52
53 for repo in self.repositories.iter() {
54 let name = repo.to_string();
55 info!("Downloading database for repository {name}");
56
57 let filename = format!("{name}.files");
58 let file_source = format!("rsync://{}/{name}/os/x86_64/{filename}", self.mirror);
59
60 let download_dest = download_dir.join(filename);
61
62 let mut db_sync_command = Command::new("rsync");
64 db_sync_command
65 .args([
66 "--recursive",
67 "--perms",
68 "--times",
69 "--itemize-changes",
71 "--copy-links",
75 ])
76 .arg(file_source)
77 .arg(&download_dest);
78
79 trace!("Running command: {db_sync_command:?}");
80 let output = db_sync_command
81 .output()
82 .context(format!("Failed to run rsync for pacman db {name}"))?;
83
84 if !output.status.success() {
85 bail!("rsync failed for pacman db {name}");
86 }
87
88 trace!(
89 "Rsync reports: {}",
90 String::from_utf8_lossy(&output.stdout).trim()
91 );
92
93 let repo_target_dir = target_dir.join(&name);
94 if repo_target_dir.exists() {
95 if !self.extract_all
96 && rsync_changes::Report::parser(&output.stdout)
97 .map_err(|e| anyhow!("{e}"))?
98 .file_content_updated()?
99 .is_none()
100 {
101 debug!("Database {name} is unchanged upstream, skipping extraction");
102 continue;
103 } else {
104 remove_dir_all(&repo_target_dir).context(format!(
106 "Failed to remove old repository: {repo_target_dir:?}"
107 ))?;
108 }
109 }
110 create_dir_all(&repo_target_dir)?;
111
112 debug!("Extracting db to {repo_target_dir:?}");
113
114 let mut tar_command = Command::new("tar");
116 tar_command
117 .arg("-x")
118 .arg("-f")
119 .arg(&download_dest)
120 .arg("-C")
121 .arg(&repo_target_dir);
122
123 trace!("Running command: {tar_command:?}");
124 let output = tar_command
125 .output()
126 .context(format!("Failed to start tar to extract pacman dbs {name}"))?;
127 ensure_success(&output)?;
128 }
129
130 Ok(())
131 }
132
133 pub fn sync_remote_packages(&self) -> Result<()> {
141 let download_dir = self.dest.join("download/packages");
142 let target_dir = self.dest.join("packages");
143
144 if !download_dir.exists() {
145 create_dir_all(&download_dir).context("Failed to create download directory")?;
146 }
147
148 if !target_dir.exists() {
149 create_dir_all(&target_dir)
150 .context("Failed to create pacman cache target directory")?;
151 }
152
153 for repo in self.repositories.iter() {
154 let repo_name = repo.to_string();
155 info!("Downloading packages for repository {repo_name}");
156
157 let file_source = format!("rsync://{}/{repo_name}/os/x86_64/", self.mirror);
158 let download_dest = download_dir.join(&repo_name);
159 let changed = self.download_packages(&repo_name, file_source, &download_dest)?;
160
161 let packages: Vec<PathBuf> = if self.extract_all {
162 let files: Vec<_> =
163 std::fs::read_dir(&download_dest)?.collect::<Result<_, std::io::Error>>()?;
164 files
165 .into_iter()
166 .map(|entry| entry.path().to_owned())
167 .collect::<Vec<_>>()
168 } else {
169 changed
170 .into_iter()
171 .map(|pkg| download_dest.join(pkg))
172 .collect()
173 }
174 .into_iter()
175 .filter(|entry| {
178 if let Some(path) = entry.to_str() {
179 !path.starts_with('.')
180 } else {
181 false
182 }
183 })
184 .collect();
185
186 info!("Extracting packages for repository {repo_name}");
187 let progress_bar = get_progress_bar(packages.len() as u64);
188 packages
189 .into_par_iter()
190 .map(|pkg| {
191 let result = extract_pkg_files(&pkg, &target_dir, &repo_name);
193 progress_bar.inc(1);
194 result
195 })
196 .collect::<Result<Vec<()>>>()?;
197 progress_bar.finish_with_message("Finished extracting files for repository {repo}.");
199 }
200
201 for repo in self.repositories.iter() {
203 let mirror_packages = filenames_in_dir(&download_dir.join(repo.to_string()))?
204 .into_iter()
205 .map(remove_tarball_suffix)
206 .collect::<Result<HashSet<String>>>()?;
207
208 let local_packages = filenames_in_dir(&target_dir.join(repo.to_string()))?;
209
210 let removed_pkgs: Vec<&String> = local_packages.difference(&mirror_packages).collect();
212
213 if !removed_pkgs.is_empty() {
215 info!("Found {} packages for cleanup:", removed_pkgs.len());
216 for removed in removed_pkgs {
217 debug!("Removing local package: {removed}");
218 remove_dir_all(target_dir.join(repo.to_string()).join(removed)).context(
219 format!(
220 "Failed to remove local package {:?}",
221 target_dir.join(repo.to_string()).join(removed)
222 ),
223 )?;
224 }
225 }
226 }
227
228 Ok(())
229 }
230
231 fn download_packages(
233 &self,
234 repo_name: &str,
235 file_source: String,
236 download_dest: &PathBuf,
237 ) -> Result<Vec<PathBuf>> {
238 let mut cmd = Command::new("rsync");
239 cmd.args([
240 "--recursive",
241 "--perms",
242 "--times",
243 "--delete",
244 "--hard-links",
245 "--copy-links",
249 "--delete-after",
251 "--delay-updates",
254 "--itemize-changes",
256 "--exclude=*.sig",
258 ]);
259
260 for variation in [
263 ".db",
264 ".db.tar.gz",
265 ".db.tar.gz.old",
266 ".links.tar.gz",
267 ".files",
268 ".files.tar.gz",
269 ".files.tar.gz.old",
270 ] {
271 cmd.arg(format!("--exclude={repo_name}{variation}"));
272 }
273
274 trace!("Running command: {cmd:?}");
275 let output = cmd
276 .arg(file_source)
277 .arg(download_dest)
278 .output()
279 .context(format!(
280 "Failed to start package rsync for pacman db {repo_name}"
281 ))?;
282
283 if !output.status.success() {
284 bail!("Package rsync failed for pacman db {repo_name}");
285 }
286
287 let mut changed_files = Vec::new();
288
289 for line in output.stdout.split(|&b| b == b'\n') {
290 if let Some(path) = rsync_changes::Report::parser(line)
291 .map_err(|e| anyhow!("{e}"))?
292 .file_content_updated()?
293 {
294 trace!("File at {path:?} changed, marking for extraction");
295 changed_files.push(path.to_owned());
296 }
297 }
298
299 Ok(changed_files)
300 }
301}
302
303fn get_tar_file_list(pkg: &Path) -> Result<HashSet<String>> {
308 let mut tar_command = Command::new("tar");
309 tar_command.arg("-tf").arg(pkg);
310 trace!("Running command: {tar_command:?}");
311 let peek_output = tar_command
312 .output()
313 .context(format!("Failed to peek into pkg {pkg:?}"))?;
314 ensure_success(&peek_output).context("Error while peeking into package")?;
315
316 Ok(String::from_utf8_lossy(&peek_output.stdout)
317 .lines()
318 .map(|line| line.to_string())
319 .collect())
320}
321
322fn extract_pkg_files(pkg: &Path, target_dir: &Path, repo_name: &str) -> Result<()> {
337 let pkg_file_name = pkg
338 .file_name()
339 .expect("got directory when expecting file")
340 .to_string_lossy()
341 .to_string();
342 let pkg_name = remove_tarball_suffix(pkg_file_name)?;
343
344 let files = get_tar_file_list(pkg)?;
346
347 let pkg_target_dir = target_dir.join(repo_name).join(pkg_name);
349 create_dir_all(&pkg_target_dir)?;
350
351 let mut cmd_args = vec![
352 "-C".to_string(),
353 pkg_target_dir.to_string_lossy().to_string(),
354 "-xf".to_string(),
355 pkg.to_string_lossy().to_string(),
356 ];
357
358 for filetype in [".MTREE", ".BUILDINFO", ".PKGINFO", ".INSTALL"] {
361 if files.contains(filetype) {
362 cmd_args.push(filetype.to_string());
363 }
364 }
365
366 let mut tar_command = Command::new("tar");
368 tar_command.args(cmd_args);
369
370 trace!("Running command: {tar_command:?}");
371 let output = tar_command
372 .output()
373 .context(format!("Failed to extract files from pkg {pkg:?}"))?;
374 ensure_success(&output).context("Error while downloading packages via rsync")?;
375
376 Ok(())
377}
378
379pub fn remove_tarball_suffix(pkg_name: String) -> Result<String> {
382 let pkg_name = if let Some(pkg_name) = pkg_name.strip_suffix(".pkg.tar.zst") {
383 pkg_name
384 } else if let Some(pkg_name) = pkg_name.strip_suffix(".pkg.tar.xz") {
385 pkg_name
386 } else {
387 bail!("Found package with unknown tarball compression: {pkg_name:?}");
388 };
389
390 Ok(pkg_name.to_string())
391}