dev_scripts/sync/mirror/
mod.rs1mod rsync_changes;
2
3use std::{
4 collections::HashSet,
5 fs::{create_dir_all, remove_dir_all},
6 path::{Path, PathBuf},
7 process::Command,
8};
9
10use anyhow::{Context, Result, anyhow, bail};
11use log::{debug, info, trace};
12use rayon::iter::{IntoParallelIterator, ParallelIterator};
13
14use super::{PackageRepositories, filenames_in_dir};
15use crate::{cmd::ensure_success, ui::get_progress_bar};
16
17pub struct MirrorDownloader {
19 pub dest: PathBuf,
21 pub mirror: String,
23 pub repositories: Vec<PackageRepositories>,
25 pub extract_all: bool,
27}
28
29impl MirrorDownloader {
30 pub fn sync_remote_databases(&self) -> Result<()> {
36 let download_dir = self.dest.join("download/databases/");
37 let target_dir = self.dest.join("databases");
38
39 if !download_dir.exists() {
40 create_dir_all(&download_dir).context("Failed to create download directory")?;
41 }
42
43 if !target_dir.exists() {
44 create_dir_all(&target_dir)
45 .context("Failed to create pacman cache target directory")?;
46 }
47
48 for repo in self.repositories.iter() {
49 let name = repo.to_string();
50 info!("Downloading database for repository {name}");
51
52 let filename = format!("{name}.files");
53 let file_source = format!("rsync://{}/{name}/os/x86_64/{filename}", self.mirror);
54
55 let download_dest = download_dir.join(filename);
56
57 let mut db_sync_command = Command::new("rsync");
59 db_sync_command
60 .args([
61 "--recursive",
62 "--perms",
63 "--times",
64 "--itemize-changes",
66 "--copy-links",
70 ])
71 .arg(file_source)
72 .arg(&download_dest);
73
74 trace!("Running command: {db_sync_command:?}");
75 let output = db_sync_command
76 .output()
77 .context(format!("Failed to run rsync for pacman db {name}"))?;
78
79 if !output.status.success() {
80 bail!("rsync failed for pacman db {name}");
81 }
82
83 trace!(
84 "Rsync reports: {}",
85 String::from_utf8_lossy(&output.stdout).trim()
86 );
87
88 let repo_target_dir = target_dir.join(&name);
89 if repo_target_dir.exists() {
90 if !self.extract_all
91 && rsync_changes::Report::parser(&output.stdout)
92 .map_err(|e| anyhow!("{e}"))?
93 .file_content_updated()?
94 .is_none()
95 {
96 debug!("Database {name} is unchanged upstream, skipping extraction");
97 continue;
98 } else {
99 remove_dir_all(&repo_target_dir).context(format!(
101 "Failed to remove old repository: {repo_target_dir:?}"
102 ))?;
103 }
104 }
105 create_dir_all(&repo_target_dir)?;
106
107 debug!("Extracting db to {repo_target_dir:?}");
108
109 let mut tar_command = Command::new("tar");
111 tar_command
112 .arg("-x")
113 .arg("-f")
114 .arg(&download_dest)
115 .arg("-C")
116 .arg(&repo_target_dir);
117
118 trace!("Running command: {tar_command:?}");
119 let output = tar_command
120 .output()
121 .context(format!("Failed to start tar to extract pacman dbs {name}"))?;
122 ensure_success(&output)?;
123 }
124
125 Ok(())
126 }
127
128 pub fn sync_remote_packages(&self) -> Result<()> {
136 let download_dir = self.dest.join("download/packages");
137 let target_dir = self.dest.join("packages");
138
139 if !download_dir.exists() {
140 create_dir_all(&download_dir).context("Failed to create download directory")?;
141 }
142
143 if !target_dir.exists() {
144 create_dir_all(&target_dir)
145 .context("Failed to create pacman cache target directory")?;
146 }
147
148 for repo in self.repositories.iter() {
149 let repo_name = repo.to_string();
150 info!("Downloading packages for repository {repo_name}");
151
152 let file_source = format!("rsync://{}/{repo_name}/os/x86_64/", self.mirror);
153 let download_dest = download_dir.join(&repo_name);
154 let changed = self.download_packages(&repo_name, file_source, &download_dest)?;
155
156 let packages: Vec<PathBuf> = if self.extract_all {
157 let files: Vec<_> =
158 std::fs::read_dir(&download_dest)?.collect::<Result<_, std::io::Error>>()?;
159 files
160 .into_iter()
161 .map(|entry| entry.path().to_owned())
162 .collect::<Vec<_>>()
163 } else {
164 changed
165 .into_iter()
166 .map(|pkg| download_dest.join(pkg))
167 .collect()
168 }
169 .into_iter()
170 .filter(|entry| {
173 if let Some(path) = entry.to_str() {
174 !path.starts_with('.')
175 } else {
176 false
177 }
178 })
179 .collect();
180
181 info!("Extracting packages for repository {repo_name}");
182 let progress_bar = get_progress_bar(packages.len() as u64);
183 packages
184 .into_par_iter()
185 .map(|pkg| {
186 let result = extract_pkg_files(&pkg, &target_dir, &repo_name);
188 progress_bar.inc(1);
189 result
190 })
191 .collect::<Result<Vec<()>>>()?;
192 progress_bar.finish_with_message("Finished extracting files for repository {repo}.");
194 }
195
196 for repo in self.repositories.iter() {
198 let mirror_packages = filenames_in_dir(&download_dir.join(repo.to_string()))?
199 .into_iter()
200 .map(remove_tarball_suffix)
201 .collect::<Result<HashSet<String>>>()?;
202
203 let local_packages = filenames_in_dir(&target_dir.join(repo.to_string()))?;
204
205 let removed_pkgs: Vec<&String> = local_packages.difference(&mirror_packages).collect();
207
208 if !removed_pkgs.is_empty() {
210 info!("Found {} packages for cleanup:", removed_pkgs.len());
211 for removed in removed_pkgs {
212 debug!("Removing local package: {removed}");
213 remove_dir_all(target_dir.join(repo.to_string()).join(removed)).context(
214 format!(
215 "Failed to remove local package {:?}",
216 target_dir.join(repo.to_string()).join(removed)
217 ),
218 )?;
219 }
220 }
221 }
222
223 Ok(())
224 }
225
226 fn download_packages(
228 &self,
229 repo_name: &str,
230 file_source: String,
231 download_dest: &PathBuf,
232 ) -> Result<Vec<PathBuf>> {
233 let mut cmd = Command::new("rsync");
234 cmd.args([
235 "--recursive",
236 "--perms",
237 "--times",
238 "--delete",
239 "--hard-links",
240 "--copy-links",
244 "--delete-after",
246 "--delay-updates",
249 "--itemize-changes",
251 "--exclude=*.sig",
253 ]);
254
255 for variation in [
258 ".db",
259 ".db.tar.gz",
260 ".db.tar.gz.old",
261 ".links.tar.gz",
262 ".files",
263 ".files.tar.gz",
264 ".files.tar.gz.old",
265 ] {
266 cmd.arg(format!("--exclude={repo_name}{variation}"));
267 }
268
269 trace!("Running command: {cmd:?}");
270 let output = cmd
271 .arg(file_source)
272 .arg(download_dest)
273 .output()
274 .context(format!(
275 "Failed to start package rsync for pacman db {repo_name}"
276 ))?;
277
278 if !output.status.success() {
279 bail!("Package rsync failed for pacman db {repo_name}");
280 }
281
282 let mut changed_files = Vec::new();
283
284 for line in output.stdout.split(|&b| b == b'\n') {
285 if let Some(path) = rsync_changes::Report::parser(line)
286 .map_err(|e| anyhow!("{e}"))?
287 .file_content_updated()?
288 {
289 trace!("File at {path:?} changed, marking for extraction");
290 changed_files.push(path.to_owned());
291 }
292 }
293
294 Ok(changed_files)
295 }
296}
297
298fn get_tar_file_list(pkg: &Path) -> Result<HashSet<String>> {
303 let mut tar_command = Command::new("tar");
304 tar_command.arg("-tf").arg(pkg);
305 trace!("Running command: {tar_command:?}");
306 let peek_output = tar_command
307 .output()
308 .context(format!("Failed to peek into pkg {pkg:?}"))?;
309 ensure_success(&peek_output).context("Error while peeking into package")?;
310
311 Ok(String::from_utf8_lossy(&peek_output.stdout)
312 .lines()
313 .map(|line| line.to_string())
314 .collect())
315}
316
317fn extract_pkg_files(pkg: &Path, target_dir: &Path, repo_name: &str) -> Result<()> {
332 let pkg_file_name = pkg
333 .file_name()
334 .expect("got directory when expecting file")
335 .to_string_lossy()
336 .to_string();
337 let pkg_name = remove_tarball_suffix(pkg_file_name)?;
338
339 let files = get_tar_file_list(pkg)?;
341
342 let pkg_target_dir = target_dir.join(repo_name).join(pkg_name);
344 create_dir_all(&pkg_target_dir)?;
345
346 let mut cmd_args = vec![
347 "-C".to_string(),
348 pkg_target_dir.to_string_lossy().to_string(),
349 "-xf".to_string(),
350 pkg.to_string_lossy().to_string(),
351 ];
352
353 for filetype in [".MTREE", ".BUILDINFO", ".PKGINFO", ".INSTALL"] {
356 if files.contains(filetype) {
357 cmd_args.push(filetype.to_string());
358 }
359 }
360
361 let mut tar_command = Command::new("tar");
363 tar_command.args(cmd_args);
364
365 trace!("Running command: {tar_command:?}");
366 let output = tar_command
367 .output()
368 .context(format!("Failed to extract files from pkg {pkg:?}"))?;
369 ensure_success(&output).context("Error while downloading packages via rsync")?;
370
371 Ok(())
372}
373
374pub fn remove_tarball_suffix(pkg_name: String) -> Result<String> {
377 let pkg_name = if let Some(pkg_name) = pkg_name.strip_suffix(".pkg.tar.zst") {
378 pkg_name
379 } else if let Some(pkg_name) = pkg_name.strip_suffix(".pkg.tar.xz") {
380 pkg_name
381 } else {
382 bail!("Found package with unknown tarball compression: {pkg_name:?}");
383 };
384
385 Ok(pkg_name.to_string())
386}