alpm_types/version/comparison.rs
1//! Comparison for [`PackageVersion`].
2//!
3//! This module implements the behavior of the [rpmvercmp algorithm in RPM version 4.8.1].
4//! The current implementation is based on the [alpm-pkgver] specification and explicitly
5//! handles undefined and undocumented behavior observed from `rpmvercmp`.
6//!
7//! The current version, including its improvements over the reference implementation are described
8//! and specified in detail in [alpm-pkgver].
9//!
10//! [alpm-pkgver]: https://alpm.archlinux.page/specifications/alpm-pkgver.7.html
11//! [rpmvercmp algorithm in RPM version 4.8.1]: https://github.com/rpm-software-management/rpm/blob/rpm-4.8.1-release/lib/rpmvercmp.c
12
13use std::{
14 cmp::Ordering,
15 iter::Peekable,
16 str::{CharIndices, Chars, FromStr},
17};
18
19use crate::PackageVersion;
20
21/// This enum represents a single segment in a version string.
22/// [`VersionSegment`]s are returned by the [`VersionSegments`] iterator, which is responsible for
23/// splitting a version string into its segments.
24///
25/// Version strings are split according to the following rules:
26///
27/// - Non-alphanumeric characters always count as delimiters (`.`, `-`, `$`, etc.).
28/// - There's no differentiation between delimiters represented by different characters (e.g. `'$$$'
29/// == '...' == '.$-'`).
30/// - Each segment contains the info about the amount of leading delimiters for that segment.
31/// Leading delimiters that directly follow after one another are grouped together. The length of
32/// the delimiters is important, as it plays a crucial role in the algorithm that determines which
33/// version is newer.
34///
35/// `1...a` would be represented as:
36///
37/// ```
38/// use alpm_types::VersionSegment::*;
39/// vec![
40/// Segment { text: "1", delimiter_count: 0},
41/// Segment { text: "a", delimiter_count: 3},
42/// ];
43/// ```
44/// - Alphanumeric strings are also split into individual sub-segments. This is done by walking over
45/// the string and splitting it every time a switch from alphabetic to numeric is detected or vice
46/// versa.
47///
48/// `1.1foo123.0` would be represented as:
49///
50/// ```
51/// use alpm_types::VersionSegment::*;
52/// vec![
53/// Segment { text: "1", delimiter_count: 0},
54/// Segment { text: "1", delimiter_count: 1},
55/// SubSegment { text: "foo" },
56/// SubSegment { text: "123" },
57/// Segment { text: "0", delimiter_count: 1},
58/// ];
59/// ```
60/// - Trailing delimiters are encoded as an empty string.
61///
62/// `1...` would be represented as:
63///
64/// ```
65/// use alpm_types::VersionSegment::*;
66/// vec![
67/// Segment { text: "1", delimiter_count: 0},
68/// Segment { text: "", delimiter_count: 3},
69/// ];
70/// ```
71#[derive(Clone, Debug, Eq, PartialEq)]
72pub enum VersionSegment<'a> {
73 /// The start of a new segment.
74 /// If the current segment can be split into multiple sub-segments, this variant only contains
75 /// the **first** sub-segment.
76 ///
77 /// To figure out whether this is sub-segment, peek at the next element in the
78 /// [`VersionSegments`] iterator, whether it's a [`VersionSegment::SubSegment`].
79 Segment {
80 /// The string representation of this segment
81 text: &'a str,
82 /// The amount of leading delimiters that were found for this segment
83 delimiter_count: usize,
84 },
85 /// A sub-segment of a version string's segment.
86 ///
87 /// Note that the first sub-segment of a segment that can be split into sub-segments is
88 /// counterintuitively represented by [VersionSegment::Segment]. This implementation detail
89 /// is due to the way the comparison algorithm works, as it does not always differentiate
90 /// between segments and sub-segments.
91 SubSegment {
92 /// The string representation of this sub-segment
93 text: &'a str,
94 },
95}
96
97impl<'a> VersionSegment<'a> {
98 /// Returns the inner string slice independent of [`VersionSegment`] variant.
99 pub fn text(&self) -> &str {
100 match self {
101 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => text,
102 }
103 }
104
105 /// Returns whether the inner string slice is empty, independent of [`VersionSegment`] variant
106 pub fn is_empty(&self) -> bool {
107 match self {
108 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
109 text.is_empty()
110 }
111 }
112 }
113
114 /// Returns an iterator over the chars of the inner string slice.
115 pub fn chars(&self) -> Chars<'a> {
116 match self {
117 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
118 text.chars()
119 }
120 }
121 }
122
123 /// Creates a type `T` from the inner string slice by relying on `T`'s [`FromStr::from_str`]
124 /// implementation.
125 pub fn parse<T: FromStr>(&self) -> Result<T, T::Err> {
126 match self {
127 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
128 FromStr::from_str(text)
129 }
130 }
131 }
132
133 /// Compares the inner string slice with that of another [`VersionSegment`].
134 pub fn str_cmp(&self, other: &VersionSegment) -> Ordering {
135 match self {
136 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
137 text.cmp(&other.text())
138 }
139 }
140 }
141}
142
143/// An [Iterator] over all [VersionSegment]s of an upstream version string.
144/// Check the documentation on [VersionSegment] to see how a string is split into segments.
145///
146/// Important note:
147/// Trailing delimiters will also produce a trailing [VersionSegment] with an empty string.
148///
149/// This iterator is capable of handling utf-8 strings.
150/// However, non alphanumeric chars are still interpreted as delimiters.
151#[derive(Debug)]
152pub struct VersionSegments<'a> {
153 /// The original version string. We need that reference so we can get some string
154 /// slices based on indices later on.
155 version: &'a str,
156 /// An iterator over the version's chars and their respective start byte's index.
157 version_chars: Peekable<CharIndices<'a>>,
158 /// Check if the cursor is currently in a segment.
159 /// This is necessary to detect whether the next segment should be a sub-segment or a new
160 /// segment.
161 in_segment: bool,
162}
163
164impl<'a> VersionSegments<'a> {
165 /// Create a new instance of a VersionSegments iterator.
166 pub fn new(version: &'a str) -> Self {
167 VersionSegments {
168 version,
169 version_chars: version.char_indices().peekable(),
170 in_segment: false,
171 }
172 }
173}
174
175impl<'a> Iterator for VersionSegments<'a> {
176 type Item = VersionSegment<'a>;
177
178 /// Get the next [VersionSegment] of this version string.
179 fn next(&mut self) -> Option<VersionSegment<'a>> {
180 // Used to track the number of delimiters the next segment is prefixed with.
181 let mut delimiter_count = 0;
182
183 // First up, get the delimiters out of the way.
184 // Peek at the next char, if it's a delimiter, consume it and increase the delimiter count.
185 while let Some((_, char)) = self.version_chars.peek() {
186 // An alphanumeric char indicates that we reached the next segment.
187 if char.is_alphanumeric() {
188 break;
189 }
190
191 self.version_chars.next();
192 delimiter_count += 1;
193
194 // As soon as we hit a delimiter, we know that a new segment is about to start.
195 self.in_segment = false;
196 continue;
197 }
198
199 // Get the next char. If there's no further char, we reached the end of the version string.
200 let Some((first_index, first_char)) = self.version_chars.next() else {
201 // We're at the end of the string and now have to differentiate between two cases:
202
203 // 1. There are no trailing delimiters. We can just return `None` as we truly reached
204 // the end.
205 if delimiter_count == 0 {
206 return None;
207 }
208
209 // 2. There's no further segment, but there were some trailing delimiters. The
210 // comparison algorithm considers this case which is why we have to somehow encode
211 // it. We do so by returning an empty segment.
212 return Some(VersionSegment::Segment {
213 text: "",
214 delimiter_count,
215 });
216 };
217
218 // Cache the last valid char + index that was checked. We need this to
219 // calculate the offset in case the last char is a multi-byte UTF-8 char.
220 let mut last_char = first_char;
221 let mut last_char_index = first_index;
222
223 // The following section now handles the splitting of an alphanumeric string into its
224 // sub-segments. As described in the [VersionSegment] docs, the string needs to be split
225 // every time a switch from alphabetic to numeric or vice versa is detected.
226
227 let is_numeric = first_char.is_numeric();
228
229 if is_numeric {
230 // Go through chars until we hit a non-numeric char or reached the end of the string.
231 #[allow(clippy::while_let_on_iterator)]
232 while let Some((index, next_char)) =
233 self.version_chars.next_if(|(_, peek)| peek.is_numeric())
234 {
235 last_char_index = index;
236 last_char = next_char;
237 }
238 } else {
239 // Go through chars until we hit a non-alphabetic char or reached the end of the string.
240 #[allow(clippy::while_let_on_iterator)]
241 while let Some((index, next_char)) =
242 self.version_chars.next_if(|(_, peek)| peek.is_alphabetic())
243 {
244 last_char_index = index;
245 last_char = next_char;
246 }
247 }
248
249 // Create a subslice based on the indices of the first and last char.
250 // The last char might be multi-byte, which is why we add its length.
251 let segment_slice = &self.version[first_index..(last_char_index + last_char.len_utf8())];
252
253 if !self.in_segment {
254 // Any further segments should be sub-segments, unless we hit a delimiter in which
255 // case this variable will reset to false.
256 self.in_segment = true;
257 Some(VersionSegment::Segment {
258 text: segment_slice,
259 delimiter_count,
260 })
261 } else {
262 Some(VersionSegment::SubSegment {
263 text: segment_slice,
264 })
265 }
266 }
267}
268
269impl Ord for PackageVersion {
270 /// This block implements the logic to determine which of two package versions is newer or
271 /// whether they're considered equal.
272 ///
273 /// This logic is surprisingly complex as it mirrors the current C-alpmlib implementation's
274 /// behavior for backwards compatibility reasons.
275 /// <https://gitlab.archlinux.org/pacman/pacman/-/blob/a2d029388c7c206f5576456f91bfbea2dca98c96/lib/libalpm/version.c#L83-217>
276 fn cmp(&self, other: &Self) -> Ordering {
277 // Equal strings are considered equal versions.
278 if self.inner() == other.inner() {
279 return Ordering::Equal;
280 }
281
282 let mut self_segments = self.segments();
283 let mut other_segments = other.segments();
284
285 // Loop through both versions' segments and compare them.
286 loop {
287 // Try to get the next segments
288 let self_segment = self_segments.next();
289 let other_segment = other_segments.next();
290
291 // Make sure that there's a next segment for both versions.
292 let (self_segment, other_segment) = match (self_segment, other_segment) {
293 // Both segments exist, we continue after match.
294 (Some(self_seg), Some(other_seg)) => (self_seg, other_seg),
295
296 // Both versions reached their end and are thereby equal.
297 (None, None) => return Ordering::Equal,
298
299 // One version is longer than the other and both are equal until now.
300 //
301 // ## Case 1
302 //
303 // The longer version is one or more **segment**s longer.
304 // In this case, the longer version is always considered newer.
305 // `1.0` > `1`
306 // `1.0.0` > `1.0`
307 // `1.0.a` > `1.0`
308 // ⤷ New segment exists, thereby newer
309 //
310 // ## Case 2
311 //
312 // The current **segment** has one or more sub-segments and the next sub-segment is
313 // alphabetic.
314 // In this case, the shorter version is always newer.
315 // The reason for this is to handle pre-releases (e.g. alpha/beta).
316 // `1.0alpha` < `1.0`
317 // `1.0alpha.0` < `1.0`
318 // `1.0alpha12.0` < `1.0`
319 // ⤷ Next sub-segment is alphabetic.
320 //
321 // ## Case 3
322 //
323 // The current **segment** has one or more sub-segments and the next sub-segment is
324 // numeric. In this case, the longer version is always newer.
325 // `1.alpha0` > `1.alpha`
326 // `1.alpha0.1` > `1.alpha`
327 // ⤷ Next sub-segment is numeric.
328 (Some(seg), None) => {
329 // If the current segment is the start of a segment, it's always considered
330 // newer.
331 let text = match seg {
332 VersionSegment::Segment { .. } => return Ordering::Greater,
333 VersionSegment::SubSegment { text } => text,
334 };
335
336 // If it's a sub-segment, we have to check for the edge-case explained above
337 // If all chars are alphabetic, `self` is consider older.
338 if !text.is_empty() && text.chars().all(char::is_alphabetic) {
339 return Ordering::Less;
340 }
341
342 return Ordering::Greater;
343 }
344
345 // This is the same logic as above, but inverted.
346 (None, Some(seg)) => {
347 let text = match seg {
348 VersionSegment::Segment { .. } => return Ordering::Less,
349 VersionSegment::SubSegment { text } => text,
350 };
351 if !text.is_empty() && text.chars().all(char::is_alphabetic) {
352 return Ordering::Greater;
353 }
354 if !text.is_empty() && text.chars().all(char::is_alphabetic) {
355 return Ordering::Greater;
356 }
357
358 return Ordering::Less;
359 }
360 };
361
362 // At this point, we have two sub-/segments.
363 //
364 // We start with the special case where one or both of the segments are empty.
365 // That means that the end of the version string has been reached, but there were one
366 // or more trailing delimiters, e.g.:
367 //
368 // `1.0.`
369 // `1.0...`
370 if other_segment.is_empty() && self_segment.is_empty() {
371 // Both reached the end of their version with a trailing delimiter.
372 // Counterintuitively, the trailing delimiter count is not considered and both
373 // versions are considered equal
374 // `1.0....` == `1.0.`
375 // ⤷ Length of delimiters is ignored.
376 return Ordering::Equal;
377 } else if self_segment.is_empty() {
378 // Now we have to consider the special case where `other` is alphabetic.
379 // If that's the case, `self` will be considered newer, as the alphabetic string
380 // indicates a pre-release,
381 // `1.0.` > `1.0alpha0`
382 // `1.0.` > `1.0.alpha.0`
383 // ⤷ Alphabetic sub-/segment and thereby always older.
384 //
385 // Also, we know that `other_segment` isn't empty at this point.
386 // It's noteworthy that this logic does not differentiated between segments and
387 // sub-segments.
388 if other_segment.chars().all(char::is_alphabetic) {
389 return Ordering::Greater;
390 }
391
392 // In all other cases, `other` is newer.
393 // `1.0.` < `1.0.0`
394 // `1.0.` < `1.0.2.0`
395 return Ordering::Less;
396 } else if other_segment.is_empty() {
397 // Check docs above, as it's the same logic as above, just inverted.
398 if self_segment.chars().all(char::is_alphabetic) {
399 return Ordering::Less;
400 }
401
402 return Ordering::Greater;
403 }
404
405 // We finally reached the end handling special cases when the version string ended.
406 // From now on, we know that we have two actual sub-/segments that might be prefixed by
407 // some delimiters.
408 //
409 // However, it is possible that one version has a segment and while the other has a
410 // sub-segment. This special case is what is handled next.
411 //
412 // We purposefully give up ownership of both segments.
413 // This is to ensure that following this match block, we finally only have to
414 // consider the actual text of the segments, as we'll know that both sub-/segments are
415 // of the same type.
416 let (self_text, other_text) = match (self_segment, other_segment) {
417 (
418 VersionSegment::Segment {
419 delimiter_count: self_count,
420 text: self_text,
421 },
422 VersionSegment::Segment {
423 delimiter_count: other_count,
424 text: other_text,
425 },
426 ) => {
427 // Special case:
428 // If one of the segments has more leading delimiters than the other, it is
429 // always considered newer, no matter what follows after the delimiters.
430 // `1..0.0` > `1.2.0`
431 // ⤷ Two delimiters, thereby always newer.
432 // `1..0.0` < `1..2.0`
433 // ⤷ Same amount of delimiters, now `2 > 0`
434 if self_count != other_count {
435 return self_count.cmp(&other_count);
436 }
437 (self_text, other_text)
438 }
439 // If one is the start of a new segment, while the other is still a sub-segment,
440 // we can return early as a new segment always overrules a sub-segment.
441 // `1.alpha0.0` < `1.alpha.0`
442 // ⤷ sub-segment ⤷ segment
443 // In the third iteration there's a sub-segment on the left side while
444 // there's a segment on the right side.
445 (VersionSegment::Segment { .. }, VersionSegment::SubSegment { .. }) => {
446 return Ordering::Greater;
447 }
448 (VersionSegment::SubSegment { .. }, VersionSegment::Segment { .. }) => {
449 return Ordering::Less;
450 }
451 (
452 VersionSegment::SubSegment { text: self_text },
453 VersionSegment::SubSegment { text: other_text },
454 ) => (self_text, other_text),
455 };
456
457 // At this point, we know that we are dealing with two identical types of sub-/segments.
458 // Thereby, we now only have to compare the text of those sub-/segments.
459
460 // Check whether any of the texts are numeric.
461 // Numeric sub-/segments are always considered newer than non-numeric sub-/segments.
462 // E.g.: `1.0.0` > `1.foo.0`
463 // ⤷ `0` vs `foo`.
464 // `0` is numeric and therebynewer than a alphanumeric one.
465 let self_is_numeric = !self_text.is_empty() && self_text.chars().all(char::is_numeric);
466 let other_is_numeric =
467 !other_text.is_empty() && other_text.chars().all(char::is_numeric);
468
469 if self_is_numeric && !other_is_numeric {
470 return Ordering::Greater;
471 } else if !self_is_numeric && other_is_numeric {
472 return Ordering::Less;
473 } else if self_is_numeric && other_is_numeric {
474 // In case both are numeric, we do a number comparison.
475 // We can parse the string as we know that they only consist of digits, hence the
476 // unwrap.
477 //
478 // Preceding zeroes are to be ignored, which is automatically done by Rust's number
479 // parser.
480 // E.g. `1.0001.1` == `1.1.1`
481 // ⤷ `000` is ignored in the comparison.
482 let ordering = self_text
483 .parse::<usize>()
484 .unwrap()
485 .cmp(&other_text.parse::<usize>().unwrap());
486
487 match ordering {
488 Ordering::Less => return Ordering::Less,
489 Ordering::Greater => return Ordering::Greater,
490 // If both numbers are equal we check the next sub-/segment.
491 Ordering::Equal => continue,
492 }
493 }
494
495 // At this point, we know that both sub-/segments are alphabetic.
496 // We do a simple string comparison to determine the newer version.
497 match self_text.cmp(other_text) {
498 Ordering::Less => return Ordering::Less,
499 Ordering::Greater => return Ordering::Greater,
500 // If the strings are equal, we check the next sub-/segment.
501 Ordering::Equal => continue,
502 }
503 }
504 }
505}
506
507impl PartialOrd for PackageVersion {
508 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
509 Some(self.cmp(other))
510 }
511}
512
513impl PartialEq for PackageVersion {
514 fn eq(&self, other: &Self) -> bool {
515 self.cmp(other).is_eq()
516 }
517}
518
519#[cfg(test)]
520mod tests {
521 use rstest::rstest;
522
523 use super::*;
524
525 #[rstest]
526 #[case("1.0.0", vec![
527 VersionSegment::Segment{ text:"1", delimiter_count: 0},
528 VersionSegment::Segment{ text:"0", delimiter_count: 1},
529 VersionSegment::Segment{ text:"0", delimiter_count: 1},
530 ])]
531 #[case("1..0", vec![
532 VersionSegment::Segment{ text:"1", delimiter_count: 0},
533 VersionSegment::Segment{ text:"0", delimiter_count: 2},
534 ])]
535 #[case("1.0.", vec![
536 VersionSegment::Segment{ text:"1", delimiter_count: 0},
537 VersionSegment::Segment{ text:"0", delimiter_count: 1},
538 VersionSegment::Segment{ text:"", delimiter_count: 1},
539 ])]
540 #[case("1..", vec![
541 VersionSegment::Segment{ text:"1", delimiter_count: 0},
542 VersionSegment::Segment{ text:"", delimiter_count: 2},
543 ])]
544 #[case("1...", vec![
545 VersionSegment::Segment{ text:"1", delimiter_count: 0},
546 VersionSegment::Segment{ text:"", delimiter_count: 3},
547 ])]
548 #[case("1.🗻lol.0", vec![
549 VersionSegment::Segment{ text:"1", delimiter_count: 0},
550 VersionSegment::Segment{ text:"lol", delimiter_count: 2},
551 VersionSegment::Segment{ text:"0", delimiter_count: 1},
552 ])]
553 #[case("1.🗻lol.", vec![
554 VersionSegment::Segment{ text:"1", delimiter_count: 0},
555 VersionSegment::Segment{ text:"lol", delimiter_count: 2},
556 VersionSegment::Segment{ text:"", delimiter_count: 1},
557 ])]
558 #[case("20220202", vec![
559 VersionSegment::Segment{ text:"20220202", delimiter_count: 0},
560 ])]
561 #[case("some_string", vec![
562 VersionSegment::Segment{ text:"some", delimiter_count: 0},
563 VersionSegment::Segment{ text:"string", delimiter_count: 1}
564 ])]
565 #[case("alpha7654numeric321", vec![
566 VersionSegment::Segment{ text:"alpha", delimiter_count: 0},
567 VersionSegment::SubSegment{ text:"7654"},
568 VersionSegment::SubSegment{ text:"numeric"},
569 VersionSegment::SubSegment{ text:"321"},
570 ])]
571 fn version_segment_iterator(
572 #[case] version: &str,
573 #[case] expected_segments: Vec<VersionSegment>,
574 ) {
575 let version = PackageVersion(version.to_string());
576 // Convert the simplified definition above into actual VersionSegment instances.
577 let mut segments_iter = version.segments();
578 let mut expected_iter = expected_segments.clone().into_iter();
579
580 // Iterate over both iterators.
581 // We do it manually to ensure that they both end at the same time.
582 loop {
583 let next_segment = segments_iter.next();
584 assert_eq!(
585 next_segment,
586 expected_iter.next(),
587 "Failed for segment {next_segment:?} in version string {version}:\nsegments: {:?}\n expected: {:?}",
588 version.segments().collect::<Vec<VersionSegment>>(),
589 expected_segments,
590 );
591 if next_segment.is_none() {
592 break;
593 }
594 }
595 }
596}