alpm_types/version/comparison.rs
1//! Comparison for [`PackageVersion`].
2//!
3//! This code is derived from the [rpmvercmp algorithm in RPM version 4.8.1].
4//! The current version, including its improvements over the original are described in detail in
5//! [alpm-pkgver].
6//!
7//! [alpm-pkgver]: https://alpm.archlinux.page/specifications/alpm-pkgver.7.html
8//! [rpmvercmp algorithm in RPM version 4.8.1]: https://github.com/rpm-software-management/rpm/blob/rpm-4.8.1-release/lib/rpmvercmp.c
9
10use std::{
11 cmp::Ordering,
12 iter::Peekable,
13 str::{CharIndices, Chars, FromStr},
14};
15
16use crate::PackageVersion;
17
18/// This enum represents a single segment in a version string.
19/// [`VersionSegment`]s are returned by the [`VersionSegments`] iterator, which is responsible for
20/// splitting a version string into its segments.
21///
22/// Version strings are split according to the following rules:
23///
24/// - Non-alphanumeric characters always count as delimiters (`.`, `-`, `$`, etc.).
25/// - There's no differentiation between delimiters represented by different characters (e.g. `'$$$'
26/// == '...' == '.$-'`).
27/// - Each segment contains the info about the amount of leading delimiters for that segment.
28/// Leading delimiters that directly follow after one another are grouped together. The length of
29/// the delimiters is important, as it plays a crucial role in the algorithm that determines which
30/// version is newer.
31///
32/// `1...a` would be represented as:
33///
34/// ```
35/// use alpm_types::VersionSegment::*;
36/// vec![
37/// Segment { text: "1", delimiter_count: 0},
38/// Segment { text: "a", delimiter_count: 3},
39/// ];
40/// ```
41/// - Alphanumeric strings are also split into individual sub-segments. This is done by walking over
42/// the string and splitting it every time a switch from alphabetic to numeric is detected or vice
43/// versa.
44///
45/// `1.1foo123.0` would be represented as:
46///
47/// ```
48/// use alpm_types::VersionSegment::*;
49/// vec![
50/// Segment { text: "1", delimiter_count: 0},
51/// Segment { text: "1", delimiter_count: 1},
52/// SubSegment { text: "foo" },
53/// SubSegment { text: "123" },
54/// Segment { text: "0", delimiter_count: 1},
55/// ];
56/// ```
57/// - Trailing delimiters are encoded as an empty string.
58///
59/// `1...` would be represented as:
60///
61/// ```
62/// use alpm_types::VersionSegment::*;
63/// vec![
64/// Segment { text: "1", delimiter_count: 0},
65/// Segment { text: "", delimiter_count: 3},
66/// ];
67/// ```
68#[derive(Clone, Debug, Eq, PartialEq)]
69pub enum VersionSegment<'a> {
70 /// The start of a new segment.
71 /// If the current segment can be split into multiple sub-segments, this variant only contains
72 /// the **first** sub-segment.
73 ///
74 /// To figure out whether this is sub-segment, peek at the next element in the
75 /// [`VersionSegments`] iterator, whether it's a [`VersionSegment::SubSegment`].
76 Segment {
77 /// The string representation of this segment
78 text: &'a str,
79 /// The amount of leading delimiters that were found for this segment
80 delimiter_count: usize,
81 },
82 /// A sub-segment of a version string's segment.
83 ///
84 /// Note that the first sub-segment of a segment that can be split into sub-segments is
85 /// counterintuitively represented by [VersionSegment::Segment]. This implementation detail
86 /// is due to the way the comparison algorithm works, as it does not always differentiate
87 /// between segments and sub-segments.
88 SubSegment {
89 /// The string representation of this sub-segment
90 text: &'a str,
91 },
92}
93
94impl<'a> VersionSegment<'a> {
95 /// Returns the inner string slice independent of [`VersionSegment`] variant.
96 pub fn text(&self) -> &str {
97 match self {
98 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => text,
99 }
100 }
101
102 /// Returns whether the inner string slice is empty, independent of [`VersionSegment`] variant
103 pub fn is_empty(&self) -> bool {
104 match self {
105 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
106 text.is_empty()
107 }
108 }
109 }
110
111 /// Returns an iterator over the chars of the inner string slice.
112 pub fn chars(&self) -> Chars<'a> {
113 match self {
114 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
115 text.chars()
116 }
117 }
118 }
119
120 /// Creates a type `T` from the inner string slice by relying on `T`'s [`FromStr::from_str`]
121 /// implementation.
122 pub fn parse<T: FromStr>(&self) -> Result<T, T::Err> {
123 match self {
124 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
125 FromStr::from_str(text)
126 }
127 }
128 }
129
130 /// Compares the inner string slice with that of another [`VersionSegment`].
131 pub fn str_cmp(&self, other: &VersionSegment) -> Ordering {
132 match self {
133 VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
134 text.cmp(&other.text())
135 }
136 }
137 }
138}
139
140/// An [Iterator] over all [VersionSegment]s of an upstream version string.
141/// Check the documentation on [VersionSegment] to see how a string is split into segments.
142///
143/// Important note:
144/// Trailing delimiters will also produce a trailing [VersionSegment] with an empty string.
145///
146/// This iterator is capable of handling utf-8 strings.
147/// However, non alphanumeric chars are still interpreted as delimiters.
148#[derive(Debug)]
149pub struct VersionSegments<'a> {
150 /// The original version string. We need that reference so we can get some string
151 /// slices based on indices later on.
152 version: &'a str,
153 /// An iterator over the version's chars and their respective start byte's index.
154 version_chars: Peekable<CharIndices<'a>>,
155 /// Check if the cursor is currently in a segment.
156 /// This is necessary to detect whether the next segment should be a sub-segment or a new
157 /// segment.
158 in_segment: bool,
159}
160
161impl<'a> VersionSegments<'a> {
162 /// Create a new instance of a VersionSegments iterator.
163 pub fn new(version: &'a str) -> Self {
164 VersionSegments {
165 version,
166 version_chars: version.char_indices().peekable(),
167 in_segment: false,
168 }
169 }
170}
171
172impl<'a> Iterator for VersionSegments<'a> {
173 type Item = VersionSegment<'a>;
174
175 /// Get the next [VersionSegment] of this version string.
176 fn next(&mut self) -> Option<VersionSegment<'a>> {
177 // Used to track the number of delimiters the next segment is prefixed with.
178 let mut delimiter_count = 0;
179
180 // First up, get the delimiters out of the way.
181 // Peek at the next char, if it's a delimiter, consume it and increase the delimiter count.
182 while let Some((_, char)) = self.version_chars.peek() {
183 // An alphanumeric char indicates that we reached the next segment.
184 if char.is_alphanumeric() {
185 break;
186 }
187
188 self.version_chars.next();
189 delimiter_count += 1;
190
191 // As soon as we hit a delimiter, we know that a new segment is about to start.
192 self.in_segment = false;
193 continue;
194 }
195
196 // Get the next char. If there's no further char, we reached the end of the version string.
197 let Some((first_index, first_char)) = self.version_chars.next() else {
198 // We're at the end of the string and now have to differentiate between two cases:
199
200 // 1. There are no trailing delimiters. We can just return `None` as we truly reached
201 // the end.
202 if delimiter_count == 0 {
203 return None;
204 }
205
206 // 2. There's no further segment, but there were some trailing delimiters. The
207 // comparison algorithm considers this case which is why we have to somehow encode
208 // it. We do so by returning an empty segment.
209 return Some(VersionSegment::Segment {
210 text: "",
211 delimiter_count,
212 });
213 };
214
215 // Cache the last valid char + index that was checked. We need this to
216 // calculate the offset in case the last char is a multi-byte UTF-8 char.
217 let mut last_char = first_char;
218 let mut last_char_index = first_index;
219
220 // The following section now handles the splitting of an alphanumeric string into its
221 // sub-segments. As described in the [VersionSegment] docs, the string needs to be split
222 // every time a switch from alphabetic to numeric or vice versa is detected.
223
224 let is_numeric = first_char.is_numeric();
225
226 if is_numeric {
227 // Go through chars until we hit a non-numeric char or reached the end of the string.
228 #[allow(clippy::while_let_on_iterator)]
229 while let Some((index, next_char)) =
230 self.version_chars.next_if(|(_, peek)| peek.is_numeric())
231 {
232 last_char_index = index;
233 last_char = next_char;
234 }
235 } else {
236 // Go through chars until we hit a non-alphabetic char or reached the end of the string.
237 #[allow(clippy::while_let_on_iterator)]
238 while let Some((index, next_char)) =
239 self.version_chars.next_if(|(_, peek)| peek.is_alphabetic())
240 {
241 last_char_index = index;
242 last_char = next_char;
243 }
244 }
245
246 // Create a subslice based on the indices of the first and last char.
247 // The last char might be multi-byte, which is why we add its length.
248 let segment_slice = &self.version[first_index..(last_char_index + last_char.len_utf8())];
249
250 if !self.in_segment {
251 // Any further segments should be sub-segments, unless we hit a delimiter in which
252 // case this variable will reset to false.
253 self.in_segment = true;
254 Some(VersionSegment::Segment {
255 text: segment_slice,
256 delimiter_count,
257 })
258 } else {
259 Some(VersionSegment::SubSegment {
260 text: segment_slice,
261 })
262 }
263 }
264}
265
266impl Ord for PackageVersion {
267 /// This block implements the logic to determine which of two package versions is newer or
268 /// whether they're considered equal.
269 ///
270 /// This logic is surprisingly complex as it mirrors the current C-alpmlib implementation for
271 /// backwards compatibility reasons.
272 /// <https://gitlab.archlinux.org/pacman/pacman/-/blob/a2d029388c7c206f5576456f91bfbea2dca98c96/lib/libalpm/version.c#L83-217>
273 fn cmp(&self, other: &Self) -> Ordering {
274 // Equal strings are considered equal versions.
275 if self.inner() == other.inner() {
276 return Ordering::Equal;
277 }
278
279 let mut self_segments = self.segments();
280 let mut other_segments = other.segments();
281
282 // Loop through both versions' segments and compare them.
283 loop {
284 // Try to get the next segments
285 let self_segment = self_segments.next();
286 let other_segment = other_segments.next();
287
288 // Make sure that there's a next segment for both versions.
289 let (self_segment, other_segment) = match (self_segment, other_segment) {
290 // Both segments exist, we continue after match.
291 (Some(self_seg), Some(other_seg)) => (self_seg, other_seg),
292
293 // Both versions reached their end and are thereby equal.
294 (None, None) => return Ordering::Equal,
295
296 // One version is longer than the other and both are equal until now.
297 //
298 // ## Case 1
299 //
300 // The longer version is one or more **segment**s longer.
301 // In this case, the longer version is always considered newer.
302 // `1.0` > `1`
303 // `1.0.0` > `1.0`
304 // `1.0.a` > `1.0`
305 // ⤷ New segment exists, thereby newer
306 //
307 // ## Case 2
308 //
309 // The current **segment** has one or more sub-segments and the next sub-segment is
310 // alphabetic.
311 // In this case, the shorter version is always newer.
312 // The reason for this is to handle pre-releases (e.g. alpha/beta).
313 // `1.0alpha` < `1.0`
314 // `1.0alpha.0` < `1.0`
315 // `1.0alpha12.0` < `1.0`
316 // ⤷ Next sub-segment is alphabetic.
317 //
318 // ## Case 3
319 //
320 // The current **segment** has one or more sub-segments and the next sub-segment is
321 // numeric. In this case, the longer version is always newer.
322 // `1.alpha0` > `1.alpha`
323 // `1.alpha0.1` > `1.alpha`
324 // ⤷ Next sub-segment is numeric.
325 (Some(seg), None) => {
326 // If the current segment is the start of a segment, it's always considered
327 // newer.
328 let text = match seg {
329 VersionSegment::Segment { .. } => return Ordering::Greater,
330 VersionSegment::SubSegment { text } => text,
331 };
332
333 // If it's a sub-segment, we have to check for the edge-case explained above
334 // If all chars are alphabetic, `self` is consider older.
335 if !text.is_empty() && text.chars().all(char::is_alphabetic) {
336 return Ordering::Less;
337 }
338
339 return Ordering::Greater;
340 }
341
342 // This is the same logic as above, but inverted.
343 (None, Some(seg)) => {
344 let text = match seg {
345 VersionSegment::Segment { .. } => return Ordering::Less,
346 VersionSegment::SubSegment { text } => text,
347 };
348 if !text.is_empty() && text.chars().all(char::is_alphabetic) {
349 return Ordering::Greater;
350 }
351 if !text.is_empty() && text.chars().all(char::is_alphabetic) {
352 return Ordering::Greater;
353 }
354
355 return Ordering::Less;
356 }
357 };
358
359 // At this point, we have two sub-/segments.
360 //
361 // We start with the special case where one or both of the segments are empty.
362 // That means that the end of the version string has been reached, but there were one
363 // or more trailing delimiters, e.g.:
364 //
365 // `1.0.`
366 // `1.0...`
367 if other_segment.is_empty() && self_segment.is_empty() {
368 // Both reached the end of their version with a trailing delimiter.
369 // Counterintuitively, the trailing delimiter count is not considered and both
370 // versions are considered equal
371 // `1.0....` == `1.0.`
372 // ⤷ Length of delimiters is ignored.
373 return Ordering::Equal;
374 } else if self_segment.is_empty() {
375 // Now we have to consider the special case where `other` is alphabetic.
376 // If that's the case, `self` will be considered newer, as the alphabetic string
377 // indicates a pre-release,
378 // `1.0.` > `1.0alpha0`
379 // `1.0.` > `1.0.alpha.0`
380 // ⤷ Alphabetic sub-/segment and thereby always older.
381 //
382 // Also, we know that `other_segment` isn't empty at this point.
383 // It's noteworthy that this logic does not differentiated between segments and
384 // sub-segments.
385 if other_segment.chars().all(char::is_alphabetic) {
386 return Ordering::Greater;
387 }
388
389 // In all other cases, `other` is newer.
390 // `1.0.` < `1.0.0`
391 // `1.0.` < `1.0.2.0`
392 return Ordering::Less;
393 } else if other_segment.is_empty() {
394 // Check docs above, as it's the same logic as above, just inverted.
395 if self_segment.chars().all(char::is_alphabetic) {
396 return Ordering::Less;
397 }
398
399 return Ordering::Greater;
400 }
401
402 // We finally reached the end handling special cases when the version string ended.
403 // From now on, we know that we have two actual sub-/segments that might be prefixed by
404 // some delimiters.
405 //
406 // However, it is possible that one version has a segment and while the other has a
407 // sub-segment. This special case is what is handled next.
408 //
409 // We purposefully give up ownership of both segments.
410 // This is to ensure that following this match block, we finally only have to
411 // consider the actual text of the segments, as we'll know that both sub-/segments are
412 // of the same type.
413 let (self_text, other_text) = match (self_segment, other_segment) {
414 (
415 VersionSegment::Segment {
416 delimiter_count: self_count,
417 text: self_text,
418 },
419 VersionSegment::Segment {
420 delimiter_count: other_count,
421 text: other_text,
422 },
423 ) => {
424 // Special case:
425 // If one of the segments has more leading delimiters than the other, it is
426 // always considered newer, no matter what follows after the delimiters.
427 // `1..0.0` > `1.2.0`
428 // ⤷ Two delimiters, thereby always newer.
429 // `1..0.0` < `1..2.0`
430 // ⤷ Same amount of delimiters, now `2 > 0`
431 if self_count != other_count {
432 return self_count.cmp(&other_count);
433 }
434 (self_text, other_text)
435 }
436 // If one is the start of a new segment, while the other is still a sub-segment,
437 // we can return early as a new segment always overrules a sub-segment.
438 // `1.alpha0.0` < `1.alpha.0`
439 // ⤷ sub-segment ⤷ segment
440 // In the third iteration there's a sub-segment on the left side while
441 // there's a segment on the right side.
442 (VersionSegment::Segment { .. }, VersionSegment::SubSegment { .. }) => {
443 return Ordering::Greater;
444 }
445 (VersionSegment::SubSegment { .. }, VersionSegment::Segment { .. }) => {
446 return Ordering::Less;
447 }
448 (
449 VersionSegment::SubSegment { text: self_text },
450 VersionSegment::SubSegment { text: other_text },
451 ) => (self_text, other_text),
452 };
453
454 // At this point, we know that we are dealing with two identical types of sub-/segments.
455 // Thereby, we now only have to compare the text of those sub-/segments.
456
457 // Check whether any of the texts are numeric.
458 // Numeric sub-/segments are always considered newer than non-numeric sub-/segments.
459 // E.g.: `1.0.0` > `1.foo.0`
460 // ⤷ `0` vs `foo`.
461 // `0` is numeric and therebynewer than a alphanumeric one.
462 let self_is_numeric = !self_text.is_empty() && self_text.chars().all(char::is_numeric);
463 let other_is_numeric =
464 !other_text.is_empty() && other_text.chars().all(char::is_numeric);
465
466 if self_is_numeric && !other_is_numeric {
467 return Ordering::Greater;
468 } else if !self_is_numeric && other_is_numeric {
469 return Ordering::Less;
470 } else if self_is_numeric && other_is_numeric {
471 // In case both are numeric, we do a number comparison.
472 // We can parse the string as we know that they only consist of digits, hence the
473 // unwrap.
474 //
475 // Preceding zeroes are to be ignored, which is automatically done by Rust's number
476 // parser.
477 // E.g. `1.0001.1` == `1.1.1`
478 // ⤷ `000` is ignored in the comparison.
479 let ordering = self_text
480 .parse::<usize>()
481 .unwrap()
482 .cmp(&other_text.parse::<usize>().unwrap());
483
484 match ordering {
485 Ordering::Less => return Ordering::Less,
486 Ordering::Greater => return Ordering::Greater,
487 // If both numbers are equal we check the next sub-/segment.
488 Ordering::Equal => continue,
489 }
490 }
491
492 // At this point, we know that both sub-/segments are alphabetic.
493 // We do a simple string comparison to determine the newer version.
494 match self_text.cmp(other_text) {
495 Ordering::Less => return Ordering::Less,
496 Ordering::Greater => return Ordering::Greater,
497 // If the strings are equal, we check the next sub-/segment.
498 Ordering::Equal => continue,
499 }
500 }
501 }
502}
503
504impl PartialOrd for PackageVersion {
505 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
506 Some(self.cmp(other))
507 }
508}
509
510impl PartialEq for PackageVersion {
511 fn eq(&self, other: &Self) -> bool {
512 self.cmp(other).is_eq()
513 }
514}
515
516#[cfg(test)]
517mod tests {
518 use rstest::rstest;
519
520 use super::*;
521
522 #[rstest]
523 #[case("1.0.0", vec![
524 VersionSegment::Segment{ text:"1", delimiter_count: 0},
525 VersionSegment::Segment{ text:"0", delimiter_count: 1},
526 VersionSegment::Segment{ text:"0", delimiter_count: 1},
527 ])]
528 #[case("1..0", vec![
529 VersionSegment::Segment{ text:"1", delimiter_count: 0},
530 VersionSegment::Segment{ text:"0", delimiter_count: 2},
531 ])]
532 #[case("1.0.", vec![
533 VersionSegment::Segment{ text:"1", delimiter_count: 0},
534 VersionSegment::Segment{ text:"0", delimiter_count: 1},
535 VersionSegment::Segment{ text:"", delimiter_count: 1},
536 ])]
537 #[case("1..", vec![
538 VersionSegment::Segment{ text:"1", delimiter_count: 0},
539 VersionSegment::Segment{ text:"", delimiter_count: 2},
540 ])]
541 #[case("1...", vec![
542 VersionSegment::Segment{ text:"1", delimiter_count: 0},
543 VersionSegment::Segment{ text:"", delimiter_count: 3},
544 ])]
545 #[case("1.🗻lol.0", vec![
546 VersionSegment::Segment{ text:"1", delimiter_count: 0},
547 VersionSegment::Segment{ text:"lol", delimiter_count: 2},
548 VersionSegment::Segment{ text:"0", delimiter_count: 1},
549 ])]
550 #[case("1.🗻lol.", vec![
551 VersionSegment::Segment{ text:"1", delimiter_count: 0},
552 VersionSegment::Segment{ text:"lol", delimiter_count: 2},
553 VersionSegment::Segment{ text:"", delimiter_count: 1},
554 ])]
555 #[case("20220202", vec![
556 VersionSegment::Segment{ text:"20220202", delimiter_count: 0},
557 ])]
558 #[case("some_string", vec![
559 VersionSegment::Segment{ text:"some", delimiter_count: 0},
560 VersionSegment::Segment{ text:"string", delimiter_count: 1}
561 ])]
562 #[case("alpha7654numeric321", vec![
563 VersionSegment::Segment{ text:"alpha", delimiter_count: 0},
564 VersionSegment::SubSegment{ text:"7654"},
565 VersionSegment::SubSegment{ text:"numeric"},
566 VersionSegment::SubSegment{ text:"321"},
567 ])]
568 fn version_segment_iterator(
569 #[case] version: &str,
570 #[case] expected_segments: Vec<VersionSegment>,
571 ) {
572 let version = PackageVersion(version.to_string());
573 // Convert the simplified definition above into actual VersionSegment instances.
574 let mut segments_iter = version.segments();
575 let mut expected_iter = expected_segments.clone().into_iter();
576
577 // Iterate over both iterators.
578 // We do it manually to ensure that they both end at the same time.
579 loop {
580 let next_segment = segments_iter.next();
581 assert_eq!(
582 next_segment,
583 expected_iter.next(),
584 "Failed for segment {next_segment:?} in version string {version}:\nsegments: {:?}\n expected: {:?}",
585 version.segments().collect::<Vec<VersionSegment>>(),
586 expected_segments,
587 );
588 if next_segment.is_none() {
589 break;
590 }
591 }
592 }
593}