alpm_types/version/
comparison.rs

1//! Comparison for [`PackageVersion`].
2//!
3//! This module implements the behavior of the [rpmvercmp algorithm in RPM version 4.8.1].
4//! The current implementation is based on the [alpm-pkgver] specification and explicitly
5//! handles undefined and undocumented behavior observed from `rpmvercmp`.
6//!
7//! The current version, including its improvements over the reference implementation are described
8//! and specified in detail in [alpm-pkgver].
9//!
10//! [alpm-pkgver]: https://alpm.archlinux.page/specifications/alpm-pkgver.7.html
11//! [rpmvercmp algorithm in RPM version 4.8.1]: https://github.com/rpm-software-management/rpm/blob/rpm-4.8.1-release/lib/rpmvercmp.c
12
13use std::{
14    cmp::Ordering,
15    iter::Peekable,
16    str::{CharIndices, Chars, FromStr},
17};
18
19use crate::PackageVersion;
20
21/// This enum represents a single segment in a version string.
22/// [`VersionSegment`]s are returned by the [`VersionSegments`] iterator, which is responsible for
23/// splitting a version string into its segments.
24///
25/// Version strings are split according to the following rules:
26///
27/// - Non-alphanumeric characters always count as delimiters (`.`, `-`, `$`, etc.).
28/// - There's no differentiation between delimiters represented by different characters (e.g. `'$$$'
29///   == '...' == '.$-'`).
30/// - Each segment contains the info about the amount of leading delimiters for that segment.
31///   Leading delimiters that directly follow after one another are grouped together. The length of
32///   the delimiters is important, as it plays a crucial role in the algorithm that determines which
33///   version is newer.
34///
35///   `1...a` would be represented as:
36///
37///   ```
38///   use alpm_types::VersionSegment::*;
39///   vec![
40///     Segment { text: "1", delimiter_count: 0},
41///     Segment { text: "a", delimiter_count: 3},
42///   ];
43///   ```
44/// - Alphanumeric strings are also split into individual sub-segments. This is done by walking over
45///   the string and splitting it every time a switch from alphabetic to numeric is detected or vice
46///   versa.
47///
48///   `1.1foo123.0` would be represented as:
49///
50///   ```
51///   use alpm_types::VersionSegment::*;
52///   vec![
53///     Segment { text: "1", delimiter_count: 0},
54///     Segment { text: "1", delimiter_count: 1},
55///     SubSegment { text: "foo" },
56///     SubSegment { text: "123" },
57///     Segment { text: "0", delimiter_count: 1},
58///   ];
59///   ```
60/// - Trailing delimiters are encoded as an empty string.
61///
62///   `1...` would be represented as:
63///
64///   ```
65///   use alpm_types::VersionSegment::*;
66///   vec![
67///     Segment { text: "1", delimiter_count: 0},
68///     Segment { text: "", delimiter_count: 3},
69///   ];
70///   ```
71#[derive(Clone, Debug, Eq, PartialEq)]
72pub enum VersionSegment<'a> {
73    /// The start of a new segment.
74    /// If the current segment can be split into multiple sub-segments, this variant only contains
75    /// the **first** sub-segment.
76    ///
77    /// To figure out whether this is sub-segment, peek at the next element in the
78    /// [`VersionSegments`] iterator, whether it's a [`VersionSegment::SubSegment`].
79    Segment {
80        /// The string representation of this segment
81        text: &'a str,
82        /// The amount of leading delimiters that were found for this segment
83        delimiter_count: usize,
84    },
85    /// A sub-segment of a version string's segment.
86    ///
87    /// Note that the first sub-segment of a segment that can be split into sub-segments is
88    /// counterintuitively represented by [VersionSegment::Segment]. This implementation detail
89    /// is due to the way the comparison algorithm works, as it does not always differentiate
90    /// between segments and sub-segments.
91    SubSegment {
92        /// The string representation of this sub-segment
93        text: &'a str,
94    },
95}
96
97impl<'a> VersionSegment<'a> {
98    /// Returns the inner string slice independent of [`VersionSegment`] variant.
99    pub fn text(&self) -> &str {
100        match self {
101            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => text,
102        }
103    }
104
105    /// Returns whether the inner string slice is empty, independent of [`VersionSegment`] variant
106    pub fn is_empty(&self) -> bool {
107        match self {
108            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
109                text.is_empty()
110            }
111        }
112    }
113
114    /// Returns an iterator over the chars of the inner string slice.
115    pub fn chars(&self) -> Chars<'a> {
116        match self {
117            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
118                text.chars()
119            }
120        }
121    }
122
123    /// Creates a type `T` from the inner string slice by relying on `T`'s [`FromStr::from_str`]
124    /// implementation.
125    pub fn parse<T: FromStr>(&self) -> Result<T, T::Err> {
126        match self {
127            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
128                FromStr::from_str(text)
129            }
130        }
131    }
132
133    /// Compares the inner string slice with that of another [`VersionSegment`].
134    pub fn str_cmp(&self, other: &VersionSegment) -> Ordering {
135        match self {
136            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
137                text.cmp(&other.text())
138            }
139        }
140    }
141}
142
143/// An [Iterator] over all [VersionSegment]s of an upstream version string.
144/// Check the documentation on [VersionSegment] to see how a string is split into segments.
145///
146/// Important note:
147/// Trailing delimiters will also produce a trailing [VersionSegment] with an empty string.
148///
149/// This iterator is capable of handling utf-8 strings.
150/// However, non alphanumeric chars are still interpreted as delimiters.
151#[derive(Debug)]
152pub struct VersionSegments<'a> {
153    /// The original version string. We need that reference so we can get some string
154    /// slices based on indices later on.
155    version: &'a str,
156    /// An iterator over the version's chars and their respective start byte's index.
157    version_chars: Peekable<CharIndices<'a>>,
158    /// Check if the cursor is currently in a segment.
159    /// This is necessary to detect whether the next segment should be a sub-segment or a new
160    /// segment.
161    in_segment: bool,
162}
163
164impl<'a> VersionSegments<'a> {
165    /// Create a new instance of a VersionSegments iterator.
166    pub fn new(version: &'a str) -> Self {
167        VersionSegments {
168            version,
169            version_chars: version.char_indices().peekable(),
170            in_segment: false,
171        }
172    }
173}
174
175impl<'a> Iterator for VersionSegments<'a> {
176    type Item = VersionSegment<'a>;
177
178    /// Get the next [VersionSegment] of this version string.
179    fn next(&mut self) -> Option<VersionSegment<'a>> {
180        // Used to track the number of delimiters the next segment is prefixed with.
181        let mut delimiter_count = 0;
182
183        // First up, get the delimiters out of the way.
184        // Peek at the next char, if it's a delimiter, consume it and increase the delimiter count.
185        while let Some((_, char)) = self.version_chars.peek() {
186            // An alphanumeric char indicates that we reached the next segment.
187            if char.is_alphanumeric() {
188                break;
189            }
190
191            self.version_chars.next();
192            delimiter_count += 1;
193
194            // As soon as we hit a delimiter, we know that a new segment is about to start.
195            self.in_segment = false;
196            continue;
197        }
198
199        // Get the next char. If there's no further char, we reached the end of the version string.
200        let Some((first_index, first_char)) = self.version_chars.next() else {
201            // We're at the end of the string and now have to differentiate between two cases:
202
203            // 1. There are no trailing delimiters. We can just return `None` as we truly reached
204            //    the end.
205            if delimiter_count == 0 {
206                return None;
207            }
208
209            // 2. There's no further segment, but there were some trailing delimiters. The
210            //    comparison algorithm considers this case which is why we have to somehow encode
211            //    it. We do so by returning an empty segment.
212            return Some(VersionSegment::Segment {
213                text: "",
214                delimiter_count,
215            });
216        };
217
218        // Cache the last valid char + index that was checked. We need this to
219        // calculate the offset in case the last char is a multi-byte UTF-8 char.
220        let mut last_char = first_char;
221        let mut last_char_index = first_index;
222
223        // The following section now handles the splitting of an alphanumeric string into its
224        // sub-segments. As described in the [VersionSegment] docs, the string needs to be split
225        // every time a switch from alphabetic to numeric or vice versa is detected.
226
227        let is_numeric = first_char.is_numeric();
228
229        if is_numeric {
230            // Go through chars until we hit a non-numeric char or reached the end of the string.
231            #[allow(clippy::while_let_on_iterator)]
232            while let Some((index, next_char)) =
233                self.version_chars.next_if(|(_, peek)| peek.is_numeric())
234            {
235                last_char_index = index;
236                last_char = next_char;
237            }
238        } else {
239            // Go through chars until we hit a non-alphabetic char or reached the end of the string.
240            #[allow(clippy::while_let_on_iterator)]
241            while let Some((index, next_char)) =
242                self.version_chars.next_if(|(_, peek)| peek.is_alphabetic())
243            {
244                last_char_index = index;
245                last_char = next_char;
246            }
247        }
248
249        // Create a subslice based on the indices of the first and last char.
250        // The last char might be multi-byte, which is why we add its length.
251        let segment_slice = &self.version[first_index..(last_char_index + last_char.len_utf8())];
252
253        if !self.in_segment {
254            // Any further segments should be sub-segments, unless we hit a delimiter in which
255            // case this variable will reset to false.
256            self.in_segment = true;
257            Some(VersionSegment::Segment {
258                text: segment_slice,
259                delimiter_count,
260            })
261        } else {
262            Some(VersionSegment::SubSegment {
263                text: segment_slice,
264            })
265        }
266    }
267}
268
269impl Ord for PackageVersion {
270    /// This block implements the logic to determine which of two package versions is newer or
271    /// whether they're considered equal.
272    ///
273    /// This logic is surprisingly complex as it mirrors the current C-alpmlib implementation's
274    /// behavior for backwards compatibility reasons.
275    /// <https://gitlab.archlinux.org/pacman/pacman/-/blob/a2d029388c7c206f5576456f91bfbea2dca98c96/lib/libalpm/version.c#L83-217>
276    fn cmp(&self, other: &Self) -> Ordering {
277        // Equal strings are considered equal versions.
278        if self.inner() == other.inner() {
279            return Ordering::Equal;
280        }
281
282        let mut self_segments = self.segments();
283        let mut other_segments = other.segments();
284
285        // Loop through both versions' segments and compare them.
286        loop {
287            // Try to get the next segments
288            let self_segment = self_segments.next();
289            let other_segment = other_segments.next();
290
291            // Make sure that there's a next segment for both versions.
292            let (self_segment, other_segment) = match (self_segment, other_segment) {
293                // Both segments exist, we continue after match.
294                (Some(self_seg), Some(other_seg)) => (self_seg, other_seg),
295
296                // Both versions reached their end and are thereby equal.
297                (None, None) => return Ordering::Equal,
298
299                // One version is longer than the other and both are equal until now.
300                //
301                // ## Case 1
302                //
303                // The longer version is one or more **segment**s longer.
304                // In this case, the longer version is always considered newer.
305                //   `1.0` > `1`
306                // `1.0.0` > `1.0`
307                // `1.0.a` > `1.0`
308                //     ⤷ New segment exists, thereby newer
309                //
310                // ## Case 2
311                //
312                // The current **segment** has one or more sub-segments and the next sub-segment is
313                // alphabetic.
314                // In this case, the shorter version is always newer.
315                // The reason for this is to handle pre-releases (e.g. alpha/beta).
316                // `1.0alpha` < `1.0`
317                // `1.0alpha.0` < `1.0`
318                // `1.0alpha12.0` < `1.0`
319                //     ⤷ Next sub-segment is alphabetic.
320                //
321                // ## Case 3
322                //
323                // The current **segment** has one or more sub-segments and the next sub-segment is
324                // numeric. In this case, the longer version is always newer.
325                // `1.alpha0` > `1.alpha`
326                // `1.alpha0.1` > `1.alpha`
327                //         ⤷ Next sub-segment is numeric.
328                (Some(seg), None) => {
329                    // If the current segment is the start of a segment, it's always considered
330                    // newer.
331                    let text = match seg {
332                        VersionSegment::Segment { .. } => return Ordering::Greater,
333                        VersionSegment::SubSegment { text } => text,
334                    };
335
336                    // If it's a sub-segment, we have to check for the edge-case explained above
337                    // If all chars are alphabetic, `self` is consider older.
338                    if !text.is_empty() && text.chars().all(char::is_alphabetic) {
339                        return Ordering::Less;
340                    }
341
342                    return Ordering::Greater;
343                }
344
345                // This is the same logic as above, but inverted.
346                (None, Some(seg)) => {
347                    let text = match seg {
348                        VersionSegment::Segment { .. } => return Ordering::Less,
349                        VersionSegment::SubSegment { text } => text,
350                    };
351                    if !text.is_empty() && text.chars().all(char::is_alphabetic) {
352                        return Ordering::Greater;
353                    }
354                    if !text.is_empty() && text.chars().all(char::is_alphabetic) {
355                        return Ordering::Greater;
356                    }
357
358                    return Ordering::Less;
359                }
360            };
361
362            // At this point, we have two sub-/segments.
363            //
364            // We start with the special case where one or both of the segments are empty.
365            // That means that the end of the version string has been reached, but there were one
366            // or more trailing delimiters, e.g.:
367            //
368            // `1.0.`
369            // `1.0...`
370            if other_segment.is_empty() && self_segment.is_empty() {
371                // Both reached the end of their version with a trailing delimiter.
372                // Counterintuitively, the trailing delimiter count is not considered and both
373                // versions are considered equal
374                // `1.0....` == `1.0.`
375                //       ⤷ Length of delimiters is ignored.
376                return Ordering::Equal;
377            } else if self_segment.is_empty() {
378                // Now we have to consider the special case where `other` is alphabetic.
379                // If that's the case, `self` will be considered newer, as the alphabetic string
380                // indicates a pre-release,
381                // `1.0.` > `1.0alpha0`
382                // `1.0.` > `1.0.alpha.0`
383                //                ⤷ Alphabetic sub-/segment and thereby always older.
384                //
385                // Also, we know that `other_segment` isn't empty at this point.
386                // It's noteworthy that this logic does not differentiated between segments and
387                // sub-segments.
388                if other_segment.chars().all(char::is_alphabetic) {
389                    return Ordering::Greater;
390                }
391
392                // In all other cases, `other` is newer.
393                // `1.0.` < `1.0.0`
394                // `1.0.` < `1.0.2.0`
395                return Ordering::Less;
396            } else if other_segment.is_empty() {
397                // Check docs above, as it's the same logic as above, just inverted.
398                if self_segment.chars().all(char::is_alphabetic) {
399                    return Ordering::Less;
400                }
401
402                return Ordering::Greater;
403            }
404
405            // We finally reached the end handling special cases when the version string ended.
406            // From now on, we know that we have two actual sub-/segments that might be prefixed by
407            // some delimiters.
408            //
409            // However, it is possible that one version has a segment and while the other has a
410            // sub-segment. This special case is what is handled next.
411            //
412            // We purposefully give up ownership of both segments.
413            // This is to ensure that following this match block, we finally only have to
414            // consider the actual text of the segments, as we'll know that both sub-/segments are
415            // of the same type.
416            let (self_text, other_text) = match (self_segment, other_segment) {
417                (
418                    VersionSegment::Segment {
419                        delimiter_count: self_count,
420                        text: self_text,
421                    },
422                    VersionSegment::Segment {
423                        delimiter_count: other_count,
424                        text: other_text,
425                    },
426                ) => {
427                    // Special case:
428                    // If one of the segments has more leading delimiters than the other, it is
429                    // always considered newer, no matter what follows after the delimiters.
430                    // `1..0.0` > `1.2.0`
431                    //    ⤷ Two delimiters, thereby always newer.
432                    // `1..0.0` < `1..2.0`
433                    //               ⤷ Same amount of delimiters, now `2 > 0`
434                    if self_count != other_count {
435                        return self_count.cmp(&other_count);
436                    }
437                    (self_text, other_text)
438                }
439                // If one is the start of a new segment, while the other is still a sub-segment,
440                // we can return early as a new segment always overrules a sub-segment.
441                // `1.alpha0.0` < `1.alpha.0`
442                //         ⤷ sub-segment  ⤷ segment
443                //         In the third iteration there's a sub-segment on the left side while
444                //         there's a segment on the right side.
445                (VersionSegment::Segment { .. }, VersionSegment::SubSegment { .. }) => {
446                    return Ordering::Greater;
447                }
448                (VersionSegment::SubSegment { .. }, VersionSegment::Segment { .. }) => {
449                    return Ordering::Less;
450                }
451                (
452                    VersionSegment::SubSegment { text: self_text },
453                    VersionSegment::SubSegment { text: other_text },
454                ) => (self_text, other_text),
455            };
456
457            // At this point, we know that we are dealing with two identical types of sub-/segments.
458            // Thereby, we now only have to compare the text of those sub-/segments.
459
460            // Check whether any of the texts are numeric.
461            // Numeric sub-/segments are always considered newer than non-numeric sub-/segments.
462            // E.g.: `1.0.0` > `1.foo.0`
463            //          ⤷ `0` vs `foo`.
464            //            `0` is numeric and therebynewer than a alphanumeric one.
465            let self_is_numeric = !self_text.is_empty() && self_text.chars().all(char::is_numeric);
466            let other_is_numeric =
467                !other_text.is_empty() && other_text.chars().all(char::is_numeric);
468
469            if self_is_numeric && !other_is_numeric {
470                return Ordering::Greater;
471            } else if !self_is_numeric && other_is_numeric {
472                return Ordering::Less;
473            } else if self_is_numeric && other_is_numeric {
474                // In case both are numeric, we do a number comparison.
475                // We can parse the string as we know that they only consist of digits, hence the
476                // unwrap.
477                //
478                // Preceding zeroes are to be ignored, which is automatically done by Rust's number
479                // parser.
480                // E.g. `1.0001.1` == `1.1.1`
481                //          ⤷ `000` is ignored in the comparison.
482                let ordering = self_text
483                    .parse::<usize>()
484                    .unwrap()
485                    .cmp(&other_text.parse::<usize>().unwrap());
486
487                match ordering {
488                    Ordering::Less => return Ordering::Less,
489                    Ordering::Greater => return Ordering::Greater,
490                    // If both numbers are equal we check the next sub-/segment.
491                    Ordering::Equal => continue,
492                }
493            }
494
495            // At this point, we know that both sub-/segments are alphabetic.
496            // We do a simple string comparison to determine the newer version.
497            match self_text.cmp(other_text) {
498                Ordering::Less => return Ordering::Less,
499                Ordering::Greater => return Ordering::Greater,
500                // If the strings are equal, we check the next sub-/segment.
501                Ordering::Equal => continue,
502            }
503        }
504    }
505}
506
507impl PartialOrd for PackageVersion {
508    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
509        Some(self.cmp(other))
510    }
511}
512
513impl PartialEq for PackageVersion {
514    fn eq(&self, other: &Self) -> bool {
515        self.cmp(other).is_eq()
516    }
517}
518
519#[cfg(test)]
520mod tests {
521    use rstest::rstest;
522
523    use super::*;
524
525    #[rstest]
526    #[case("1.0.0", vec![
527        VersionSegment::Segment{ text:"1", delimiter_count: 0},
528        VersionSegment::Segment{ text:"0", delimiter_count: 1},
529        VersionSegment::Segment{ text:"0", delimiter_count: 1},
530    ])]
531    #[case("1..0", vec![
532        VersionSegment::Segment{ text:"1", delimiter_count: 0},
533        VersionSegment::Segment{ text:"0", delimiter_count: 2},
534    ])]
535    #[case("1.0.", vec![
536        VersionSegment::Segment{ text:"1", delimiter_count: 0},
537        VersionSegment::Segment{ text:"0", delimiter_count: 1},
538        VersionSegment::Segment{ text:"", delimiter_count: 1},
539    ])]
540    #[case("1..", vec![
541        VersionSegment::Segment{ text:"1", delimiter_count: 0},
542        VersionSegment::Segment{ text:"", delimiter_count: 2},
543    ])]
544    #[case("1...", vec![
545        VersionSegment::Segment{ text:"1", delimiter_count: 0},
546        VersionSegment::Segment{ text:"", delimiter_count: 3},
547    ])]
548    #[case("1.🗻lol.0", vec![
549        VersionSegment::Segment{ text:"1", delimiter_count: 0},
550        VersionSegment::Segment{ text:"lol", delimiter_count: 2},
551        VersionSegment::Segment{ text:"0", delimiter_count: 1},
552    ])]
553    #[case("1.🗻lol.", vec![
554        VersionSegment::Segment{ text:"1", delimiter_count: 0},
555        VersionSegment::Segment{ text:"lol", delimiter_count: 2},
556        VersionSegment::Segment{ text:"", delimiter_count: 1},
557    ])]
558    #[case("20220202", vec![
559        VersionSegment::Segment{ text:"20220202", delimiter_count: 0},
560    ])]
561    #[case("some_string", vec![
562        VersionSegment::Segment{ text:"some", delimiter_count: 0},
563        VersionSegment::Segment{ text:"string", delimiter_count: 1}
564    ])]
565    #[case("alpha7654numeric321", vec![
566        VersionSegment::Segment{ text:"alpha", delimiter_count: 0},
567        VersionSegment::SubSegment{ text:"7654"},
568        VersionSegment::SubSegment{ text:"numeric"},
569        VersionSegment::SubSegment{ text:"321"},
570    ])]
571    fn version_segment_iterator(
572        #[case] version: &str,
573        #[case] expected_segments: Vec<VersionSegment>,
574    ) {
575        let version = PackageVersion(version.to_string());
576        // Convert the simplified definition above into actual VersionSegment instances.
577        let mut segments_iter = version.segments();
578        let mut expected_iter = expected_segments.clone().into_iter();
579
580        // Iterate over both iterators.
581        // We do it manually to ensure that they both end at the same time.
582        loop {
583            let next_segment = segments_iter.next();
584            assert_eq!(
585                next_segment,
586                expected_iter.next(),
587                "Failed for segment {next_segment:?} in version string {version}:\nsegments: {:?}\n expected: {:?}",
588                version.segments().collect::<Vec<VersionSegment>>(),
589                expected_segments,
590            );
591            if next_segment.is_none() {
592                break;
593            }
594        }
595    }
596}