alpm_types/version/
comparison.rs

1//! Comparison for [`PackageVersion`].
2//!
3//! This code is derived from the [rpmvercmp algorithm in RPM version 4.8.1].
4//! The current version, including its improvements over the original are described in detail in
5//! [alpm-pkgver].
6//!
7//! [alpm-pkgver]: https://alpm.archlinux.page/specifications/alpm-pkgver.7.html
8//! [rpmvercmp algorithm in RPM version 4.8.1]: https://github.com/rpm-software-management/rpm/blob/rpm-4.8.1-release/lib/rpmvercmp.c
9
10use std::{
11    cmp::Ordering,
12    iter::Peekable,
13    str::{CharIndices, Chars, FromStr},
14};
15
16use crate::PackageVersion;
17
18/// This enum represents a single segment in a version string.
19/// [`VersionSegment`]s are returned by the [`VersionSegments`] iterator, which is responsible for
20/// splitting a version string into its segments.
21///
22/// Version strings are split according to the following rules:
23///
24/// - Non-alphanumeric characters always count as delimiters (`.`, `-`, `$`, etc.).
25/// - There's no differentiation between delimiters represented by different characters (e.g. `'$$$'
26///   == '...' == '.$-'`).
27/// - Each segment contains the info about the amount of leading delimiters for that segment.
28///   Leading delimiters that directly follow after one another are grouped together. The length of
29///   the delimiters is important, as it plays a crucial role in the algorithm that determines which
30///   version is newer.
31///
32///   `1...a` would be represented as:
33///
34///   ```
35///   use alpm_types::VersionSegment::*;
36///   vec![
37///     Segment { text: "1", delimiter_count: 0},
38///     Segment { text: "a", delimiter_count: 3},
39///   ];
40///   ```
41/// - Alphanumeric strings are also split into individual sub-segments. This is done by walking over
42///   the string and splitting it every time a switch from alphabetic to numeric is detected or vice
43///   versa.
44///
45///   `1.1foo123.0` would be represented as:
46///
47///   ```
48///   use alpm_types::VersionSegment::*;
49///   vec![
50///     Segment { text: "1", delimiter_count: 0},
51///     Segment { text: "1", delimiter_count: 1},
52///     SubSegment { text: "foo" },
53///     SubSegment { text: "123" },
54///     Segment { text: "0", delimiter_count: 1},
55///   ];
56///   ```
57/// - Trailing delimiters are encoded as an empty string.
58///
59///   `1...` would be represented as:
60///
61///   ```
62///   use alpm_types::VersionSegment::*;
63///   vec![
64///     Segment { text: "1", delimiter_count: 0},
65///     Segment { text: "", delimiter_count: 3},
66///   ];
67///   ```
68#[derive(Clone, Debug, Eq, PartialEq)]
69pub enum VersionSegment<'a> {
70    /// The start of a new segment.
71    /// If the current segment can be split into multiple sub-segments, this variant only contains
72    /// the **first** sub-segment.
73    ///
74    /// To figure out whether this is sub-segment, peek at the next element in the
75    /// [`VersionSegments`] iterator, whether it's a [`VersionSegment::SubSegment`].
76    Segment {
77        /// The string representation of this segment
78        text: &'a str,
79        /// The amount of leading delimiters that were found for this segment
80        delimiter_count: usize,
81    },
82    /// A sub-segment of a version string's segment.
83    ///
84    /// Note that the first sub-segment of a segment that can be split into sub-segments is
85    /// counterintuitively represented by [VersionSegment::Segment]. This implementation detail
86    /// is due to the way the comparison algorithm works, as it does not always differentiate
87    /// between segments and sub-segments.
88    SubSegment {
89        /// The string representation of this sub-segment
90        text: &'a str,
91    },
92}
93
94impl<'a> VersionSegment<'a> {
95    /// Returns the inner string slice independent of [`VersionSegment`] variant.
96    pub fn text(&self) -> &str {
97        match self {
98            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => text,
99        }
100    }
101
102    /// Returns whether the inner string slice is empty, independent of [`VersionSegment`] variant
103    pub fn is_empty(&self) -> bool {
104        match self {
105            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
106                text.is_empty()
107            }
108        }
109    }
110
111    /// Returns an iterator over the chars of the inner string slice.
112    pub fn chars(&self) -> Chars<'a> {
113        match self {
114            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
115                text.chars()
116            }
117        }
118    }
119
120    /// Creates a type `T` from the inner string slice by relying on `T`'s [`FromStr::from_str`]
121    /// implementation.
122    pub fn parse<T: FromStr>(&self) -> Result<T, T::Err> {
123        match self {
124            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
125                FromStr::from_str(text)
126            }
127        }
128    }
129
130    /// Compares the inner string slice with that of another [`VersionSegment`].
131    pub fn str_cmp(&self, other: &VersionSegment) -> Ordering {
132        match self {
133            VersionSegment::Segment { text, .. } | VersionSegment::SubSegment { text } => {
134                text.cmp(&other.text())
135            }
136        }
137    }
138}
139
140/// An [Iterator] over all [VersionSegment]s of an upstream version string.
141/// Check the documentation on [VersionSegment] to see how a string is split into segments.
142///
143/// Important note:
144/// Trailing delimiters will also produce a trailing [VersionSegment] with an empty string.
145///
146/// This iterator is capable of handling utf-8 strings.
147/// However, non alphanumeric chars are still interpreted as delimiters.
148#[derive(Debug)]
149pub struct VersionSegments<'a> {
150    /// The original version string. We need that reference so we can get some string
151    /// slices based on indices later on.
152    version: &'a str,
153    /// An iterator over the version's chars and their respective start byte's index.
154    version_chars: Peekable<CharIndices<'a>>,
155    /// Check if the cursor is currently in a segment.
156    /// This is necessary to detect whether the next segment should be a sub-segment or a new
157    /// segment.
158    in_segment: bool,
159}
160
161impl<'a> VersionSegments<'a> {
162    /// Create a new instance of a VersionSegments iterator.
163    pub fn new(version: &'a str) -> Self {
164        VersionSegments {
165            version,
166            version_chars: version.char_indices().peekable(),
167            in_segment: false,
168        }
169    }
170}
171
172impl<'a> Iterator for VersionSegments<'a> {
173    type Item = VersionSegment<'a>;
174
175    /// Get the next [VersionSegment] of this version string.
176    fn next(&mut self) -> Option<VersionSegment<'a>> {
177        // Used to track the number of delimiters the next segment is prefixed with.
178        let mut delimiter_count = 0;
179
180        // First up, get the delimiters out of the way.
181        // Peek at the next char, if it's a delimiter, consume it and increase the delimiter count.
182        while let Some((_, char)) = self.version_chars.peek() {
183            // An alphanumeric char indicates that we reached the next segment.
184            if char.is_alphanumeric() {
185                break;
186            }
187
188            self.version_chars.next();
189            delimiter_count += 1;
190
191            // As soon as we hit a delimiter, we know that a new segment is about to start.
192            self.in_segment = false;
193            continue;
194        }
195
196        // Get the next char. If there's no further char, we reached the end of the version string.
197        let Some((first_index, first_char)) = self.version_chars.next() else {
198            // We're at the end of the string and now have to differentiate between two cases:
199
200            // 1. There are no trailing delimiters. We can just return `None` as we truly reached
201            //    the end.
202            if delimiter_count == 0 {
203                return None;
204            }
205
206            // 2. There's no further segment, but there were some trailing delimiters. The
207            //    comparison algorithm considers this case which is why we have to somehow encode
208            //    it. We do so by returning an empty segment.
209            return Some(VersionSegment::Segment {
210                text: "",
211                delimiter_count,
212            });
213        };
214
215        // Cache the last valid char + index that was checked. We need this to
216        // calculate the offset in case the last char is a multi-byte UTF-8 char.
217        let mut last_char = first_char;
218        let mut last_char_index = first_index;
219
220        // The following section now handles the splitting of an alphanumeric string into its
221        // sub-segments. As described in the [VersionSegment] docs, the string needs to be split
222        // every time a switch from alphabetic to numeric or vice versa is detected.
223
224        let is_numeric = first_char.is_numeric();
225
226        if is_numeric {
227            // Go through chars until we hit a non-numeric char or reached the end of the string.
228            #[allow(clippy::while_let_on_iterator)]
229            while let Some((index, next_char)) =
230                self.version_chars.next_if(|(_, peek)| peek.is_numeric())
231            {
232                last_char_index = index;
233                last_char = next_char;
234            }
235        } else {
236            // Go through chars until we hit a non-alphabetic char or reached the end of the string.
237            #[allow(clippy::while_let_on_iterator)]
238            while let Some((index, next_char)) =
239                self.version_chars.next_if(|(_, peek)| peek.is_alphabetic())
240            {
241                last_char_index = index;
242                last_char = next_char;
243            }
244        }
245
246        // Create a subslice based on the indices of the first and last char.
247        // The last char might be multi-byte, which is why we add its length.
248        let segment_slice = &self.version[first_index..(last_char_index + last_char.len_utf8())];
249
250        if !self.in_segment {
251            // Any further segments should be sub-segments, unless we hit a delimiter in which
252            // case this variable will reset to false.
253            self.in_segment = true;
254            Some(VersionSegment::Segment {
255                text: segment_slice,
256                delimiter_count,
257            })
258        } else {
259            Some(VersionSegment::SubSegment {
260                text: segment_slice,
261            })
262        }
263    }
264}
265
266impl Ord for PackageVersion {
267    /// This block implements the logic to determine which of two package versions is newer or
268    /// whether they're considered equal.
269    ///
270    /// This logic is surprisingly complex as it mirrors the current C-alpmlib implementation for
271    /// backwards compatibility reasons.
272    /// <https://gitlab.archlinux.org/pacman/pacman/-/blob/a2d029388c7c206f5576456f91bfbea2dca98c96/lib/libalpm/version.c#L83-217>
273    fn cmp(&self, other: &Self) -> Ordering {
274        // Equal strings are considered equal versions.
275        if self.inner() == other.inner() {
276            return Ordering::Equal;
277        }
278
279        let mut self_segments = self.segments();
280        let mut other_segments = other.segments();
281
282        // Loop through both versions' segments and compare them.
283        loop {
284            // Try to get the next segments
285            let self_segment = self_segments.next();
286            let other_segment = other_segments.next();
287
288            // Make sure that there's a next segment for both versions.
289            let (self_segment, other_segment) = match (self_segment, other_segment) {
290                // Both segments exist, we continue after match.
291                (Some(self_seg), Some(other_seg)) => (self_seg, other_seg),
292
293                // Both versions reached their end and are thereby equal.
294                (None, None) => return Ordering::Equal,
295
296                // One version is longer than the other and both are equal until now.
297                //
298                // ## Case 1
299                //
300                // The longer version is one or more **segment**s longer.
301                // In this case, the longer version is always considered newer.
302                //   `1.0` > `1`
303                // `1.0.0` > `1.0`
304                // `1.0.a` > `1.0`
305                //     ⤷ New segment exists, thereby newer
306                //
307                // ## Case 2
308                //
309                // The current **segment** has one or more sub-segments and the next sub-segment is
310                // alphabetic.
311                // In this case, the shorter version is always newer.
312                // The reason for this is to handle pre-releases (e.g. alpha/beta).
313                // `1.0alpha` < `1.0`
314                // `1.0alpha.0` < `1.0`
315                // `1.0alpha12.0` < `1.0`
316                //     ⤷ Next sub-segment is alphabetic.
317                //
318                // ## Case 3
319                //
320                // The current **segment** has one or more sub-segments and the next sub-segment is
321                // numeric. In this case, the longer version is always newer.
322                // `1.alpha0` > `1.alpha`
323                // `1.alpha0.1` > `1.alpha`
324                //         ⤷ Next sub-segment is numeric.
325                (Some(seg), None) => {
326                    // If the current segment is the start of a segment, it's always considered
327                    // newer.
328                    let text = match seg {
329                        VersionSegment::Segment { .. } => return Ordering::Greater,
330                        VersionSegment::SubSegment { text } => text,
331                    };
332
333                    // If it's a sub-segment, we have to check for the edge-case explained above
334                    // If all chars are alphabetic, `self` is consider older.
335                    if !text.is_empty() && text.chars().all(char::is_alphabetic) {
336                        return Ordering::Less;
337                    }
338
339                    return Ordering::Greater;
340                }
341
342                // This is the same logic as above, but inverted.
343                (None, Some(seg)) => {
344                    let text = match seg {
345                        VersionSegment::Segment { .. } => return Ordering::Less,
346                        VersionSegment::SubSegment { text } => text,
347                    };
348                    if !text.is_empty() && text.chars().all(char::is_alphabetic) {
349                        return Ordering::Greater;
350                    }
351                    if !text.is_empty() && text.chars().all(char::is_alphabetic) {
352                        return Ordering::Greater;
353                    }
354
355                    return Ordering::Less;
356                }
357            };
358
359            // At this point, we have two sub-/segments.
360            //
361            // We start with the special case where one or both of the segments are empty.
362            // That means that the end of the version string has been reached, but there were one
363            // or more trailing delimiters, e.g.:
364            //
365            // `1.0.`
366            // `1.0...`
367            if other_segment.is_empty() && self_segment.is_empty() {
368                // Both reached the end of their version with a trailing delimiter.
369                // Counterintuitively, the trailing delimiter count is not considered and both
370                // versions are considered equal
371                // `1.0....` == `1.0.`
372                //       ⤷ Length of delimiters is ignored.
373                return Ordering::Equal;
374            } else if self_segment.is_empty() {
375                // Now we have to consider the special case where `other` is alphabetic.
376                // If that's the case, `self` will be considered newer, as the alphabetic string
377                // indicates a pre-release,
378                // `1.0.` > `1.0alpha0`
379                // `1.0.` > `1.0.alpha.0`
380                //                ⤷ Alphabetic sub-/segment and thereby always older.
381                //
382                // Also, we know that `other_segment` isn't empty at this point.
383                // It's noteworthy that this logic does not differentiated between segments and
384                // sub-segments.
385                if other_segment.chars().all(char::is_alphabetic) {
386                    return Ordering::Greater;
387                }
388
389                // In all other cases, `other` is newer.
390                // `1.0.` < `1.0.0`
391                // `1.0.` < `1.0.2.0`
392                return Ordering::Less;
393            } else if other_segment.is_empty() {
394                // Check docs above, as it's the same logic as above, just inverted.
395                if self_segment.chars().all(char::is_alphabetic) {
396                    return Ordering::Less;
397                }
398
399                return Ordering::Greater;
400            }
401
402            // We finally reached the end handling special cases when the version string ended.
403            // From now on, we know that we have two actual sub-/segments that might be prefixed by
404            // some delimiters.
405            //
406            // However, it is possible that one version has a segment and while the other has a
407            // sub-segment. This special case is what is handled next.
408            //
409            // We purposefully give up ownership of both segments.
410            // This is to ensure that following this match block, we finally only have to
411            // consider the actual text of the segments, as we'll know that both sub-/segments are
412            // of the same type.
413            let (self_text, other_text) = match (self_segment, other_segment) {
414                (
415                    VersionSegment::Segment {
416                        delimiter_count: self_count,
417                        text: self_text,
418                    },
419                    VersionSegment::Segment {
420                        delimiter_count: other_count,
421                        text: other_text,
422                    },
423                ) => {
424                    // Special case:
425                    // If one of the segments has more leading delimiters than the other, it is
426                    // always considered newer, no matter what follows after the delimiters.
427                    // `1..0.0` > `1.2.0`
428                    //    ⤷ Two delimiters, thereby always newer.
429                    // `1..0.0` < `1..2.0`
430                    //               ⤷ Same amount of delimiters, now `2 > 0`
431                    if self_count != other_count {
432                        return self_count.cmp(&other_count);
433                    }
434                    (self_text, other_text)
435                }
436                // If one is the start of a new segment, while the other is still a sub-segment,
437                // we can return early as a new segment always overrules a sub-segment.
438                // `1.alpha0.0` < `1.alpha.0`
439                //         ⤷ sub-segment  ⤷ segment
440                //         In the third iteration there's a sub-segment on the left side while
441                //         there's a segment on the right side.
442                (VersionSegment::Segment { .. }, VersionSegment::SubSegment { .. }) => {
443                    return Ordering::Greater;
444                }
445                (VersionSegment::SubSegment { .. }, VersionSegment::Segment { .. }) => {
446                    return Ordering::Less;
447                }
448                (
449                    VersionSegment::SubSegment { text: self_text },
450                    VersionSegment::SubSegment { text: other_text },
451                ) => (self_text, other_text),
452            };
453
454            // At this point, we know that we are dealing with two identical types of sub-/segments.
455            // Thereby, we now only have to compare the text of those sub-/segments.
456
457            // Check whether any of the texts are numeric.
458            // Numeric sub-/segments are always considered newer than non-numeric sub-/segments.
459            // E.g.: `1.0.0` > `1.foo.0`
460            //          ⤷ `0` vs `foo`.
461            //            `0` is numeric and therebynewer than a alphanumeric one.
462            let self_is_numeric = !self_text.is_empty() && self_text.chars().all(char::is_numeric);
463            let other_is_numeric =
464                !other_text.is_empty() && other_text.chars().all(char::is_numeric);
465
466            if self_is_numeric && !other_is_numeric {
467                return Ordering::Greater;
468            } else if !self_is_numeric && other_is_numeric {
469                return Ordering::Less;
470            } else if self_is_numeric && other_is_numeric {
471                // In case both are numeric, we do a number comparison.
472                // We can parse the string as we know that they only consist of digits, hence the
473                // unwrap.
474                //
475                // Preceding zeroes are to be ignored, which is automatically done by Rust's number
476                // parser.
477                // E.g. `1.0001.1` == `1.1.1`
478                //          ⤷ `000` is ignored in the comparison.
479                let ordering = self_text
480                    .parse::<usize>()
481                    .unwrap()
482                    .cmp(&other_text.parse::<usize>().unwrap());
483
484                match ordering {
485                    Ordering::Less => return Ordering::Less,
486                    Ordering::Greater => return Ordering::Greater,
487                    // If both numbers are equal we check the next sub-/segment.
488                    Ordering::Equal => continue,
489                }
490            }
491
492            // At this point, we know that both sub-/segments are alphabetic.
493            // We do a simple string comparison to determine the newer version.
494            match self_text.cmp(other_text) {
495                Ordering::Less => return Ordering::Less,
496                Ordering::Greater => return Ordering::Greater,
497                // If the strings are equal, we check the next sub-/segment.
498                Ordering::Equal => continue,
499            }
500        }
501    }
502}
503
504impl PartialOrd for PackageVersion {
505    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
506        Some(self.cmp(other))
507    }
508}
509
510impl PartialEq for PackageVersion {
511    fn eq(&self, other: &Self) -> bool {
512        self.cmp(other).is_eq()
513    }
514}
515
516#[cfg(test)]
517mod tests {
518    use rstest::rstest;
519
520    use super::*;
521
522    #[rstest]
523    #[case("1.0.0", vec![
524        VersionSegment::Segment{ text:"1", delimiter_count: 0},
525        VersionSegment::Segment{ text:"0", delimiter_count: 1},
526        VersionSegment::Segment{ text:"0", delimiter_count: 1},
527    ])]
528    #[case("1..0", vec![
529        VersionSegment::Segment{ text:"1", delimiter_count: 0},
530        VersionSegment::Segment{ text:"0", delimiter_count: 2},
531    ])]
532    #[case("1.0.", vec![
533        VersionSegment::Segment{ text:"1", delimiter_count: 0},
534        VersionSegment::Segment{ text:"0", delimiter_count: 1},
535        VersionSegment::Segment{ text:"", delimiter_count: 1},
536    ])]
537    #[case("1..", vec![
538        VersionSegment::Segment{ text:"1", delimiter_count: 0},
539        VersionSegment::Segment{ text:"", delimiter_count: 2},
540    ])]
541    #[case("1...", vec![
542        VersionSegment::Segment{ text:"1", delimiter_count: 0},
543        VersionSegment::Segment{ text:"", delimiter_count: 3},
544    ])]
545    #[case("1.🗻lol.0", vec![
546        VersionSegment::Segment{ text:"1", delimiter_count: 0},
547        VersionSegment::Segment{ text:"lol", delimiter_count: 2},
548        VersionSegment::Segment{ text:"0", delimiter_count: 1},
549    ])]
550    #[case("1.🗻lol.", vec![
551        VersionSegment::Segment{ text:"1", delimiter_count: 0},
552        VersionSegment::Segment{ text:"lol", delimiter_count: 2},
553        VersionSegment::Segment{ text:"", delimiter_count: 1},
554    ])]
555    #[case("20220202", vec![
556        VersionSegment::Segment{ text:"20220202", delimiter_count: 0},
557    ])]
558    #[case("some_string", vec![
559        VersionSegment::Segment{ text:"some", delimiter_count: 0},
560        VersionSegment::Segment{ text:"string", delimiter_count: 1}
561    ])]
562    #[case("alpha7654numeric321", vec![
563        VersionSegment::Segment{ text:"alpha", delimiter_count: 0},
564        VersionSegment::SubSegment{ text:"7654"},
565        VersionSegment::SubSegment{ text:"numeric"},
566        VersionSegment::SubSegment{ text:"321"},
567    ])]
568    fn version_segment_iterator(
569        #[case] version: &str,
570        #[case] expected_segments: Vec<VersionSegment>,
571    ) {
572        let version = PackageVersion(version.to_string());
573        // Convert the simplified definition above into actual VersionSegment instances.
574        let mut segments_iter = version.segments();
575        let mut expected_iter = expected_segments.clone().into_iter();
576
577        // Iterate over both iterators.
578        // We do it manually to ensure that they both end at the same time.
579        loop {
580            let next_segment = segments_iter.next();
581            assert_eq!(
582                next_segment,
583                expected_iter.next(),
584                "Failed for segment {next_segment:?} in version string {version}:\nsegments: {:?}\n expected: {:?}",
585                version.segments().collect::<Vec<VersionSegment>>(),
586                expected_segments,
587            );
588            if next_segment.is_none() {
589                break;
590            }
591        }
592    }
593}