From 9505b54a4f9f0265c9d8be53763f0c59d6f62a44 Mon Sep 17 00:00:00 2001 From: zimoun Date: Tue, 19 Jan 2021 22:28:08 +0100 Subject: utils: Add string distance. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * guix/utils.scm (string-distance): New procedure. (string-closest): New procedure. * tests/utils.scm ("string-distance", "string-closest"): New tests. Signed-off-by: Ludovic Courtès --- guix/utils.scm | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'guix') diff --git a/guix/utils.scm b/guix/utils.scm index a85e2f495c..1625cab19b 100644 --- a/guix/utils.scm +++ b/guix/utils.scm @@ -8,6 +8,7 @@ ;;; Copyright © 2017 Mathieu Othacehe ;;; Copyright © 2018, 2020 Marius Bakke ;;; Copyright © 2020 Efraim Flashner +;;; Copyright © 2021 Simon Tournier ;;; ;;; This file is part of GNU Guix. ;;; @@ -37,6 +38,7 @@ #:use-module (guix memoization) #:use-module ((guix build utils) #:select (dump-port mkdir-p delete-file-recursively)) #:use-module ((guix build syscalls) #:select (mkdtemp! fdatasync)) + #:use-module ((guix combinators) #:select (fold2)) #:use-module (guix diagnostics) ;, &error-location, etc. #:use-module (ice-9 format) #:use-module (ice-9 regex) @@ -115,7 +117,10 @@ call-with-decompressed-port compressed-output-port call-with-compressed-output-port - canonical-newline-port)) + canonical-newline-port + + string-distance + string-closest)) ;;; @@ -880,6 +885,46 @@ be determined." ;; raising an error would upset Geiser users #f)))))) + +;;; +;;; String comparison. +;;; + +(define (string-distance s1 s2) + "Compute the Levenshtein distance between two strings." + ;; Naive implemenation + (define loop + (mlambda (as bt) + (match as + (() (length bt)) + ((a s ...) + (match bt + (() (length as)) + ((b t ...) + (if (char=? a b) + (loop s t) + (1+ (min + (loop as t) + (loop s bt) + (loop s t)))))))))) + + (let ((c1 (string->list s1)) + (c2 (string->list s2))) + (loop c1 c2))) + +(define* (string-closest trial tests #:key (threshold 3)) + "Return the string from TESTS that is the closest from the TRIAL, +according to 'string-distance'. If the TESTS are too far from TRIAL, +according to THRESHOLD, then #f is returned." + (identity ;discard second return value + (fold2 (lambda (test closest minimal) + (let ((dist (string-distance trial test))) + (if (and (< dist minimal) (< dist threshold)) + (values test dist) + (values closest minimal)))) + #f +inf.0 + tests))) + ;;; Local Variables: ;;; eval: (put 'call-with-progress-reporter 'scheme-indent-function 1) ;;; End: -- cgit v1.2.3