t3x.org / nss / regex.html

(Nils' Scheme Snippets)

 
Paren matching: OFF  |  Category: strings  |  Overview  |  Scheme Books  |  License
 

(re-comp string) => list
(re-match list string) => string | #f

 
Purpose
Compile and match regular expressions. Re-comp compiles a regular expression (RE) and returns it. Compiled REs are represented by lists. Re-match matches a compiled RE against a string. When (part of) the string matches the RE, it returns the matching part. When the strings does not match, it returns #f.
The following RE patterns are evaluated:
. match any character
[char...] match character class (may contain ranges of the form c1-c2)
^ match beginning of line
$ match end of line
* match zero or more instances of the preceding pattern
+ match one or more instances of the preceding pattern
? match the preceding pattern optionally
 
Arguments
re-comp: re regular expression
re-match: cre compiled regular expression
re-match: s string to match
 
Example
(re-match (re-comp "[a-z]*") "123 test") => "test"
(re-match (re-comp "^[a-z]*$") "123 test") => #f
(define (make-range c0 cn cls)
    (if (> c0 cn)
        cls
        (make-range (+ 1 c0)
                    cn
                    (cons (integer->char c0) cls))))

(define (compile-class in out cls first)
  (cond
    ((null? in) #f)
    ((char=? #\] (car in))
      (list (cdr in) (cons (reverse cls) out)))
    ((and first (char=? #\^ (car in)))
      (compile-class (cdr in) out '(#\]) #f))
    ((and (not first)
          (not (null? (cdr cls)))
          (char=? #\- (car in))
          (pair? (cdr in))
          (not (char=? #\] (cadr in))))
      (let ((c0 (char->integer (car cls)))
            (cn (char->integer (cadr in))))
        (if (< c0 cn)
            (compile-class (cddr in)
                           out
                           (make-range c0 cn (cdr cls)) #f)
            (compile-class (cdr in)
                           out
                           (cons #\- cls) #f))))
    (else
      (compile-class (cdr in)
                     out
                     (cons (car in) cls) #f))))

(define (re-comp re)
  (letrec
     ((compile
       (lambda (in out)
         (cond
           ((not in) #f)
           ((null? in) (reverse out))
           (else (case (car in)
                   ((#\\)
                     (if (pair? (cdr in))
                         (compile (cddr in)
                                  (cons (cadr in) out))
                         #f))
                   ((#\^ #\$ #\.)
                     (compile (cdr in)
                              (cons (list (car in)) out)))
                   ((#\* #\?)
                     (compile (cdr in)
                              (if (null? out)
                                  (cons (car in) out)
                                  (cons (list (car in) (car out))
                                        (cdr out)))))
                   ((#\+)
                     (compile (cdr in)
                              (if (null? out)
                                  (cons (car in) out)
                                  (cons (list #\* (car out)) out))))
                   ((#\[)
                     (apply compile
                            (compile-class (cdr in) out '(#\[) #t)))
                   (else
                     (compile (cdr in)
                              (cons (car in) out)))))))))
    (compile (string->list re) '())))

(define (match-char p c)
  (cond ((char? p)
          (char=? p c))
        ((char=? #\. (car p)) #t)
        ((char=? #\[ (car p))
          (and (memv c (cdr p)) #t))
        ((char=? #\] (car p))
          (not (memv c (cdr p))))
        (else #f)))

(define (match-cre p s m)
  (cond
    ((null? p)
      (list->string (reverse m)))
    ((null? s)
      (if (equal? p '((#\$)))
          (match-cre '() '() m)
          #f))
    ((pair? (car p))
      (cond
        ((char=? #\* (caar p))
          (match-star p s m))
        ((char=? #\? (caar p))
          (if (match-char (cadar p) (car s))
              (match-cre (cdr p) (cdr s) (cons (car s) m))
              (match-cre (cdr p) s m)))
        ((match-char (car p) (car s))
          (match-cre (cdr p) (cdr s) (cons (car s) m)))
        (else #f)))
    ((char=? (car p) (car s))
      (match-cre (cdr p) (cdr s) (cons (car s) m)))
    (else #f)))

(define (make-choices p s m)
  (cond
    ((or (null? s)
         (not (match-char (cadar p) (car s))))
      (list (list s m)))
    (else (cons (list s m)
                (make-choices p (cdr s) (cons (car s) m))))))

(define (match-star p s m)
  (letrec
    ((try-choices
       (lambda (c*)
         (if (null? c*)
             #f
             (let ((r (match-cre (cdr p) (caar c*) (cadar c*))))
               (if r (list->string (append (reverse m) (string->list r)))
                     (try-choices (cdr c*))))))))
    (try-choices (reverse (make-choices p s '())))))

(define (try-matches p s)
  (cond ((null? s) #f)
    (else (let ((r (match-cre p s '())))
            (if (or (not r) (string=? "" r))
                (try-matches p (cdr s))
                r)))))

(define (re-match cre s)
  (if (and (pair? cre) (equal? '(#\^) (car cre)))
      (match-cre (cdr cre) (string->list s) '())
      (try-matches cre (string->list s))))

Copyright (C) 2007 Nils M Holm <nmh @ t3x . org>