Re-comp compiles a regular expression (RE) and returns
it. Compiled REs are represented by lists. Re-match
matches a compiled RE against a string. When (part of) the string
matches the RE, it returns the matching part. When the strings
does not match, it returns #f.| . | match any character |
|---|---|
| [char...] | match character class (may contain ranges of the form c1-c2) |
| ^ | match beginning of line |
| $ | match end of line |
| * | match zero or more instances of the preceding pattern |
| + | match one or more instances of the preceding pattern |
| ? | match the preceding pattern optionally |
| re-comp: re | regular expression |
|---|---|
| re-match: cre | compiled regular expression |
| re-match: s | string to match |
(re-match (re-comp "[a-z]*") "123 test") => "test" (re-match (re-comp "^[a-z]*$") "123 test") => #f
(define (make-range c0 cn cls)
(if (> c0 cn)
cls
(make-range (+ 1 c0)
cn
(cons (integer->char c0) cls))))
(define (compile-class in out cls first)
(cond
((null? in) #f)
((char=? #\] (car in))
(list (cdr in) (cons (reverse cls) out)))
((and first (char=? #\^ (car in)))
(compile-class (cdr in) out '(#\]) #f))
((and (not first)
(not (null? (cdr cls)))
(char=? #\- (car in))
(pair? (cdr in))
(not (char=? #\] (cadr in))))
(let ((c0 (char->integer (car cls)))
(cn (char->integer (cadr in))))
(if (< c0 cn)
(compile-class (cddr in)
out
(make-range c0 cn (cdr cls)) #f)
(compile-class (cdr in)
out
(cons #\- cls) #f))))
(else
(compile-class (cdr in)
out
(cons (car in) cls) #f))))
(define (re-comp re)
(letrec
((compile
(lambda (in out)
(cond
((not in) #f)
((null? in) (reverse out))
(else (case (car in)
((#\\)
(if (pair? (cdr in))
(compile (cddr in)
(cons (cadr in) out))
#f))
((#\^ #\$ #\.)
(compile (cdr in)
(cons (list (car in)) out)))
((#\* #\?)
(compile (cdr in)
(if (null? out)
(cons (car in) out)
(cons (list (car in) (car out))
(cdr out)))))
((#\+)
(compile (cdr in)
(if (null? out)
(cons (car in) out)
(cons (list #\* (car out)) out))))
((#\[)
(apply compile
(compile-class (cdr in) out '(#\[) #t)))
(else
(compile (cdr in)
(cons (car in) out)))))))))
(compile (string->list re) '())))
(define (match-char p c)
(cond ((char? p)
(char=? p c))
((char=? #\. (car p)) #t)
((char=? #\[ (car p))
(and (memv c (cdr p)) #t))
((char=? #\] (car p))
(not (memv c (cdr p))))
(else #f)))
(define (match-cre p s m)
(cond
((null? p)
(list->string (reverse m)))
((null? s)
(if (equal? p '((#\$)))
(match-cre '() '() m)
#f))
((pair? (car p))
(cond
((char=? #\* (caar p))
(match-star p s m))
((char=? #\? (caar p))
(if (match-char (cadar p) (car s))
(match-cre (cdr p) (cdr s) (cons (car s) m))
(match-cre (cdr p) s m)))
((match-char (car p) (car s))
(match-cre (cdr p) (cdr s) (cons (car s) m)))
(else #f)))
((char=? (car p) (car s))
(match-cre (cdr p) (cdr s) (cons (car s) m)))
(else #f)))
(define (make-choices p s m)
(cond
((or (null? s)
(not (match-char (cadar p) (car s))))
(list (list s m)))
(else (cons (list s m)
(make-choices p (cdr s) (cons (car s) m))))))
(define (match-star p s m)
(letrec
((try-choices
(lambda (c*)
(if (null? c*)
#f
(let ((r (match-cre (cdr p) (caar c*) (cadar c*))))
(if r (list->string (append (reverse m) (string->list r)))
(try-choices (cdr c*))))))))
(try-choices (reverse (make-choices p s '())))))
(define (try-matches p s)
(cond ((null? s) #f)
(else (let ((r (match-cre p s '())))
(if (or (not r) (string=? "" r))
(try-matches p (cdr s))
r)))))
(define (re-match cre s)
(if (and (pair? cre) (equal? '(#\^) (car cre)))
(match-cre (cdr cre) (string->list s) '())
(try-matches cre (string->list s))))