Let's write β

プログラミング中にできたことか、思ったこととか

SuffixArrayを使った簡易検索エンジンやってみた。

文字列の検索アルゴリズムについてしらべていたら、良いチュートリアルを発見したので
やってみました。
簡単なWebサーチエンジンの作り方

(defun all-suffix-node (str)
  (loop for s from 1 to (length str)
       collect (cons s (subseq str (1- s)))))

(defun suffix-array (str)
  (mapcar #'car (sort (all-suffix-node str)
		      (lambda (a b) (string<= (cdr a) (cdr b))))))

(defun suffix (str idx)
  (subseq str (1- idx)))

(defun prefix-p (str sub)
  (when (>= (length str) (length sub))
    (string= (subseq str 0 (length sub))
	     sub)))

(defun sfx-search (str sub)
  (labels ((search% (str sub sfx-array)
	     (when (not (null sfx-array))
	       (let* ((center (ash (length sfx-array) -1))
		      (center-idx (nth center sfx-array))
		      (center-str (suffix str center-idx)))
		 (cond ((string< center-str sub)
			(search% str sub (nthcdr (1+ center) sfx-array)))
		       ((string= center-str sub)
			(cons center-idx
			      (search% str sub (nthcdr (1+ center) sfx-array))))
		       ((string> center-str sub)
			(cond ((prefix-p center-str sub)
			       (list center-idx
				     (search% str sub (subseq sfx-array 0 center))
				     (search% str sub (nthcdr (1+ center) sfx-array))))
			      (t (search% str sub (subseq sfx-array 0 center))))))))))
    (search% str sub (suffix-array str))))

実行結果は

CL-USER> (sfx-search "abcbccab" "ab")
(1 7)

とりあえず第三課題まで。