Whoosh
オンラインドキュメント
https://pypi.org/project/Whoosh/
https://whoosh.readthedocs.org/en/latest/
インストール
pip install whoosh
1 |
使い方
以下の内容で whoosh_sample.py を作成する。
ディレクトリ dirpath 内の \*.txt ファイルを対象に、search_word を含む文書を検索する。
"""Index every *.txt file in a directory with Whoosh and print the files matching a word."""
import glob
import os

from whoosh.fields import Schema, TEXT, ID, NGRAM
from whoosh.index import create_in
from whoosh.qparser import QueryParser

# Directory holding the *.txt files to index; the index itself is created here too.
dirpath = "C:/Users/eagle_eight/text/"
# Term to look for in the indexed file contents.
search_word = "backup"

# title: file name, path: containing directory, content: full text of the file.
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=NGRAM(stored=True))
ix = create_in(dirpath, schema)

# Glob with the directory prefix instead of calling os.chdir(), so the
# process-wide working directory is left untouched.
files = glob.glob(os.path.join(dirpath, "*.txt"))

# Using the writer as a context manager commits on success and cancels
# (releasing the index write lock) if reading any file raises.
with ix.writer() as writer:
    for file in files:
        with open(file, "r", encoding="utf-8") as f:
            contents = f.read()
        writer.add_document(title=os.path.basename(file), path=dirpath, content=contents)

with ix.searcher() as searcher:
    query = QueryParser("content", ix.schema).parse(search_word)
    # NOTE(review): search() is called with its default result limit, so only
    # the top hits are printed -- pass limit=None if every match is wanted.
    results = searcher.search(query)
    for result in results:
        # os.path.join works whether or not the stored directory ends with a separator.
        search_hit_file_path = os.path.join(result["path"], result["title"])
        print(search_hit_file_path)
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |