搜索引擎以及各种网站中的搜索功能已经是人们从海量信息中快速获取特定信息的常用方式。使用 Redis 可以搭建高性能、多特性的搜索引擎,也特别适合解决基于搜索的问题。
本实训项目从构建反向索引、基本搜索操作、实现搜索三个方面介绍如何使用 Redis 解决基于搜索的问题。
第1关:构建反向索引
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re

import redis

conn = redis.Redis()

# A word is a run of at least two ASCII lowercase letters; compiled once.
_WORD_RE = re.compile(r"[a-z]{2,}")


# Text tokenization
def tokenize(content):
    """Return the set of distinct lowercase words found in ``content``.

    The text is lowercased first, then every run of two or more ASCII
    letters is taken as a word. Digits, punctuation and single letters
    never match and are therefore dropped.

    Args:
        content: The text to split into words.

    Returns:
        A set of lowercase word strings (empty if nothing matches).
    """
    #********* Begin *********#
    # The pattern already enforces the two-letter minimum, so no extra
    # length check is needed.
    return set(_WORD_RE.findall(content.lower()))
    #********* End *********#


# Build the inverted index for a document
def index_document(content):
    """Store a document and add it to the inverted index.

    A new id is taken from the ``content:id`` counter. The text is saved
    in the ``contents`` hash under that id, and the id is added to the
    ``keyword:<word>`` set of every word produced by ``tokenize``.

    Args:
        content: The document text to store and index.
    """
    #********* Begin *********#
    # The id has to be known before anything else can be queued.
    content_id = conn.incr("content:id")

    # Queue the document body together with its index entries in a single
    # transactional pipeline so they are sent and applied as one unit.
    pipeline = conn.pipeline(True)
    pipeline.hset("contents", content_id, content)
    for word in tokenize(content):
        pipeline.sadd("keyword:" + word, content_id)
    pipeline.execute()
    #********* End *********#


# Stage 2: basic search operations
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import uuid

import redis

conn = redis.Redis()

# A query term is a word of two or more ASCII letters, optionally prefixed
# with "+" (synonym) or "-" (exclusion); compiled once.
_QUERY_RE = re.compile(r"[+-]?[a-z]{2,}")


# Parse a search query
def parse(query):
    """Parse a query string into required word groups and excluded words.

    The query is lowercased and scanned for terms:

    * a bare word closes the current synonym group (if any) and starts a
      new one;
    * ``+word`` adds the word to the current synonym group;
    * ``-word`` marks the word as unwanted.

    Args:
        query: The raw query string.

    Returns:
        A ``(wanted, unwanted)`` tuple. ``wanted`` is a list of synonym
        groups, each a list of words; ``unwanted`` is a list of words.
        Words inside a group, and in ``unwanted``, come from sets, so
        their order is unspecified.
    """
    #********* Begin *********#
    unwanted = set()
    wanted = []
    synonyms = set()

    for term in _QUERY_RE.findall(query.lower()):
        prefix = term[0] if term[0] in "+-" else None
        word = term[1:] if prefix else term

        if prefix == "-":
            unwanted.add(word)
            continue

        # A bare word ends the group collected so far.
        if synonyms and not prefix:
            wanted.append(list(synonyms))
            synonyms = set()
        synonyms.add(word)

    # Flush the last group still being collected.
    if synonyms:
        wanted.append(list(synonyms))

    return wanted, list(unwanted)
    #********* End *********#


# Run an intersection/union/difference and keep the result in a temp set
def set_common(method, names):
    """Run a set-store command over keyword sets into a temporary set.

    Args:
        method: Name of the redis client method to call, for example
            ``"sinterstore"``, ``"sunionstore"`` or ``"sdiffstore"``.
        names: Names of the input sets without the ``keyword:`` prefix.
            Ids returned by earlier ``set_common`` calls are valid names.

    Returns:
        The id of the result set. The set itself is stored under
        ``keyword:<id>`` and expires after 60 seconds.
    """
    #********* Begin *********#
    common_id = str(uuid.uuid4())
    destination = "keyword:" + common_id
    keys = ["keyword:" + name for name in names]

    # Store and expire in one transaction so a failure between the two
    # commands cannot leave a temporary set behind without a TTL.
    pipeline = conn.pipeline(True)
    getattr(pipeline, method)(destination, *keys)
    pipeline.expire(destination, 60)
    pipeline.execute()

    return common_id
    #********* End *********#


# Stage 3: implementing search
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import uuid

import redis

conn = redis.Redis()

# A query term is a word of two or more ASCII letters, optionally prefixed
# with "+" (synonym) or "-" (exclusion); compiled once.
_QUERY_RE = re.compile(r"[+-]?[a-z]{2,}")


# Run a search
def search(query):
    """Return the ids of documents matching ``query``.

    Each synonym group from ``parse`` is reduced to one set (a union when
    the group has several words), the groups are intersected, and the
    documents containing any unwanted word are then subtracted.

    Args:
        query: The raw query string, in the syntax understood by ``parse``.

    Returns:
        The members of the final result set as returned by
        ``conn.smembers``, or ``None`` when the query contains no
        required word.
    """
    #********* Begin *********#
    wanted, unwanted = parse(query)
    if not wanted:
        return None

    # One set name per synonym group: the word itself, or a temporary
    # union when the group offers alternatives.
    to_intersect = []
    for group in wanted:
        if len(group) > 1:
            to_intersect.append(set_common("sunionstore", group))
        else:
            to_intersect.append(group[0])

    if len(to_intersect) > 1:
        result = set_common("sinterstore", to_intersect)
    else:
        result = to_intersect[0]

    if unwanted:
        # SDIFFSTORE subtracts every later set from the first one.
        result = set_common("sdiffstore", [result] + unwanted)

    return conn.smembers("keyword:" + result)
    #********* End *********#


# Parse a search query
def parse(query):
    """Parse a query string into required word groups and excluded words.

    The query is lowercased and scanned for terms:

    * a bare word closes the current synonym group (if any) and starts a
      new one;
    * ``+word`` adds the word to the current synonym group;
    * ``-word`` marks the word as unwanted.

    Args:
        query: The raw query string.

    Returns:
        A ``(wanted, unwanted)`` tuple. ``wanted`` is a list of synonym
        groups, each a list of words; ``unwanted`` is a list of words.
        Words inside a group, and in ``unwanted``, come from sets, so
        their order is unspecified.
    """
    unwanted = set()
    wanted = []
    synonyms = set()

    for term in _QUERY_RE.findall(query.lower()):
        prefix = term[0] if term[0] in "+-" else None
        word = term[1:] if prefix else term

        if prefix == "-":
            unwanted.add(word)
            continue

        # A bare word ends the group collected so far.
        if synonyms and not prefix:
            wanted.append(list(synonyms))
            synonyms = set()
        synonyms.add(word)

    # Flush the last group still being collected.
    if synonyms:
        wanted.append(list(synonyms))

    return wanted, list(unwanted)


# Run an intersection/union/difference and keep the result in a temp set
def set_common(method, names):
    """Run a set-store command over keyword sets into a temporary set.

    Args:
        method: Name of the redis client method to call, for example
            ``"sinterstore"``, ``"sunionstore"`` or ``"sdiffstore"``.
        names: Names of the input sets without the ``keyword:`` prefix.
            Ids returned by earlier ``set_common`` calls are valid names.

    Returns:
        The id of the result set. The set itself is stored under
        ``keyword:<id>`` and expires after 60 seconds.
    """
    common_id = str(uuid.uuid4())
    destination = "keyword:" + common_id
    keys = ["keyword:" + name for name in names]

    # Store and expire in one transaction so a failure between the two
    # commands cannot leave a temporary set behind without a TTL.
    pipeline = conn.pipeline(True)
    getattr(pipeline, method)(destination, *keys)
    pipeline.expire(destination, 60)
    pipeline.execute()

    return common_id