Python实现简单的索引排序与搜索功能_Python

Python实现简单的索引排序与搜索功能

2021-10-09 00:21Mr.Pan_学狂 Python

这篇文章主要介绍了Python实现简单的索引排序与搜索功能，本文通过实例代码给大家介绍得非常详细，对大家的学习或工作具有一定的参考借鉴价值，需要的朋友可以参考下。

今天上课学了索引排序与搜索，觉得有点意思，于是用Python实现了一下，跟大家分享一波。

代码如下：

				?

									import requests

									import re

									def News_Spider():
									    """Download the Sina News front page and return its headline texts.

									    The page is fetched over HTTPS, every anchor text matched by the
									    pattern ``target="_blank">(.*?)</a>`` is extracted, and an entry is
									    kept only if it contains no ``<``, is longer than six characters and
									    does not contain '客户端'.

									    Returns:
									        dict: Maps the ids ``'d1'``, ``'d2'``, ... (numbered from 1 in the
									        order the headlines appear in the page) to the headline strings.

									    Raises:
									        requests.exceptions.RequestException: Propagated from
									            ``requests.get`` when the request fails or times out.
									    """
									    url = 'https://news.sina.com.cn/'
									    headers = {
									        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
									    }
									    # The second positional parameter of requests.get() is ``params``, so
									    # the headers have to be passed by keyword; otherwise they are sent as
									    # query-string parameters and the User-Agent is never applied.
									    # NOTE(review): verify=False turns off TLS certificate verification.
									    # It is kept from the original, but HTTPS does not require it --
									    # consider removing it.
									    response = requests.get(url, headers=headers, verify=False, timeout=10)
									    response.encoding = 'utf-8'
									    html = response.text

									    anchor_texts = re.findall(r'target="_blank">(.*?)</a>', html)
									    headlines = [
									        text for text in anchor_texts
									        if '<' not in text and len(text) > 6 and '客户端' not in text
									    ]
									    return {
									        'd{}'.format(number): text
									        for number, text in enumerate(headlines, start=1)
									    }

									def change_set():
									    """Return the set of distinct tokens found in the scraped headlines.

									    Calls ``News_Spider()`` to obtain the documents and splits every
									    headline with ``str.split()`` (whitespace separated).

									    Returns:
									        set: Every distinct token across all headlines.
									    """
									    documents = News_Spider()
									    return {
									        token
									        for headline in documents.values()
									        for token in headline.split()
									    }

									def reverse_index():
									    """Build, print and return an inverted index over the scraped headlines.

									    Each headline is split on whitespace with ``str.split()``; every
									    distinct token becomes a key whose value lists the ids of the
									    documents containing it.

									    The page is downloaded once per call, so the vocabulary and the
									    document ids always describe the same snapshot of the page.

									    Returns:
									        dict: Maps each token to a list of document ids (``'d1'``, ...),
									        in the order the documents are returned by ``News_Spider()``.
									    """
									    docu_set = News_Spider()
									    invert_index = {}
									    for doc_id, headline in docu_set.items():
									        # set() keeps a token that repeats inside one headline from
									        # listing the same document id twice.
									        for token in set(headline.split()):
									            invert_index.setdefault(token, []).append(doc_id)
									    print(invert_index)
									    return invert_index

									def Select():
									    """Run an interactive search loop over the inverted index.

									    Prompts repeatedly for a query. For every index key that contains the
									    query as a substring, the headlines of the referenced documents are
									    printed, each at most once per query. After the results of a query
									    the notice '很抱歉，没有找到更多内容！！' is printed once. Entering
									    '不查了' ends the loop.

									    Returns:
									        None
									    """
									    docu_set = News_Spider()
									    invert_index = reverse_index()
									    while True:
									        Find = input('请输入查找内容：')
									        if Find == '不查了':
									            break
									        # Tracked per query so results of earlier queries never leak into
									        # this one and a headline reachable through several keys is shown
									        # only once.
									        shown = set()
									        for key, doc_ids in invert_index.items():
									            if Find not in key:
									                continue
									            for doc_id in doc_ids:
									                # reverse_index() downloads the page on its own, so an id
									                # may be absent from docu_set; such ids are skipped.
									                if doc_id in docu_set and doc_id not in shown:
									                    shown.add(doc_id)
									                    print(docu_set[doc_id])
									        print('很抱歉，没有找到更多内容！！')

									def main_function():
									    """Entry point: start the interactive headline search.

									    ``Select()`` downloads the page and builds the inverted index itself,
									    so nothing else has to be called first.
									    """
									    Select()

									if __name__ == '__main__':# Program entry point

									    main_function()