爬取百度指数

1 代码

  • github地址

  • demo.py

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    from baidu_index.utils import test_cookies
    from baidu_index import config
    from baidu_index import BaiduIndex, ExtendedBaiduIndex

    import openpyxl  # used to write the results to an Excel workbook

    # Only the "BDUSS=..." part of the browser cookie string is required.
    cookies = "BDUSS=HREQUt......"

    if __name__ == "__main__":
        # 1. Create the workbook and write the header row (row 1).
        workbook = openpyxl.Workbook()
        sheet = workbook.create_sheet("四姑娘山海拔")
        for column, title in enumerate(('type', 'date', 'index'), start=1):
            sheet.cell(row=1, column=column, value=title)

        # 2. Check that the cookies are configured correctly.
        # Prints True when the cookies work, False otherwise.
        print(test_cookies(cookies))

        # 3. Keywords to query: a list of keyword groups, e.g.
        # keywords = [['英雄联盟'], ['冠军杯', '英雄联盟'], ['抑郁', '自杀', '明星']]
        keywords = [['四姑娘山海拔']]

        # 4. City/province codes: pass a code as `area` to query a specific
        # region; omit it to query the whole country.
        # The media index cannot be queried per region.
        # print(config.PROVINCE_CODE)
        # print(config.CITY_CODE)

        # 5-1. Fetch the Baidu search index.
        # Each record looks like:
        # {'keyword': ['抑郁', '自杀', '明星'], 'type': 'wise', 'date': '2018-06-10', 'index': '1835'}
        baidu_index = BaiduIndex(
            keywords=keywords,
            start_date='2019-01-01',
            end_date='2019-09-02',
            cookies=cookies
        )

        # Data rows start at row 2, directly below the header row.
        for row, record in enumerate(baidu_index.get_index(), start=2):
            print(record)
            sheet.cell(row=row, column=1, value=record['type'])
            sheet.cell(row=row, column=2, value=record['date'])
            sheet.cell(row=row, column=3, value=record['index'])

        # Write the file once, after every record has been added.
        workbook.save("c.xlsx")

    # # 5-2 获取百度媒体指数
    # # index: {'keyword': ['抑郁', '自杀', '明星'], 'date': '2018-12-29', 'index': '0'}
    # news_index = ExtendedBaiduIndex(
    # keywords=keywords,
    # start_date='2019-01-01',
    # end_date='2019-01-02',
    # cookies=cookies,
    # kind='news'
    # )
    # newsindex = 1
    # print('-----news-----')

    # for index in news_index.get_index():
    # newsindex = newsindex + 1
    # # print(type(index),'hhhh')
    # print(index)
    # # print(index.keyword)
    # # print(index.type)
    # # print(index.date)
    # # print(index.index)
    # # sheet.cell(newsindex,1,index['keyword'])
    # # sheet.cell(newsindex,2,index['type'])
    # sheet.cell(newsindex,3,index['date'])
    # sheet.cell(newsindex,4,index['index'])


    # # 5-3 获取百度资讯指数
    # # index: {'keyword': ['抑郁', '自杀', '明星'], 'date': '2018-12-29', 'index': '1102911'}
    # feed_index = ExtendedBaiduIndex(
    # keywords=keywords,
    # start_date='2019-01-01',
    # end_date='2019-09-02',
    # cookies=cookies,
    # kind='feed'
    # )
    # feedindex = 2
    # print('-----feed-----')
    # for index in feed_index.get_index():
    # feedindex = feedindex + 1
    # # type(index)

    # print(index)
    # # sheet.cell(feedindex,1,index['keyword'])
    # # sheet.cell(feedindex,2,index['type'])
    # sheet.cell(feedindex,3,index['date'])
    # sheet.cell(feedindex,4,index['index'])
    # workbook.save("c.xls")

2 注意事项

  • 安装requests和openpyxl(demo.py使用openpyxl写入Excel)

    1
    根目录下执行:pip install requests openpyxl
  • cookies的值只取BDUSS=....这部分

  • keywords设置查询关键词

    1
    2
    keywords = [['英雄联盟'], ['冠军杯', '英雄联盟'], ['抑郁', '自杀', '明星']]
    keywords = [['四姑娘山']]
  • area设置查询地区,不设置则为全国;地区码可通过打印config.PROVINCE_CODE和config.CITY_CODE查看

  • start_date='2018-01-01',end_date='2019-01-01',设置开始和结束时间