爬虫动态采集

2022/1/17 6:09:19

本文主要是介绍爬虫动态采集,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!

爬取各地区gdp数据并保存

import requests
import json.encoder
import pandas as pd
import matplotlib.pyplot as plt
# Fetch regional GDP data from the National Bureau of Statistics (NBS).
def getData():
    """Query the NBS easyquery API for regional GDP and reshape the reply.

    Returns:
        tuple: ``(column, temp_contents, name)`` where
            column        - header row: indicator name followed by one label per year,
            temp_contents - one list per region: region name plus its yearly values,
            name          - the indicator's display name (used as the CSV filename).

    Raises:
        requests.HTTPError: if the API responds with an HTTP error status.
    """
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.116 Safari/537.36',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Referer': 'https://data.stats.gov.cn/easyquery.htm?cn=E0103',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    params = (
        ('m', 'QueryData'),
        ('dbcode', 'fsnd'),
        ('rowcode', 'reg'),
        ('colcode', 'sj'),
        ('wds', '[{"wdcode":"zb","valuecode":"A020101"}]'),
        ('dfwds', '[]'),
        ('k1', '1625471171166'),
    )

    # 1. Fetch the data.
    # NOTE(review): verify=False disables TLS certificate verification; the
    # NBS site's certificate chain is often untrusted by default — confirm
    # this trade-off is acceptable before production use.
    response = requests.get('https://data.stats.gov.cn/easyquery.htm', headers=headers, params=params, verify=False)
    response.raise_for_status()  # fail fast on HTTP-level errors

    # 2. Parse the JSON payload.
    data = json.loads(response.content)
    name = data['returndata']['wdnodes'][0]['nodes'][0]['cname']  # indicator name
    datanodes = data['returndata']['datanodes']                   # flat list of values
    columns = data['returndata']['wdnodes'][2]['nodes']           # time periods (years)
    rows = data['returndata']['wdnodes'][1]['nodes']              # regions

    # 3. Build the output table.
    # Header row: indicator name, then one column label per year.
    column = [name]
    for temp in columns:
        column.append(temp['cname'])

    # datanodes is row-major: each region occupies len(columns) consecutive
    # entries.  Bug fix: derive that stride from the response instead of the
    # hard-coded constant 10, so a different year range still parses.
    n_years = len(columns)
    temp_contents = []
    index = 0
    for row in rows:
        temp_content = [row['cname']]
        for i in range(index, index + n_years):
            temp_content.append(datanodes[i]['data']['data'])
        index += n_years
        temp_contents.append(temp_content)
    return column, temp_contents, name

# Persist the scraped table to disk.
def save_csv(path, column, temp_contents):
    """Write the rows *temp_contents* under header *column* to a UTF-8 CSV at *path*."""
    pd.DataFrame(temp_contents, columns=column).to_csv(
        path, mode='w', index=False, encoding='utf-8'
    )
# Plot a year-vs-GDP line chart for every region in the saved CSV.
def get(path):
    """Read the CSV at *path* and show one line plot per region row.

    Args:
        path: CSV produced by save_csv(); the first column is the region
            name and the remaining columns hold yearly GDP values in the
            order 2020 down to 2011.
    """
    years = ['2020', '2019', '2018', '2017', '2016',
             '2015', '2014', '2013', '2012', '2011']
    # Bug fix: the original ignored *path* and re-read a hard-coded
    # filename ('地区生产总值.csv'); it also assumed exactly 31 rows and
    # shadowed its outer loop index with the inner one.
    df = pd.read_csv(path)
    plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK region names
    for _, region in df.iterrows():
        title = region.iloc[0]
        # One value per year, taken positionally after the region name.
        values = [region.iloc[j] for j in range(1, len(years) + 1)]
        plt.title(title)
        plt.plot(years, values)
        plt.show()
if __name__ == "__main__":
    # Fetch the table: header row, data rows, and the indicator name
    column,temp_contents,name=getData()
    # The indicator's display name doubles as the output filename
    path=name+'.csv'
    # Save the data to CSV, then plot each region's series from it
    save_csv(path,column,temp_contents)
    get(path)



这篇关于爬虫动态采集的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!


扫一扫关注最新编程教程