pandas数据分析练习记录
        【摘要】 
                     
 pandas数据分析练习 
 # coding=utf-8"""    @Project :pachong-master     @File    :list_series.py    @Author  :gaojs    @Date    :2022/6/5 22:06    @Blogs   : https://www.gao...
    
    
    
    pandas数据分析练习
   
    - 
     
      
     
     
      
       # coding=utf-8
      
     
 
    - 
     
      
     
     
      
       """
      
     
 
    - 
     
      
     
     
      
        @Project :pachong-master 
      
     
 
    - 
     
      
     
     
      
        @File :list_series.py
      
     
 
    - 
     
      
     
     
      
        @Author :gaojs
      
     
 
    - 
     
      
     
     
      
        @Date :2022/6/5 22:06
      
     
 
    - 
     
      
     
     
      
        @Blogs : https://www.gaojs.com.cn
      
     
 
    - 
     
      
     
     
      
       """
      
     
 
    - 
     
      
     
     
      
       import pandas as pd
      
     
 
    - 
     
      
     
     
      
       import numpy as np
      
     
 
    - 
     
      
     
     
       
      
     
 
    - 
     
      
     
     
       
      
     
 
    - 
     
      
     
     
      
       class Pandas:
           """Practice exercises for the pandas library.

           Every public method builds a small Series, DataFrame or
           DatetimeIndex, prints it to stdout and returns ``None``.  None of
           the methods read instance state.
           """

           @staticmethod
           def _grades_series():
               """Return the sample course -> score Series shared by the Series exercises."""
               grades = {"语文": 80, "数学": 90, "英语": 100, "计算机": 95}
               return pd.Series(data=grades)

           @staticmethod
           def _people_frame():
               """Return the sample name / gender / age DataFrame shared by the DataFrame exercises."""
               return pd.DataFrame(
                   {
                       "姓名": ["张三", "李四", "王麻子", "高先生"],
                       "性别": ["男", "女", "男", "女"],
                       "年龄": [18, 19, 20, 21],
                   }
               )

           @staticmethod
           def _random_frame(periods=1000):
               """Return a daily-indexed DataFrame of random samples starting 2022-01-01.

               Columns are drawn, in this order, from a standard normal, a
               uniform [0, 1) and a Bernoulli(p=0.2) distribution.

               :param periods: number of consecutive days (rows) to generate.
               :return: DataFrame with columns 'norm', 'unifom', 'binomial'.
               """
               index = pd.date_range(start='2022-1-01', periods=periods, freq='D')
               columns = {
                   'norm': np.random.normal(loc=0, scale=1, size=periods),
                   # The key is misspelled in the original ('unifom'); it is kept
                   # unchanged because it is the column name written to the CSV.
                   'unifom': np.random.uniform(low=0, high=1, size=periods),
                   'binomial': np.random.binomial(n=1, p=0.2, size=periods),
               }
               return pd.DataFrame(data=columns, index=index)

           def list_series(self):
               """Print a Series built from a list (default integer index).

               :return: None
               """
               course = ["语文", "数学", "英语", "计算机"]
               print(pd.Series(data=course))

           def dict_series(self):
               """Print a Series built from a dict; the dict keys become the index.

               :return: None
               """
               print(Pandas._grades_series())

           def series_list(self):
               """Print the values of a Series converted to a plain list.

               :return: None
               """
               print(Pandas._grades_series().to_list())

           def series_dataframe(self):
               """Print a DataFrame built from a Series, with the column labelled 'grade'.

               :return: None
               """
               df = pd.DataFrame(Pandas._grades_series(), columns=['grade'])
               print(df)

           def numpy_create_series(self):
               """Print a Series whose values and index both come from numpy ranges.

               :return: None
               """
               s = pd.Series(
                   # values: 10..90, step 10
                   np.arange(10, 100, 10),
                   # index: 101..109, step 1
                   index=np.arange(101, 110),
                   # element type: float64
                   dtype='float',
               )
               print(s)

           def series_datatype(self):
               """Print a Series of numeric strings after converting each element to int.

               :return: None
               """
               s = pd.Series(
                   data=["001", "002", "003", "004"],
                   index=list("abcd"),
               )
               # ``s.astype(int)`` gives the same result; ``map`` applies the
               # built-in ``int`` function to every element.
               s = s.map(int)
               print(s)

           def series_add_ele(self):
               """Print the grades Series with two extra entries appended.

               ``Series.append`` was removed in pandas 2.0, so the new entries
               are joined on with ``pd.concat``.

               :return: None
               """
               extra = pd.Series({
                   "物理": 99,
                   "高数": 88,
               })
               data = pd.concat([Pandas._grades_series(), extra])
               print(data)

           def series_to_dataframe(self):
               """Print a two-column DataFrame ('course', 'grade') made from a Series.

               The Series index is moved into a regular column with
               ``reset_index`` and both columns are then renamed.

               :return: None
               """
               df = Pandas._grades_series().reset_index()
               df.columns = ['course', 'grade']
               print(df)

           def dict_create_dataframe(self):
               """Print a DataFrame built from a dict of equal-length lists.

               :return: None
               """
               print(Pandas._people_frame())

           def set_dataframe_index(self):
               """Print the people DataFrame with the name column used as the index.

               :return: None
               """
               df = Pandas._people_frame()
               df.set_index("姓名", inplace=True)
               print(df)

           def dataMonth(self):
               """Print every date in May 2022.

               :return: None
               """
               # Equivalent: pd.date_range(start='2022-05-01', end='2022-05-31')
               date_range = pd.date_range(start='2022-05-01', periods=31)
               print(date_range)

           def mondayDate(self):
               """Print 52 consecutive Mondays, the first on or after 2022-01-01.

               :return: None
               """
               # Equivalent: pd.date_range(start='2022-01-01', end='2022-12-31', freq='W-MON')
               data_range = pd.date_range(start='2022-01-01', periods=52, freq='W-MON')
               print(data_range)

           def dayHourDate(self):
               """Print the 24 hourly timestamps of 2022-01-01.

               :return: None
               """
               # Equivalent: pd.date_range(start='2022-01-01', periods=24, freq='h')
               # ``inclusive='left'`` drops the end point (2022-01-02 00:00); it
               # replaces the ``closed`` keyword that pandas 2.0 removed.
               data_range = pd.date_range(
                   start='2022-01-01', end='2022-01-02', freq='h', inclusive='left'
               )
               print(data_range)

           def dateToDataframe(self):
               """Print a DataFrame of the days of May 2022 plus their day-of-year number.

               :return: None
               """
               data_range = pd.date_range(start='2022-5-01', periods=31, freq='D')
               df = pd.DataFrame(data=data_range, columns=['day'])
               df['day_of_year'] = df['day'].dt.dayofyear
               print(df)

           def dateToRandomDataFrame(self):
               """Print a date-indexed DataFrame of normal, uniform and binomial samples.

               :return: None
               """
               print(Pandas._random_frame())

           def logHeadLine(self):
               """Print the first 10 rows and then the last 5 rows of the random DataFrame.

               :return: None
               """
               df = Pandas._random_frame()
               print(df.head(10))
               print(df.tail(5))

           def catDataFrame(self):
               """Print the structure summary and descriptive statistics of the random DataFrame.

               :return: None
               """
               df = Pandas._random_frame()
               # ``info()`` prints its report itself and returns None, so it is
               # not wrapped in ``print`` (that would emit a stray "None" line).
               df.info()
               print(df.describe())

           def countDataAppear(self):
               """Print how often each value occurs in the 'binomial' column.

               :return: None
               """
               df = Pandas._random_frame()
               print(df['binomial'].value_counts())

           def saveCSV(self, rows=10, path='分布数据前50.csv'):
               """Write the first ``rows`` rows of the random DataFrame to a CSV file.

               NOTE(review): the default file name says "first 50" and the
               original docstring said "first hundred rows", but the code has
               always written 10 rows; the default keeps that behaviour --
               confirm the intended count.

               :param rows: number of leading rows to save.
               :param path: destination CSV file.
               :return: None
               """
               Pandas._random_frame().head(rows).to_csv(path)

           def csvLoadDataframe(self, path='分布数据前50.csv'):
               """Load a CSV into a DataFrame and print its summary and first rows.

               The first CSV column is used as the index.

               :param path: CSV file to read; the default is the file that
                   ``saveCSV`` writes by default.
               :return: None
               """
               df = pd.read_csv(path, index_col=0)
               # ``info()`` prints its report itself and returns None.
               df.info()
               print(df.head())
      
     
 
    - 
     
      
     
     
       
      
     
 
    - 
     
      
     
     
       
      
     
 
    - 
     
      
     
     
      
       # test = Pandas()
      
     
 
    - 
     
      
     
     
      
       # test.csvLoadDataframe()
      
     
 
   
  
   
 未完待续
文章来源: blog.csdn.net,作者:懿曲折扇情,版权归原作者所有,如需转载,请联系作者。
原文链接:blog.csdn.net/qq_41332844/article/details/126837419
        【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
            cloudbbs@huaweicloud.com
        
        
        
        
        
        
        - 点赞
 - 收藏
 - 关注作者
 
            
           
评论(0)