pandas数据分析练习记录
【摘要】
pandas数据分析练习
# coding=utf-8""" @Project :pachong-master @File :list_series.py @Author :gaojs @Date :2022/6/5 22:06 @Blogs : https://www.gao...
pandas数据分析练习
-
# coding=utf-8
-
"""
-
@Project :pachong-master
-
@File :list_series.py
-
@Author :gaojs
-
@Date :2022/6/5 22:06
-
@Blogs : https://www.gaojs.com.cn
-
"""
-
import pandas as pd
-
import numpy as np
-
-
-
class Pandas:
    """Collection of small pandas practice exercises.

    Every public method builds a tiny example object, prints it to stdout
    and returns ``None``. The private helpers only build the fixtures that
    several exercises share.
    """

    # File written by ``saveCSV`` and read back by ``csvLoadDataframe``.
    _CSV_PATH = '分布数据前50.csv'

    def __init__(self):
        pass

    # ------------------------------------------------------------------
    # Shared fixtures
    # ------------------------------------------------------------------
    @staticmethod
    def _grades_series():
        """Return the course -> grade Series used by the Series exercises."""
        grades = {"语文": 80, "数学": 90, "英语": 100, "计算机": 95}
        return pd.Series(data=grades)

    @staticmethod
    def _people_frame():
        """Return the name / gender / age DataFrame used by the frame exercises."""
        return pd.DataFrame(
            {
                "姓名": ["张三", "李四", "王麻子", "高先生"],
                "性别": ["男", "女", "男", "女"],
                "年龄": [18, 19, 20, 21],
            }
        )

    @staticmethod
    def _random_frame(size=1000):
        """Return a daily-indexed DataFrame of random samples.

        The index starts at 2022-01-01 and has ``size`` daily entries. The
        columns are drawn, in this order, from a standard normal, a uniform
        [0, 1) and a Bernoulli(p=0.2) distribution.

        :param size: number of rows to generate
        :return: DataFrame with columns ``norm``, ``unifom`` and ``binomial``
        """
        index = pd.date_range(start='2022-1-01', periods=size, freq='D')
        samples = {
            'norm': np.random.normal(loc=0, scale=1, size=size),
            # The key spelling is kept as-is so previously written CSV
            # headers keep matching.
            'unifom': np.random.uniform(low=0, high=1, size=size),
            'binomial': np.random.binomial(n=1, p=0.2, size=size),
        }
        return pd.DataFrame(data=samples, index=index)

    # ------------------------------------------------------------------
    # Series exercises
    # ------------------------------------------------------------------
    def list_series(self):
        """Print a Series built from a list (default integer index).

        :return: None
        """
        course = ["语文", "数学", "英语", "计算机"]
        data = pd.Series(data=course)
        print(data)

    def dict_series(self):
        """Print a Series built from a dict (the dict keys become the index).

        :return: None
        """
        print(self._grades_series())

    def series_list(self):
        """Print the values of a Series converted to a plain list.

        :return: None
        """
        members = self._grades_series().to_list()
        print(members)

    def series_dataframe(self):
        """Print a one-column DataFrame (``grade``) built from a Series.

        :return: None
        """
        data = self._grades_series()
        df = pd.DataFrame(data, columns=['grade'])
        print(df)

    def numpy_create_series(self):
        """Print a float Series created from numpy ranges.

        :return: None
        """
        s = pd.Series(
            # Values: 10..90 in steps of 10.
            np.arange(10, 100, 10),
            # Index: 101..109 in steps of 1.
            index=np.arange(101, 110),
            # Stored as float64.
            dtype='float',
        )
        print(s)

    def series_datatype(self):
        """Print a Series of numeric strings converted to integers.

        :return: None
        """
        s = pd.Series(
            data=["001", "002", "003", "004"],
            index=list("abcd"),
        )
        # ``s.astype(int)`` gives the same result; here ``int`` is applied
        # to every element through ``map``.
        s = s.map(int)
        print(s)

    def series_add_ele(self):
        """Print a Series after adding new elements to it.

        ``Series.append`` no longer exists in pandas 2.x, so the extra
        entries are attached with ``pd.concat``, which works on old and new
        pandas versions alike.

        :return: None
        """
        data = self._grades_series()
        extra = pd.Series({
            "物理": 99,
            "高数": 88,
        })
        data = pd.concat([data, extra])
        print(data)

    def series_to_dataframe(self):
        """Print a two-column DataFrame (``course``, ``grade``) built from a Series.

        :return: None
        """
        df = self._grades_series().reset_index()
        df.columns = ['course', 'grade']
        print(df)

    # ------------------------------------------------------------------
    # DataFrame exercises
    # ------------------------------------------------------------------
    def dict_create_dataframe(self):
        """Print a DataFrame created from a dict of columns.

        :return: None
        """
        print(self._people_frame())

    def set_dataframe_index(self):
        """Print a DataFrame after making the ``姓名`` column its index.

        :return: None
        """
        df = self._people_frame().set_index("姓名")
        print(df)

    # ------------------------------------------------------------------
    # Date-range exercises
    # ------------------------------------------------------------------
    def dataMonth(self):
        """Print every date of May 2022.

        :return: None
        """
        # Equivalent: pd.date_range(start='2022-05-01', end='2022-05-31')
        date_range = pd.date_range(start='2022-05-01', periods=31)
        print(date_range)

    def mondayDate(self):
        """Print the 52 Mondays of 2022.

        :return: None
        """
        # Equivalent: pd.date_range(start='2022-01-01', end='2022-12-31',
        #                           freq='W-MON')
        data_range = pd.date_range(start='2022-01-01', periods=52, freq='W-MON')
        print(data_range)

    def dayHourDate(self):
        """Print the 24 hourly timestamps of 2022-01-01.

        The range is expressed as "24 periods of one hour" rather than a
        half-open start/end interval: the ``closed`` keyword was removed
        from ``pd.date_range`` in pandas 2.0 and the ``'H'`` alias is
        deprecated, while an ``Hour`` offset object is accepted everywhere.

        :return: None
        """
        data_range = pd.date_range(
            start='2022-01-01', periods=24, freq=pd.offsets.Hour()
        )
        print(data_range)

    def dateToDataframe(self):
        """Print a DataFrame of the days of May 2022 with their day-of-year.

        :return: None
        """
        data_range = pd.date_range(start='2022-5-01', periods=31, freq='D')
        df = pd.DataFrame(data=data_range, columns=['day'])
        df['day_of_year'] = df['day'].dt.dayofyear
        print(df)

    # ------------------------------------------------------------------
    # Random-data exercises
    # ------------------------------------------------------------------
    def dateToRandomDataFrame(self):
        """Print a date-indexed DataFrame of normal / uniform / binomial samples.

        :return: None
        """
        print(self._random_frame())

    def logHeadLine(self):
        """Print the first 10 and the last 5 rows of the random DataFrame.

        :return: None
        """
        df = self._random_frame()
        print(df.head(10))
        print(df.tail(5))

    def catDataFrame(self):
        """Print basic information and summary statistics of the random DataFrame.

        :return: None
        """
        df = self._random_frame()
        # ``info()`` writes its report itself and returns None; wrapping it
        # in ``print`` would emit a stray "None" line.
        df.info()
        print(df.describe())

    def countDataAppear(self):
        """Print how often each value occurs in the ``binomial`` column.

        :return: None
        """
        df = self._random_frame()
        print(df['binomial'].value_counts())

    def saveCSV(self):
        """Write the first 10 rows of the random DataFrame to a CSV file.

        NOTE(review): the file name mentions 50 rows and the original note
        said 100, but 10 rows is what has always been written; the row count
        is deliberately left unchanged here.

        :return: None
        """
        df = self._random_frame()
        df.head(10).to_csv(self._CSV_PATH)

    def csvLoadDataframe(self):
        """Load the CSV written by ``saveCSV`` and print its info and head.

        The first CSV column is used as the index.

        :return: None
        """
        df = pd.read_csv(self._CSV_PATH, index_col=0)
        # ``info()`` prints by itself and returns None (see ``catDataFrame``).
        df.info()
        print(df.head())
-
-
-
# test = Pandas()
-
# test.csvLoadDataframe()
未完待续
文章来源: blog.csdn.net,作者:懿曲折扇情,版权归原作者所有,如需转载,请联系作者。
原文链接:blog.csdn.net/qq_41332844/article/details/126837419
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)