پایتون؛ کتابخانه pandas

# Keep only the rows that contain no NaN in any column.
df_no_nan_rows = df.dropna()

# Shuffle the remaining rows: sample(frac=1) draws every row in random
# order, and reset_index(drop=True) renumbers them from 0 while discarding
# the old index.
shuffled_df = (
    df_no_nan_rows
    .sample(frac=1)
    .reset_index(drop=True)
)

###################

# Imported here so that this snippet runs on its own.
import pandas as pd

# Load the flights data set into a DataFrame.
df = pd.read_csv('flights.csv')

# Boolean-mask selection of the rows whose 'arr_delay' value is null (NaN).
# Output ==> the matching rows (an empty DataFrame when nothing is missing).
df[df['arr_delay'].isnull()]

# Frequency table: how many times each distinct 'arr_delay' value occurs,
# similar to a "group by arr_delay, count" query.
df['arr_delay'].value_counts()

# Cross-tabulation of two columns, similar to a pivot table of counts:
# the distinct 'arr_delay' values label the rows, the distinct 'dep_delay'
# values label the columns, and each cell holds the number of rows with
# that combination. margins=True adds the row/column totals.
pd.crosstab(df['arr_delay'], df['dep_delay'], margins=True)


###################

import pandas as pd

# Load melb_data.csv into a DataFrame.
data = pd.read_csv(r"melb_data.csv")

data.head()     # first 5 records
data.tail()     # last 5 records
data.columns    # all column titles

# NOTE(review): the statements below operate on `data`, the frame loaded
# above; 'Price', 'Rooms' and 'Type' are presumably columns of
# melb_data.csv -- confirm against the file.

# Filter rows with two conditions combined by & (element-wise AND).
# Each comparison is parenthesised because & binds tighter than > and <.
data[(data['Price'] > 1800000) & (data['Rooms'] < 3)]

# Create a new column derived from an existing one.
data['new_rooms'] = data['Rooms'] + 1

data['Type'].unique()          # distinct values of 'Type'
data['Type'].value_counts()    # number of rows per 'Type' value


# Two ways to compute the mean 'arr_delay' per 'origin'. The numbers are the
# same; the first form yields a Series, the second a one-column DataFrame.
# Both are indexed by 'origin'. The second assignment overwrites the first.

# Form 1: pick the column from the groups, then aggregate it.
average_by_origin = df.groupby('origin')['arr_delay'].agg('mean')

# Form 2: name the column and its aggregation in a dict passed to agg().
average_by_origin = (
    df.groupby("origin")
    .agg({'arr_delay': 'mean'})
)

###############################

# Columns to load from the CSV file. The names are case sensitive and must
# match the header exactly.
usecols = ["Time"]

# NOTE(review): "Time" is the only column loaded and it is also used as the
# index, so the resulting frame presumably has no data columns left --
# confirm whether more column names belong in `usecols`.
data = pd.read_csv(
    r"flowdata.csv",
    usecols=usecols,
    index_col="Time",
)

# Prints a summary of the frame (index, columns, non-null counts, dtypes).
data.info()

# Summary statistics per column: count, mean, std, min, quartiles, max.
data.describe()

# Export to a tab-delimited file, without the header row.
data.to_csv(
    "XYZ.csv",
    sep="\t",
    encoding='utf-8',
    header=False,
)

###############################

import pandas as pd

# Sample data: three first names and the matching ages.
names = ['Ahmad', 'Ali', 'Arash']
ages = [20, 30, 40]

# Series built from a list, with explicit index labels.
pd_ages = pd.Series(data=ages, index=['n1', 'n2', 'n3'])

# Series built from a dict: the keys become the index labels.
families = {"n1": "Mostofi", "n2": "Masoudi", 'n3': None}
pd_families = pd.Series(families)

pd_names = pd.Series(data=names, index=['n1', 'n2', 'n3'])

# Each Series becomes one row (labelled r1..r3); the Series labels n1..n3
# become the column names.
df = pd.DataFrame(
    data=[pd_names, pd_ages, pd_families],
    index=['r1', 'r2', 'r3'],
)

print(df)

# Label-based access: a single cell, then a whole column.
df.loc["r2", "n2"]
df.loc[:, 'n2']

# inplace=True modifies df itself instead of returning a new frame.
df.drop(index='r2', inplace=True)

# Assigning a scalar fills the whole 'n1' column with that value.
df['n1'] = '*'



نظرات 0 + ارسال نظر
امکان ثبت نظر جدید برای این مطلب وجود ندارد.