# Keep only complete rows: axis=0 / how="any" (the dropna defaults) removes
# every row that contains at least one NaN value.
df_no_nan_rows = df.dropna(axis=0, how="any")
# Shuffle by sampling 100% of the rows (frac=1), then replace the scrambled
# index with a fresh 0..n-1 one; drop=True keeps the old index from being
# inserted back as a column.
shuffled_df = (
    df_no_nan_rows
    .sample(frac=1)
    .reset_index(drop=True)
)
###################
# Load flights.csv into a DataFrame.
df = pd.read_csv('flights.csv')
# Boolean-mask selection: the rows whose arr_delay is missing (NaN).
# As a bare expression the result is only displayed in a notebook/REPL.
df[df['arr_delay'].isnull()]
# output ==> the rows that have a null in the arr_delay column.
# Count how often each distinct arr_delay value occurs
# (NaN is excluded by default).
df['arr_delay'].value_counts()
# Cross-tabulation of two columns: similar to grouping by both fields and
# showing the result as a matrix (like a pivot table). The distinct arr_delay
# values form the row labels, the distinct dep_delay values form the column
# labels, and each cell holds the count of rows with that combination.
# margins=True adds the row/column totals.
pd.crosstab(df['arr_delay'], df['dep_delay'], margins=True)
###################
import pandas as pd
# Load melb_data.csv into a DataFrame.
data = pd.read_csv(r"melb_data.csv")
data.head()  # first 5 records
data.tail()  # last 5 records
data.columns  # returns all column titles
# NOTE(review): the statements below operate on `df`, not on the `data`
# frame loaded above -- confirm which frame is intended.
# Filter rows on two conditions. Each comparison is parenthesised because
# `&` binds tighter than `>` / `<`.
df[(df['Price'] > 1800000) & (df['Rooms'] < 3)]
df['new_rooms'] = df['Rooms'] + 1  # create new column
df['Type'].unique()  # distinct Type values
df['Type'].value_counts()  # number of rows per distinct Type value
# Two ways to compute the mean arr_delay for each origin. The values are the
# same, but the first form returns a Series while the second returns a
# DataFrame with a single 'arr_delay' column. The second assignment
# overwrites the first.
average_by_origin = df.groupby('origin')['arr_delay'].mean()
average_by_origin = df.groupby("origin", as_index=True).agg({'arr_delay': 'mean'})
###############################
# Columns to load from the CSV; the names are matched case-sensitively.
usecols = ["Time"]
# NOTE(review): only "Time" is loaded and it is also used as the index, so
# the resulting frame presumably has no data columns left -- confirm that
# usecols lists everything that is needed.
data = pd.read_csv(
    r"flowdata.csv",
    usecols=usecols,
    index_col="Time",
)
data.info()  # prints a summary of the frame (index, columns, dtypes)
data.describe()  # descriptive statistics (count, mean, std, min, max, ...)
# Export to a CSV file using a tab delimiter, UTF-8 encoding and no header row.
data.to_csv("XYZ.csv", sep="\t", header=False, encoding='utf-8')
###############################
import pandas as pd
# Source data: two plain lists and one dict.
names = ['Ahmad', 'Ali', 'Arash']
ages = [20, 30, 40]
families = dict(n1="Mostofi", n2="Masoudi", n3=None)

# Lists need explicit labels; a dict supplies its own (the keys).
pd_names = pd.Series(data=names, index=['n1', 'n2', 'n3'])
pd_ages = pd.Series(data=ages, index=['n1', 'n2', 'n3'])
pd_families = pd.Series(families)

# Each Series becomes one row (labelled r1..r3); the Series labels n1..n3
# become the columns.
df = pd.DataFrame(
    data=[pd_names, pd_ages, pd_families],
    index=['r1', 'r2', 'r3'],
)
print(df)

df.loc["r2", "n2"]  # single cell: row r2, column n2
df.loc[:, 'n2']  # whole column n2 (all rows)

# inplace=True modifies df itself instead of returning a new frame.
df.drop(index='r2', inplace=True)
# Assigning a scalar sets every row of column n1 to '*'.
df['n1'] = '*'