Pandas Python Programming Examples
Enhance your data manipulation skills with these comprehensive pandas examples.
1. Creating a DataFrame
import pandas as pd
# Creating a DataFrame from a dictionary
data = {
'Name': ['Alice', 'Bob', 'Charlie'],
'Age': [25, 30, 35],
'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
print(df)
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
2. Reading a CSV File
import pandas as pd
# Reading a CSV file into a DataFrame
df = pd.read_csv('students.csv')
print(df)
Output:
Name Age Grade
0 David 22 A
1 Eva 23 B
2 Frank 21 A
0 David 22 A
1 Eva 23 B
2 Frank 21 A
3. Selecting Columns
import pandas as pd
data = {
'Product': ['Laptop', 'Tablet', 'Smartphone'],
'Price': [1200, 600, 800],
'Stock': [30, 50, 100]
}
df = pd.DataFrame(data)
# Selecting the 'Product' and 'Price' columns
selected_columns = df[['Product', 'Price']]
print(selected_columns)
Output:
Product Price
0 Laptop 1200
1 Tablet 600
2 Smartphone 800
0 Laptop 1200
1 Tablet 600
2 Smartphone 800
4. Filtering Rows
import pandas as pd
data = {
'Name': ['Anna', 'Brian', 'Catherine', 'David'],
'Age': [28, 34, 29, 42],
'Department': ['HR', 'Engineering', 'Marketing', 'Engineering']
}
df = pd.DataFrame(data)
# Filtering employees older than 30
filtered_df = df[df['Age'] > 30]
print(filtered_df)
Output:
Name Age Department
1 Brian 34 Engineering
3 David 42 Engineering
1 Brian 34 Engineering
3 David 42 Engineering
5. Adding a New Column
import pandas as pd
data = {
'Item': ['Pen', 'Notebook', 'Eraser'],
'Price': [1.5, 3.0, 0.5]
}
df = pd.DataFrame(data)
# Adding a new column 'Quantity'
df['Quantity'] = [100, 200, 150]
print(df)
Output:
Item Price Quantity
0 Pen 1.5 100
1 Notebook 3.0 200
2 Eraser 0.5 150
0 Pen 1.5 100
1 Notebook 3.0 200
2 Eraser 0.5 150
6. Grouping and Aggregation
import pandas as pd
data = {
'Department': ['Sales', 'Sales', 'HR', 'HR', 'Engineering', 'Engineering'],
'Employee': ['John', 'Emma', 'Sam', 'Olivia', 'Liam', 'Sophia'],
'Salary': [50000, 55000, 45000, 47000, 70000, 72000]
}
df = pd.DataFrame(data)
# Grouping by Department and calculating average salary
grouped = df.groupby('Department')['Salary'].mean().reset_index()
print(grouped)
Output:
Department Salary
0 Engineering 71000.0
1 HR 46000.0
2 Sales 52500.0
0 Engineering 71000.0
1 HR 46000.0
2 Sales 52500.0
7. Handling Missing Data
import pandas as pd
import numpy as np
data = {
'Name': ['Tom', 'Jerry', 'Mickey', 'Donald'],
'Age': [25, np.nan, 30, 22],
'Score': [85, 90, np.nan, 75]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
# Filling missing values
df_filled = df.fillna({
'Age': df['Age'].mean(),
'Score': df['Score'].mean()
})
print("\nDataFrame after filling missing values:")
print(df_filled)
Output:
Original DataFrame:
Name Age Score
0 Tom 25.0 85.0
1 Jerry NaN 90.0
2 Mickey 30.0 NaN
3 Donald 22.0 75.0
DataFrame after filling missing values:
Name Age Score
0 Tom 25.000000 85.0
1 Jerry 25.666667 90.0
2 Mickey 30.000000 83.333333
3 Donald 22.000000 75.0
Explanation:
• The missing Age value for Jerry is filled with the mean age: (25 + 30 + 22) / 3 ≈ 25.6667
• The missing Score value for Mickey is filled with the mean score: (85 + 90 + 75) / 3 ≈ 83.3333
Original DataFrame:
Name Age Score
0 Tom 25.0 85.0
1 Jerry NaN 90.0
2 Mickey 30.0 NaN
3 Donald 22.0 75.0
DataFrame after filling missing values:
Name Age Score
0 Tom 25.000000 85.0
1 Jerry 25.666667 90.0
2 Mickey 30.000000 83.333333
3 Donald 22.000000 75.0
Explanation:
• The missing Age value for Jerry is filled with the mean age: (25 + 30 + 22) / 3 ≈ 25.6667
• The missing Score value for Mickey is filled with the mean score: (85 + 90 + 75) / 3 ≈ 83.3333
8. Merging DataFrames
import pandas as pd
# First DataFrame
df_employees = pd.DataFrame({
'EmployeeID': [1, 2, 3],
'Name': ['Alice', 'Bob', 'Charlie']
})
# Second DataFrame
df_salary = pd.DataFrame({
'EmployeeID': [1, 2, 4],
'Salary': [70000, 80000, 50000]
})
# Merging DataFrames on 'EmployeeID'
merged_df = pd.merge(df_employees, df_salary, on='EmployeeID', how='left')
print(merged_df)
Output:
EmployeeID Name Salary
0 1 Alice 70000.0
1 2 Bob 80000.0
2 3 Charlie NaN
Explanation:
• EmployeeID 3 (Charlie) does not have a corresponding salary entry, resulting in NaN.
0 1 Alice 70000.0
1 2 Bob 80000.0
2 3 Charlie NaN
Explanation:
• EmployeeID 3 (Charlie) does not have a corresponding salary entry, resulting in NaN.
9. Sorting Data
import pandas as pd
data = {
'Student': ['Zara', 'Mike', 'Adam', 'Kelly'],
'Score': [88, 92, 95, 85]
}
df = pd.DataFrame(data)
# Sorting by Score in descending order
sorted_df = df.sort_values(by='Score', ascending=False)
print(sorted_df)
Output:
Student Score
2 Adam 95
1 Mike 92
0 Zara 88
3 Kelly 85
2 Adam 95
1 Mike 92
0 Zara 88
3 Kelly 85
10. Pivot Tables
import pandas as pd
data = {
'Date': ['2024-01-01', '2024-01-01', '2024-01-02', '2024-01-02'],
'Product': ['A', 'B', 'A', 'B'],
'Sales': [100, 150, 200, 250]
}
df = pd.DataFrame(data)
# Creating a pivot table
pivot = pd.pivot_table(df, values='Sales', index='Date', columns='Product', aggfunc='sum')
print(pivot)
Output:
Product A B
Date
2024-01-01 100.0 150.0
2024-01-02 200.0 250.0
Date
2024-01-01 100.0 150.0
2024-01-02 200.0 250.0
11. Using apply
to Transform Data
import pandas as pd
# Sample DataFrame
data = {
'Name': ['alice', 'BOB', 'Charlie'],
'Age': [25, 30, 35]
}
df = pd.DataFrame(data)
# Capitalizing the names using apply
df['Name'] = df['Name'].apply(lambda x: x.capitalize())
print(df)
Output:
Name Age
0 Alice 25
1 Bob 30
2 Charlie 35
0 Alice 25
1 Bob 30
2 Charlie 35
12. Handling Date and Time Data
import pandas as pd
# Creating a DataFrame with date strings
data = {
'Event': ['Start', 'Middle', 'End'],
'Date': ['2024-01-01', '2024-06-15', '2024-12-31']
}
df = pd.DataFrame(data)
# Converting 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'])
# Extracting year, month, and day
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
print(df)
Output:
Event Date Year Month Day
0 Start 2024-01-01 2024 1 1
1 Middle 2024-06-15 2024 6 15
2 End 2024-12-31 2024 12 31
0 Start 2024-01-01 2024 1 1
1 Middle 2024-06-15 2024 6 15
2 End 2024-12-31 2024 12 31
13. Removing Duplicate Rows
import pandas as pd
# DataFrame with duplicate rows
data = {
'ID': [1, 2, 2, 3, 4, 4, 4],
'Name': ['Alice', 'Bob', 'Bob', 'Charlie', 'David', 'David', 'David'],
'Score': [85, 90, 90, 95, 80, 80, 80]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
# Removing duplicate rows
df_unique = df.drop_duplicates()
print("\nDataFrame after removing duplicates:")
print(df_unique)
Output:
Original DataFrame:
ID Name Score
0 1 Alice 85
1 2 Bob 90
2 2 Bob 90
3 3 Charlie 95
4 4 David 80
5 4 David 80
6 4 David 80
DataFrame after removing duplicates:
ID Name Score
0 1 Alice 85
1 2 Bob 90
3 3 Charlie 95
4 4 David 80
Original DataFrame:
ID Name Score
0 1 Alice 85
1 2 Bob 90
2 2 Bob 90
3 3 Charlie 95
4 4 David 80
5 4 David 80
6 4 David 80
DataFrame after removing duplicates:
ID Name Score
0 1 Alice 85
1 2 Bob 90
3 3 Charlie 95
4 4 David 80
14. Renaming Columns
import pandas as pd
# Sample DataFrame
data = {
'fname': ['Alice', 'Bob', 'Charlie'],
'age_years': [25, 30, 35],
'cty': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
# Renaming columns
df.rename(columns={'fname': 'Name', 'age_years': 'Age', 'cty': 'City'}, inplace=True)
print("\nDataFrame after renaming columns:")
print(df)
Output:
Original DataFrame:
fname age_years cty
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
DataFrame after renaming columns:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
Original DataFrame:
fname age_years cty
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
DataFrame after renaming columns:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
15. Selecting Rows with iloc
and loc
import pandas as pd
# Sample DataFrame
data = {
'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
'Age': [25, 30, 35, 40, 22],
'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
# Selecting rows by index using iloc (e.g., first three rows)
first_three = df.iloc[:3]
print("\nFirst three rows using iloc:")
print(first_three)
# Selecting rows by label using loc (e.g., rows with index 2 to 4)
rows_2_to_4 = df.loc[2:4]
print("\nRows with index 2 to 4 using loc:")
print(rows_2_to_4)
Output:
Original DataFrame:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
3 David 40 Houston
4 Eva 22 Phoenix
First three rows using iloc:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
Rows with index 2 to 4 using loc:
Name Age City
2 Charlie 35 Chicago
3 David 40 Houston
4 Eva 22 Phoenix
Original DataFrame:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
3 David 40 Houston
4 Eva 22 Phoenix
First three rows using iloc:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
Rows with index 2 to 4 using loc:
Name Age City
2 Charlie 35 Chicago
3 David 40 Houston
4 Eva 22 Phoenix
16. Concatenating DataFrames
import pandas as pd
# First DataFrame
df1 = pd.DataFrame({
'ID': [1, 2, 3],
'Name': ['Alice', 'Bob', 'Charlie']
})
# Second DataFrame
df2 = pd.DataFrame({
'ID': [4, 5],
'Name': ['David', 'Eva']
})
# Concatenating vertically
df_vertical = pd.concat([df1, df2], ignore_index=True)
print("Vertically Concatenated DataFrame:")
print(df_vertical)
# Third DataFrame for horizontal concatenation
df3 = pd.DataFrame({
'Age': [25, 30, 35, 40, 22]
})
# Concatenating horizontally
df_horizontal = pd.concat([df_vertical, df3], axis=1)
print("\nHorizontally Concatenated DataFrame:")
print(df_horizontal)
Output:
Vertically Concatenated DataFrame:
ID Name
0 1 Alice
1 2 Bob
2 3 Charlie
3 4 David
4 5 Eva
Horizontally Concatenated DataFrame:
ID Name Age
0 1 Alice 25
1 2 Bob 30
2 3 Charlie 35
3 4 David 40
4 5 Eva 22
Vertically Concatenated DataFrame:
ID Name
0 1 Alice
1 2 Bob
2 3 Charlie
3 4 David
4 5 Eva
Horizontally Concatenated DataFrame:
ID Name Age
0 1 Alice 25
1 2 Bob 30
2 3 Charlie 35
3 4 David 40
4 5 Eva 22
17. Exporting DataFrame to CSV
import pandas as pd
# Sample DataFrame
data = {
'Name': ['Alice', 'Bob', 'Charlie'],
'Age': [25, 30, 35],
'City': ['New York', 'Los Angeles', 'Chicago']
}
df = pd.DataFrame(data)
print("DataFrame to be exported:")
print(df)
# Exporting to CSV
df.to_csv('exported_data.csv', index=False)
print("\nDataFrame has been exported to 'exported_data.csv')
Output:
DataFrame to be exported:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
DataFrame has been exported to ‘exported_data.csv’
After running the program, the
DataFrame to be exported:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Charlie 35 Chicago
DataFrame has been exported to ‘exported_data.csv’
After running the program, the
exported_data.csv
file will contain:Name,Age,City
Alice,25,New York
Bob,30,Los Angeles
Charlie,35,Chicago