Pandas Python Examples

Pandas Python Programming Examples

Enhance your data manipulation skills with these comprehensive pandas examples.

1. Creating a DataFrame

import pandas as pd

# Creating a DataFrame from a dictionary
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}

df = pd.DataFrame(data)
print(df)
Output:     Name Age City
    0 Alice 25 New York
    1 Bob 30 Los Angeles
    2 Charlie 35 Chicago

2. Reading a CSV File

import pandas as pd

# Reading a CSV file into a DataFrame
df = pd.read_csv('students.csv')
print(df)
Output:     Name Age Grade
    0 David 22 A
    1 Eva 23 B
    2 Frank 21 A

3. Selecting Columns

import pandas as pd

data = {
    'Product': ['Laptop', 'Tablet', 'Smartphone'],
    'Price': [1200, 600, 800],
    'Stock': [30, 50, 100]
}

df = pd.DataFrame(data)

# Selecting the 'Product' and 'Price' columns
selected_columns = df[['Product', 'Price']]
print(selected_columns)
Output:     Product Price
    0 Laptop 1200
    1 Tablet 600
    2 Smartphone 800

4. Filtering Rows

import pandas as pd

data = {
    'Name': ['Anna', 'Brian', 'Catherine', 'David'],
    'Age': [28, 34, 29, 42],
    'Department': ['HR', 'Engineering', 'Marketing', 'Engineering']
}

df = pd.DataFrame(data)

# Filtering employees older than 30
filtered_df = df[df['Age'] > 30]
print(filtered_df)
Output:     Name Age Department
    1 Brian 34 Engineering
    3 David 42 Engineering

5. Adding a New Column

import pandas as pd

data = {
    'Item': ['Pen', 'Notebook', 'Eraser'],
    'Price': [1.5, 3.0, 0.5]
}

df = pd.DataFrame(data)

# Adding a new column 'Quantity'
df['Quantity'] = [100, 200, 150]
print(df)
Output:                     Item Price Quantity
    0 Pen 1.5 100
    1 Notebook 3.0 200
    2 Eraser 0.5 150

6. Grouping and Aggregation

import pandas as pd

data = {
    'Department': ['Sales', 'Sales', 'HR', 'HR', 'Engineering', 'Engineering'],
    'Employee': ['John', 'Emma', 'Sam', 'Olivia', 'Liam', 'Sophia'],
    'Salary': [50000, 55000, 45000, 47000, 70000, 72000]
}

df = pd.DataFrame(data)

# Grouping by Department and calculating average salary
grouped = df.groupby('Department')['Salary'].mean().reset_index()
print(grouped)
Output:     Department Salary
    0 Engineering 71000.0
    1 HR 46000.0
    2 Sales 52500.0

7. Handling Missing Data

import pandas as pd
import numpy as np

data = {
    'Name': ['Tom', 'Jerry', 'Mickey', 'Donald'],
    'Age': [25, np.nan, 30, 22],
    'Score': [85, 90, np.nan, 75]
}

df = pd.DataFrame(data)

print("Original DataFrame:")
print(df)

# Filling missing values
df_filled = df.fillna({
    'Age': df['Age'].mean(),
    'Score': df['Score'].mean()
})

print("\nDataFrame after filling missing values:")
print(df_filled)
Output:
Original DataFrame:
    Name Age Score
    0 Tom 25.0 85.0
    1 Jerry NaN 90.0
    2 Mickey 30.0 NaN
    3 Donald 22.0 75.0

DataFrame after filling missing values:
    Name Age Score
    0 Tom 25.000000 85.0
    1 Jerry 25.666667 90.0
    2 Mickey 30.000000 83.333333
    3 Donald 22.000000 75.0
Explanation:
• The missing Age value for Jerry is filled with the mean age: (25 + 30 + 22) / 3 ≈ 25.6667
• The missing Score value for Mickey is filled with the mean score: (85 + 90 + 75) / 3 ≈ 83.3333

8. Merging DataFrames

import pandas as pd

# First DataFrame
df_employees = pd.DataFrame({
    'EmployeeID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie']
})

# Second DataFrame
df_salary = pd.DataFrame({
    'EmployeeID': [1, 2, 4],
    'Salary': [70000, 80000, 50000]
})

# Merging DataFrames on 'EmployeeID'
merged_df = pd.merge(df_employees, df_salary, on='EmployeeID', how='left')
print(merged_df)
Output:     EmployeeID Name Salary
    0 1 Alice 70000.0
    1 2 Bob 80000.0
    2 3 Charlie NaN
Explanation:
• EmployeeID 3 (Charlie) does not have a corresponding salary entry, resulting in NaN.

9. Sorting Data

import pandas as pd

data = {
    'Student': ['Zara', 'Mike', 'Adam', 'Kelly'],
    'Score': [88, 92, 95, 85]
}

df = pd.DataFrame(data)

# Sorting by Score in descending order
sorted_df = df.sort_values(by='Score', ascending=False)
print(sorted_df)
Output:     Student Score
    2 Adam 95
    1 Mike 92
    0 Zara 88
    3 Kelly 85

10. Pivot Tables

import pandas as pd

data = {
    'Date': ['2024-01-01', '2024-01-01', '2024-01-02', '2024-01-02'],
    'Product': ['A', 'B', 'A', 'B'],
    'Sales': [100, 150, 200, 250]
}

df = pd.DataFrame(data)

# Creating a pivot table
pivot = pd.pivot_table(df, values='Sales', index='Date', columns='Product', aggfunc='sum')
print(pivot)
Output:     Product A B
    Date
    2024-01-01 100.0 150.0
    2024-01-02 200.0 250.0

11. Using apply to Transform Data

import pandas as pd

# Sample DataFrame
data = {
    'Name': ['alice', 'BOB', 'Charlie'],
    'Age': [25, 30, 35]
}

df = pd.DataFrame(data)

# Capitalizing the names using apply
df['Name'] = df['Name'].apply(lambda x: x.capitalize())
print(df)
Output:             Name Age
    0 Alice 25
    1 Bob 30
    2 Charlie 35

12. Handling Date and Time Data

import pandas as pd

# Creating a DataFrame with date strings
data = {
    'Event': ['Start', 'Middle', 'End'],
    'Date': ['2024-01-01', '2024-06-15', '2024-12-31']
}

df = pd.DataFrame(data)

# Converting 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Extracting year, month, and day
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

print(df)
Output:     Event Date Year Month Day
    0 Start 2024-01-01 2024 1 1
    1 Middle 2024-06-15 2024 6 15
    2 End 2024-12-31 2024 12 31

13. Removing Duplicate Rows

import pandas as pd

# DataFrame with duplicate rows
data = {
    'ID': [1, 2, 2, 3, 4, 4, 4],
    'Name': ['Alice', 'Bob', 'Bob', 'Charlie', 'David', 'David', 'David'],
    'Score': [85, 90, 90, 95, 80, 80, 80]
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Removing duplicate rows
df_unique = df.drop_duplicates()
print("\nDataFrame after removing duplicates:")
print(df_unique)
Output:
Original DataFrame:
    ID Name Score
    0 1 Alice 85
    1 2 Bob 90
    2 2 Bob 90
    3 3 Charlie 95
    4 4 David 80
    5 4 David 80
    6 4 David 80

DataFrame after removing duplicates:
    ID Name Score
    0 1 Alice 85
    1 2 Bob 90
    3 3 Charlie 95
    4 4 David 80

14. Renaming Columns

import pandas as pd

# Sample DataFrame
data = {
    'fname': ['Alice', 'Bob', 'Charlie'],
    'age_years': [25, 30, 35],
    'cty': ['New York', 'Los Angeles', 'Chicago']
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Renaming columns
df.rename(columns={'fname': 'Name', 'age_years': 'Age', 'cty': 'City'}, inplace=True)
print("\nDataFrame after renaming columns:")
print(df)
Output:
Original DataFrame:
    fname age_years cty
    0 Alice 25 New York
    1 Bob 30 Los Angeles
    2 Charlie 35 Chicago

DataFrame after renaming columns:
    Name Age City
    0 Alice 25 New York
    1 Bob 30 Los Angeles
    2 Charlie 35 Chicago

15. Selecting Rows with iloc and loc

import pandas as pd

# Sample DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Age': [25, 30, 35, 40, 22],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# Selecting rows by index using iloc (e.g., first three rows)
first_three = df.iloc[:3]
print("\nFirst three rows using iloc:")
print(first_three)

# Selecting rows by label using loc (e.g., rows with index 2 to 4)
rows_2_to_4 = df.loc[2:4]
print("\nRows with index 2 to 4 using loc:")
print(rows_2_to_4)
Output:
Original DataFrame:
    Name Age City
    0 Alice 25 New York
    1 Bob 30 Los Angeles
    2 Charlie 35 Chicago
    3 David 40 Houston
    4 Eva 22 Phoenix

First three rows using iloc:
    Name Age City
    0 Alice 25 New York
    1 Bob 30 Los Angeles
    2 Charlie 35 Chicago

Rows with index 2 to 4 using loc:
    Name Age City
    2 Charlie 35 Chicago
    3 David 40 Houston
    4 Eva 22 Phoenix

16. Concatenating DataFrames

import pandas as pd

# First DataFrame
df1 = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie']
})

# Second DataFrame
df2 = pd.DataFrame({
    'ID': [4, 5],
    'Name': ['David', 'Eva']
})

# Concatenating vertically
df_vertical = pd.concat([df1, df2], ignore_index=True)
print("Vertically Concatenated DataFrame:")
print(df_vertical)

# Third DataFrame for horizontal concatenation
df3 = pd.DataFrame({
    'Age': [25, 30, 35, 40, 22]
})

# Concatenating horizontally
df_horizontal = pd.concat([df_vertical, df3], axis=1)
print("\nHorizontally Concatenated DataFrame:")
print(df_horizontal)
Output:
Vertically Concatenated DataFrame:
    ID Name
    0 1 Alice
    1 2 Bob
    2 3 Charlie
    3 4 David
    4 5 Eva

Horizontally Concatenated DataFrame:
    ID Name Age
    0 1 Alice 25
    1 2 Bob 30
    2 3 Charlie 35
    3 4 David 40
    4 5 Eva 22

17. Exporting DataFrame to CSV

import pandas as pd

# Sample DataFrame
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}

df = pd.DataFrame(data)
print("DataFrame to be exported:")
print(df)

# Exporting to CSV
df.to_csv('exported_data.csv', index=False)
print("\nDataFrame has been exported to 'exported_data.csv')
Output:
DataFrame to be exported:
    Name Age City
    0 Alice 25 New York
    1 Bob 30 Los Angeles
    2 Charlie 35 Chicago

DataFrame has been exported to ‘exported_data.csv’
After running the program, the exported_data.csv file will contain:
Name,Age,City
Alice,25,New York
Bob,30,Los Angeles
Charlie,35,Chicago
© 2024 Pandas Python Examples. All rights reserved.

Leave a Comment

Your email address will not be published. Required fields are marked *