Pandas Basics

Use the Template to explore the basic functionality of Pandas. Add new cells as necessary by starting each one with a `# %%` line.

Use the Data Management section and the Pandas Documentation for help.

Template
# %%
# Import Pandas


# %%
# Create a Pandas Series with the values 1-5 [pd.Series]


# %%
# Create a Pandas DataFrame with three columns, "x", "y", "z",
# with values 1-5, 10-50, and A-E, respectively [pd.DataFrame]


# %%
# Inspect the DataFrame [.info, .describe]


# %%
# Index the DataFrame to pick out the:
# - "x" column
# - "x" and "y" columns
# - [3, 30, C] row
# - 40 cell
# Use both label- and number-based indexing [.loc, .iloc]


# %%
# Add a new variable [.assign]:
# - "a", that has the value 1 for all rows
# - "b", that is the squared (pow 2) value of "x"


# %%
# Sort the DataFrame on [.sort_values, .sort_index]:
# - "x", ascending
# - "y", descending
# - the index, descending


# %%
# Using method chaining, create a DataFrame as above but in one step:
# - DataFrame with columns "x", "y", and "z"
# - Add the columns "a" and "b"
# - Sort on "y", descending
Solution
# %%
# Import Pandas
import pandas as pd

# %%
# Create a Pandas Series with the values 1-5 [pd.Series]

# Option 1: spell every value out in a list.
my_series = pd.Series(data=[1, 2, 3, 4, 5])

# Option 2: generate the values with range (the stop value, 6, is excluded).
# This rebinds my_series, so it is the object shown below.
my_series = pd.Series(data=range(1, 6))

# Bare expression: shown as the cell output when the cell is run interactively.
my_series

# %%
# Create a Pandas DataFrame with three columns, "x", "y", "z",
# with values 1-5, 10-50, and A-E, respectively [pd.DataFrame]

# Each dict key becomes a column name and each value supplies that column's
# data; all three columns have five entries.
df_1 = pd.DataFrame(
    data={
        "x": range(1, 6),  # 1, 2, 3, 4, 5
        "y": range(10, 51, 10),  # 10, 20, 30, 40, 50
        "z": ["A", "B", "C", "D", "E"],  # or list("ABCDE")
    }
)

# Bare expression: shown as the cell output when the cell is run interactively.
df_1

# %%
# Inspect the DataFrame [.info, .describe]
# .info() prints a structural summary of the frame (index, columns, dtypes,
# non-null counts, memory usage) and returns None.
df_1.info()
# .describe() returns summary statistics as a new DataFrame. By default only
# numeric columns are summarised, so the string column "z" is left out here;
# pass include="all" to cover every column.
df_1.describe()

# %%
# Index the DataFrame to pick out the:
# - "x" column
# - "x" and "y" columns
# - [3, 30, C] row
# - 40 cell
# Use both label- and number-based indexing [.loc, .iloc]

# NOTE: when the whole cell is run at once only the last expression is
# displayed; run the lines one at a time (or wrap them in print) to see each.

# "x" column: bracket shorthand, then by label (.loc) and by position (.iloc).
# A lone ":" in the row slot selects all rows. Each result is a Series.
df_1["x"]
df_1.loc[:, "x"]
df_1.iloc[:, 0]

# "x" and "y" columns: passing a list of columns returns a DataFrame.
df_1[["x", "y"]]
df_1.loc[:, ["x", "y"]]
df_1.iloc[:, [0, 1]]

# [3, 30, C] row: it carries index label 2 and also sits at position 2,
# because the default index counts from 0. .loc looks up the label,
# .iloc the position; they only coincide while the index is the default one.
df_1.loc[2]
df_1.iloc[2]

# 40 cell: row label 3 / column "y" by label, row 3 / column 1 by position.
df_1.loc[3, "y"]
df_1.iloc[3, 1]

# %%
# Add a new variable [.assign]:
# - "a", that has the value 1 for all rows
# - "b", that is the squared (pow 2) value of "x"

# .assign returns a new DataFrame with the extra columns; df_1 is not changed.
df_2 = df_1.assign(
    a=1,  # scalar: repeated on every row
    b=lambda frame: frame["x"] ** 2,  # callable: receives the DataFrame
)

# Bare expression: shown as the cell output when the cell is run interactively.
df_2

# %%
# Sort the DataFrame on [.sort_values, .sort_index]:
# - "x", ascending
# - "y", descending
# - the index, descending

# Each call returns a new, sorted DataFrame; df_2 itself keeps its order.

# "x", ascending (ascending order is the default).
df_2.sort_values(by="x")

# "y", descending.
df_2.sort_values(by="y", ascending=False)

# Row labels (the index), descending.
df_2.sort_index(ascending=False)

# %%
# Using method chaining, create a DataFrame as above but in one step:
# - DataFrame with columns "x", "y", and "z"
# - Add the columns "a" and "b"
# - Sort on "y", descending

# The outer parentheses let the chain span several lines, one step per line;
# each method returns a new DataFrame that feeds the next call.
df = (
    pd.DataFrame(
        data={
            "x": range(1, 6),
            "y": range(10, 51, 10),
            "z": ["A", "B", "C", "D", "E"],  # or list("ABCDE")
        }
    )
    .assign(
        a=1,  # scalar: repeated on every row
        b=lambda frame: frame["x"] ** 2,  # callable: receives the DataFrame
    )
    .sort_values(by="y", ascending=False)
)

# Bare expression: shown as the cell output when the cell is run interactively.
df