Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Improve error message when transform() is called with an incorrect axis #58494

Merged
merged 11 commits into from
May 10, 2024
3 changes: 2 additions & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,8 @@ def normalize_dictlike_arg(

cols = Index(list(func.keys())).difference(obj.columns, sort=True)
if len(cols) > 0:
raise KeyError(f"Column(s) {list(cols)} do not exist")
# GH 58474
raise KeyError(f"Label(s) {list(cols)} do not exist")

aggregator_types = (list, tuple, dict)

Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,15 @@ def test_dict_nested_renaming_depr(method):
def test_missing_column(method, func):
# GH 40004
obj = DataFrame({"A": [1]})
match = re.escape("Column(s) ['B'] do not exist")
with pytest.raises(KeyError, match=match):
msg = r"Label\(s\) \['B'\] do not exist"
with pytest.raises(KeyError, match=msg):
getattr(obj, method)(func)


def test_transform_mixed_column_name_dtypes():
# GH39025
df = DataFrame({"a": ["1"]})
msg = r"Column\(s\) \[1, 'b'\] do not exist"
msg = r"Label\(s\) \[1, 'b'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.transform({"a": int, 1: str, "b": int})

Expand Down Expand Up @@ -359,3 +359,15 @@ def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
msg = "Function did not transform"
with pytest.raises(ValueError, match=msg):
obj.transform(op)


def test_transform_missing_labels_raises():
# GH 58474
# Checks that DataFrame.transform with a dict-like func raises a KeyError
# worded "Label(s) ... do not exist", once with axis=0 and once with axis=1.
df = DataFrame({"foo": [2, 4, 6], "bar": [1, 2, 3]}, index=["A", "B", "C"])
# axis=0: the dict keys "A" and "B" are index labels of df, not column names.
msg = r"Label\(s\) \['A', 'B'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.transform({"A": lambda x: x + 2, "B": lambda x: x * 2}, axis=0)

# axis=1: the dict keys "foo" and "bar" are column names of df. The expected
# message lists them as 'bar', 'foo' even though the dict gives "foo" first.
msg = r"Label\(s\) \['bar', 'foo'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.transform({"foo": lambda x: x + 2, "bar": lambda x: x * 2}, axis=1)
5 changes: 2 additions & 3 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import datetime
import functools
from functools import partial
import re

import numpy as np
import pytest
Expand Down Expand Up @@ -816,8 +815,8 @@ def test_agg_relabel_other_raises(self):

def test_missing_raises(self):
df = DataFrame({"A": [0, 1], "B": [1, 2]})
match = re.escape("Column(s) ['C'] do not exist")
with pytest.raises(KeyError, match=match):
msg = r"Label\(s\) \['C'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.groupby("A").agg(c=("C", "sum"))

def test_agg_namedtuple(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def test_aggregate_api_consistency():
expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]])

msg = r"Column\(s\) \['r', 'r2'\] do not exist"
msg = r"Label\(s\) \['r', 'r2'\] do not exist"
with pytest.raises(KeyError, match=msg):
grouped[["D", "C"]].agg({"r": "sum", "r2": "mean"})

Expand All @@ -224,7 +224,7 @@ def test_agg_dict_renaming_deprecation():
{"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}}
)

msg = r"Column\(s\) \['ma'\] do not exist"
msg = r"Label\(s\) \['ma'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.groupby("A")[["B", "C"]].agg({"ma": "max"})

Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def test_agg_consistency():

r = df.resample("3min")

msg = r"Column\(s\) \['r1', 'r2'\] do not exist"
msg = r"Label\(s\) \['r1', 'r2'\] do not exist"
with pytest.raises(KeyError, match=msg):
r.agg({"r1": "mean", "r2": "sum"})

Expand All @@ -343,7 +343,7 @@ def test_agg_consistency_int_str_column_mix():

r = df.resample("3min")

msg = r"Column\(s\) \[2, 'b'\] do not exist"
msg = r"Label\(s\) \[2, 'b'\] do not exist"
with pytest.raises(KeyError, match=msg):
r.agg({2: "mean", "b": "sum"})

Expand Down Expand Up @@ -534,7 +534,7 @@ def test_agg_with_lambda(cases, agg):
],
)
def test_agg_no_column(cases, agg):
msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
msg = r"Label\(s\) \['result1', 'result2'\] do not exist"
with pytest.raises(KeyError, match=msg):
cases[["A", "B"]].agg(**agg)

Expand Down Expand Up @@ -582,7 +582,7 @@ def test_agg_specificationerror_series(cases, agg):
def test_agg_specificationerror_invalid_names(cases):
# errors
# invalid names in the agg specification
msg = r"Column\(s\) \['B'\] do not exist"
msg = r"Label\(s\) \['B'\] do not exist"
with pytest.raises(KeyError, match=msg):
cases[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})

Expand Down Expand Up @@ -631,7 +631,7 @@ def test_try_aggregate_non_existing_column():
df = DataFrame(data).set_index("dt")

# Error as we don't have 'z' column
msg = r"Column\(s\) \['z'\] do not exist"
msg = r"Label\(s\) \['z'\] do not exist"
with pytest.raises(KeyError, match=msg):
df.resample("30min").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})

Expand Down