Replies: 1 comment
-
Instead of modifying your df and losing categorical columns like "Make", apply the outlier removal only to the numeric columns:
import pandas as pd
import numpy as np
# Select only numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns
# Compute IQR only for numeric columns
Q1 = df[numeric_cols].quantile(0.25)
Q3 = df[numeric_cols].quantile(0.75)
IQR = Q3 - Q1
# Remove outliers only from numeric columns, keeping categorical ones
df = df[~((df[numeric_cols] < (Q1 - 1.5 * IQR)) | (df[numeric_cols] > (Q3 + 1.5 * IQR))).any(axis=1)]
This keeps categorical columns (like "Make") intact while dropping every row that has an outlier in any numeric column.
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
Excuse me, when I executed the cell "
df = df[~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)]
df.shape
"
I got a TypeError, shown below. I tried removing the columns whose types are not int or float, but after removing them I couldn't draw the bar chart by the "Make" category in the next step.
Is there any solution to this problem? Thanks a lot!
TypeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, func, other, errors, try_cast, mgr)
1414 with np.errstate(all='ignore'):
-> 1415 result = get_result(other)
1416
~\Anaconda3\lib\site-packages\pandas\core\internals.py in get_result(other)
1382 else:
-> 1383 result = func(values, other)
1384
TypeError: '<' not supported between instances of 'str' and 'float'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
in
----> 1 df = df[~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)]
2 df.shape
~\Anaconda3\lib\site-packages\pandas\core\ops.py in f(self, other)
1618 return _combine_series_frame(self, other, func,
1619 fill_value=None, axis=None,
-> 1620 level=None, try_cast=False)
1621 else:
1622
~\Anaconda3\lib\site-packages\pandas\core\ops.py in _combine_series_frame(self, other, func, fill_value, axis, level, try_cast)
1437 # default axis is columns
1438 return self._combine_match_columns(other, func, level=level,
-> 1439 try_cast=try_cast)
1440
1441
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _combine_match_columns(self, other, func, level, try_cast)
4771 new_data = left._data.eval(func=func, other=right,
4772 axes=[left.columns, self.index],
-> 4773 try_cast=try_cast)
4774 return self._constructor(new_data)
4775
~\Anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, **kwargs)
3685
3686 def eval(self, **kwargs):
-> 3687 return self.apply('eval', **kwargs)
3688
3689 def quantile(self, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3579
3580 kwargs['mgr'] = self
-> 3581 applied = getattr(b, f)(**kwargs)
3582 result_blocks = _extend_blocks(applied, result_blocks)
3583
~\Anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, func, other, errors, try_cast, mgr)
1420 raise
1421 except Exception as detail:
-> 1422 result = handle_error()
1423
1424 # technically a broadcast error in numpy can 'work' by returning a
~\Anaconda3\lib\site-packages\pandas\core\internals.py in handle_error()
1403 raise TypeError(
1404 'Could not operate {other!r} with block values '
-> 1405 '{detail!s}'.format(other=other, detail=detail)) # noqa
1406 else:
1407 # return the values
TypeError: Could not operate array([nan, nan, nan, nan]) with block values '<' not supported between instances of 'str' and 'float'
Beta Was this translation helpful? Give feedback.
All reactions