Skip to content

Commit 38f2711

Browse files
authored
Minor updates for v0.10.13 (#154)
* + update version
* + minor updates to copyright year
* + included ecdf option in eda.plot_int_dist
* + improved plot_int_dist
* + minor refactoring in plot_int_dist
  + minor bugfix in convert_dtypes
1 parent 662b574 commit 38f2711

File tree

5 files changed

+19
-12
lines changed

5 files changed

+19
-12
lines changed

LICENSE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
This License pertains to `oreum_core` hosted on
44
[Oreum Industries' GitHub](https://github.com/oreum-industries/oreum_core)
55

6-
Copyright 2024 Oreum FZCO t/a Oreum Industries. All rights reserved.
6+
Copyright 2025 Oreum FZCO t/a Oreum Industries. All rights reserved.
77
Oreum FZCO, IFZA, Dubai Silicon Oasis, Dubai, UAE, reg. 25515
88
[oreum.io](https://oreum.io)
99

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
[![code style: ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
1010
[![code style: interrogate](https://raw.githubusercontent.com/oreum-industries/oreum_core/master/assets/img/interrogate_badge.svg)](https://pypi.org/project/interrogate/)
1111
[![code security: bandit](https://img.shields.io/badge/code%20security-bandit-yellow.svg)](https://github.com/PyCQA/bandit)
12-
<!-- [![Conda Forge](https://img.shields.io/conda/vn/oreum-industries/oreum_core.svg)](https://anaconda.org/oreum-industries/oreum_core) -->
1312

1413
---
1514

oreum_core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import logging
1818

19-
__version__ = "0.10.12"
19+
__version__ = "0.10.13"
2020

2121
# logger goes to null handler by default
2222
# packages that import oreum_core can override this and direct elsewhere

oreum_core/curate/data_transform.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ def convert_dtypes(self, dfraw: pd.DataFrame) -> pd.DataFrame:
113113

114114
for ft in self.ftsd["fbool"] + self.ftsd["fbool_nan_to_false"]:
115115
# tame string, strip, lower, use self.bool_dict, use pd.NA
116-
# if not isinstance(df.dtypes[ft], bool):
117-
if df.dtypes[ft].type != np.bool:
116+
if not isinstance(df.dtypes[ft], bool):
117+
# if df.dtypes[ft].type != bool:
118118
df[ft] = df[ft].apply(lambda x: str(x).strip().lower())
119119
df.loc[df[ft].isin(self.strnans), ft] = pd.NA
120120
df[ft] = df[ft].apply(lambda x: self.bool_dict.get(x, x))
@@ -123,7 +123,7 @@ def convert_dtypes(self, dfraw: pd.DataFrame) -> pd.DataFrame:
123123
df.loc[df[ft].isnull(), ft] = False
124124

125125
set_tf_only = set(df[ft].unique())
126-
if set_tf_only in set([True, False]): # most common, use np.bool
126+
if len(set_tf_only - set([True, False])) == 0: # most common
127127
df[ft] = df[ft].astype(bool)
128128
elif pd.isnull(df[ft]).sum() > 0: # contains NaNs, use pd.boolean
129129
df[ft] = df[ft].convert_dtypes(convert_boolean=True)

oreum_core/eda/plot.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -273,10 +273,19 @@ def plot_int_dist(
273273
vsize: float = 1.5,
274274
bins: int = None,
275275
plot_zeros: bool = True,
276+
ecdf: bool = False,
276277
**kwargs,
277278
) -> figure.Figure:
278279
"""Plot group counts as histogram (optional log)"""
279-
# handle under/over selecting fts
280+
kws_hist = dict(stat="count")
281+
legpos = "upper right"
282+
t = "Empirical distribution"
283+
if ecdf:
284+
kws_hist = dict(stat="proportion", cumulative=True)
285+
legpos = "lower right"
286+
t += " ECDF"
287+
288+
# handles under/over selecting fts
280289
fts = list(set.intersection(set(df.columns.tolist()), set(fts)))
281290
if len(fts) == 0:
282291
return None
@@ -295,17 +304,16 @@ def plot_int_dist(
295304
ax = sns.histplot(
296305
df.loc[df[ft].notnull(), ft],
297306
kde=False,
298-
stat="count",
299307
bins=bins,
300308
label=f"NaNs: {n_nans}, zeros: {n_zeros}, mean: {mean:.2f}, med: {med:.2f}",
301309
color=sns.color_palette()[i % 7],
302310
ax=ax1d[i][0],
311+
**kws_hist,
303312
)
304313
if log:
305-
_ = ax.set(yscale="log", title=ft, ylabel="log(count)")
306-
_ = ax.set(title=ft, ylabel="count", xlabel=None) # 'value'
307-
_ = ax.legend(loc="upper right")
308-
t = "Empirical distribution"
314+
_ = ax.set(yscale="log", title=ft, ylabel=f"log({kws_hist['stat']})")
315+
_ = ax.set(title=ft, ylabel=kws_hist["stat"], xlabel=None)
316+
_ = ax.legend(loc=legpos)
309317
txtadd = kwargs.pop("txtadd", None)
310318
_ = f.suptitle(" - ".join(filter(None, [t, "ints", txtadd])), y=1, fontsize=14)
311319
_ = f.tight_layout(pad=0.9)

0 commit comments

Comments
 (0)