diff --git a/docs/files/Heatmap_monthly_yearly.png b/docs/files/Heatmap_monthly_yearly.png new file mode 100644 index 0000000..d593cf9 Binary files /dev/null and b/docs/files/Heatmap_monthly_yearly.png differ diff --git a/docs/files/Heatmap_yearly_profiles_wind.png b/docs/files/Heatmap_yearly_profiles_wind.png new file mode 100644 index 0000000..add93d7 Binary files /dev/null and b/docs/files/Heatmap_yearly_profiles_wind.png differ diff --git a/docs/files/Linear_regression.png b/docs/files/Linear_regression.png new file mode 100644 index 0000000..d8e834b Binary files /dev/null and b/docs/files/Linear_regression.png differ diff --git a/docs/files/Yearly_stripes.png b/docs/files/Yearly_stripes.png new file mode 100644 index 0000000..0686823 Binary files /dev/null and b/docs/files/Yearly_stripes.png differ diff --git a/docs/files/Yearly_vertical_profiles_current.png b/docs/files/Yearly_vertical_profiles_current.png new file mode 100644 index 0000000..687593a Binary files /dev/null and b/docs/files/Yearly_vertical_profiles_current.png differ diff --git a/docs/files/Yearly_vertical_profiles_wind.png b/docs/files/Yearly_vertical_profiles_wind.png new file mode 100644 index 0000000..ac28b87 Binary files /dev/null and b/docs/files/Yearly_vertical_profiles_wind.png differ diff --git a/docs/files/table_linear_regression.csv b/docs/files/table_linear_regression.csv new file mode 100644 index 0000000..e804f00 --- /dev/null +++ b/docs/files/table_linear_regression.csv @@ -0,0 +1,14 @@ +index,LS_slope,LS_intercept,LS_r_square,TS_slope,TS_intercept,TS_slope_lower,TS_slope_upper,Kendall_tau,Kendall_p_value +Jan,0.029711207446130317,-55.36514088996559,0.11816382666464985,0.025655241935483797,-47.54730342741921,-0.0126008064516129,0.07096774193548384,0.1948051948051948,0.21747455727039447 +Feb,-0.027866959956941727,57.608486263014086,0.11845730198747513,-0.028593749999999973,59.087466517857095,-0.06592261904761913,0.015277777777777748,-0.1948051948051948,0.21747455727039447 +Mar,0.01768501484490265,-32.16031242600319,0.03870303537700306,0.024264705882352945,-45.23926707779887,-0.009879032258064477,0.055181451612903235,0.26406926406926406,0.09079907642795323 +Apr,0.0010476661020139201,0.005907914549234672,0.00024411362841211584,-0.002440476190476223,6.795535714285778,-0.030000000000000027,0.040722222222222194,-0.021645021645021644,0.9112884689584274 +May,-0.009514398644833414,20.37348522795575,0.04579936634814474,-0.01908602150537633,39.273252688172015,-0.033064516129032384,0.006634897360703793,-0.2727272727272727,0.08028526277549888 +Jun,-0.004398174289478636,10.135343967626573,0.014806729237167148,-0.003541666666666776,8.47114583333355,-0.02124999999999999,0.015708333333333324,-0.05627705627705628,0.7380940484479567 +Jul,-0.005692974627055975,12.650315568022442,0.0337516274921531,-0.0015552995391705199,4.504349078341039,-0.021054147465437816,0.009646401985111668,-0.0303030303030303,0.8672669832921104 +Aug,0.0008877342853499994,-0.349576055080975,0.0008189992079955256,0.0018548387096774198,-2.2734274193548396,-0.013164136622390884,0.017679900744416884,0.0303030303030303,0.8672669832921104 +Sep,0.03726661020139281,-71.31799642386599,0.36449213098047584,0.04026041666666669,-77.17263020833337,0.013229166666666653,0.0629166666666667,0.4458874458874459,0.003207140388506757 +Oct,0.017707327735378236,-32.37476321014191,0.07780296692937472,0.014536290322580636,-26.135252016129012,-0.0141633064516129,0.054233870967742126,0.2121212121212121,0.17796763426213033
+Nov,0.02891821946169772,-54.03596955580651,0.20230887953096446,0.030370370370370332,-56.882199074074,0.0009444444444444736,0.06178030303030304,0.316017316017316,0.041288634586930426 +Dec,0.010406003533633251,-17.15056692956409,0.020622058640743835,0.013709677419354804,-23.67379032258058,-0.021572580645161283,0.05241935483870974,0.16017316017316016,0.31397950542939346 +Year,0.008189232868174282,-13.846941088045558,0.2010254722688341,0.009867492576789694,-17.135340644259827,0.0012875215210719215,0.017266500622665003,0.35064935064935066,0.022775270000007175 diff --git a/docs/index.rst b/docs/index.rst index f09a678..5019b99 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1990,6 +1990,115 @@ Taylor diagram .. image:: files/Taylor_diagram.png :width: 500 + +Climate and trends +================== +Yearly stripes +-------------- + +.. code-block:: python + + plots.plot_yearly_stripes( + df, + var_name='HS', + method='mean', + ylabel='Hs [m]', + output_file='Yearly_stripes.png') + +.. image:: files/Yearly_stripes.png + :width: 500 + +Monthly and yearly heatmap +-------------------------- + +.. code-block:: python + + plots.plot_heatmap_monthly_yearly( + df, + var='T2m', + method='mean', + cb_label='2-m temperature [°C]', + output_file='Heatmap_monthly_yearly.png') + +.. image:: files/Heatmap_monthly_yearly.png + :width: 500 + +Yearly vertical profiles +------------------------ + +.. code-block:: python + + plots.plot_yearly_vertical_profiles( + df, + rad_colname='current_speed_', + method='mean', + yaxis_direction='down', + xlabel='Current speed [m/s]', + output_file='Yearly_vertical_profiles_current.png') + +.. image:: files/Yearly_vertical_profiles_current.png + :width: 500 + +.. code-block:: python + + plots.plot_yearly_vertical_profiles( + df, + rad_colname='W', + method='mean', + yaxis_direction='up', + xlabel='Wind speed [m/s]', + output_file='Yearly_vertical_profiles_wind.png') + +.. image:: files/Yearly_vertical_profiles_wind.png + :width: 500 + +.. code-block:: python + + plots.plot_heatmap_profiles_yearly( + df, + rad_colname='W', + cb_label='Wind speed [m/s]', + yaxis_direction='up', + method='P80', + output_file='Heatmap_yearly_profiles_wind.png') + +.. image:: files/Heatmap_yearly_profiles_wind.png + :width: 500 + +Linear regression plot and table +-------------------------------- + +.. code-block:: python + + plots.plot_linear_regression( + df, + var='HS', + time='Year', + stat='P70', + method=['Least-Squares','Theil-Sen'], + confidence_interval=0.95, + ylabel='Hs [m]', + output_figure='Linear_regression.png') + +.. image:: files/Linear_regression.png + :width: 500 + +.. code-block:: python + + tables.table_linear_regression( + df, + var='HS', + stat='mean', + method=['Least-Squares','Theil-Sen','Kendall-tau'], + confidence_interval=0.95, + intercept=True, + output_file='table_linear_regression.csv' + ) + +.. csv-table:: + :header-rows: 1 + :file: files/table_linear_regression.csv + Auxiliary Functions =================== diff --git a/examples/example_NORA_full.py b/examples/example_NORA_full.py index 770a9da..06f7058 100644 --- a/examples/example_NORA_full.py +++ b/examples/example_NORA_full.py @@ -130,7 +130,7 @@ # # fig = plots.plot_profile_monthly_stats(ds_ocean, var=['temp_' + d for d in depth], z=[float(d[:-1]) for d in depth], method='minimum',title='Min.
Sea Temperature [°C]', output_file='plot_min_temp_profile_monthly_stats.png') # # fig = plots.plot_profile_monthly_stats(ds_ocean, var=['temp_' + d for d in depth], z=[float(d[:-1]) for d in depth], method='maximum',title='Max. Sea Temperature [°C]', output_file='plot_max_temp_profile_monthly_stats.png') -# # # Sainity: +# # # Salinity: # # df = tables.table_profile_monthly_stats(ds_ocean, var=['salt_' + d for d in depth], z=[float(d[:-1]) for d in depth], method='mean', output_file='table_mean_sal_profile_monthly_stats.png') # # df = tables.table_profile_monthly_stats(ds_ocean, var=['salt_' + d for d in depth], z=[float(d[:-1]) for d in depth], method='std.dev', output_file='table_std_sal_profile_monthly_stats.png') # # df = tables.table_profile_monthly_stats(ds_ocean, var=['salt_' + d for d in depth], z=[float(d[:-1]) for d in depth], method='minimum', output_file='table_min_sal_profile_monthly_stats.png') @@ -150,3 +150,4 @@ # #fig = plots.plot_storm_surge_for_given_hs(df,var_surge='zeta_0m', var_hs='HS', max_hs=20, output_file='surge_for_given_hs.png') # #df = tables.table_extreme_total_water_level(df, var_hs='HS',var_tp='TP',var_surge='zeta_0m', var_tide='tide', periods=[100,10000], output_file='table_extreme_total_water_level.csv') # #df = tables.table_storm_surge_for_rv_hs(df, var_hs='HS',var_tp='TP',var_surge='zeta_0m', var_tide='tide', periods=[1,10,100,10000],depth=200, output_file='table_storm_surge_for_rv_hs.csv') +# #tide_type = stats.tidal_type(ds_tide,var='tide') \ No newline at end of file diff --git a/metocean_stats/plots/__init__.py b/metocean_stats/plots/__init__.py index 9c6a32c..0540c6a 100644 --- a/metocean_stats/plots/__init__.py +++ b/metocean_stats/plots/__init__.py @@ -10,4 +10,5 @@ from .extreme import * from .general import * from .verification import * +from .climate import * from .spectra import * diff --git a/metocean_stats/plots/climate.py b/metocean_stats/plots/climate.py new file mode 100644 index 0000000..ebdd2e5 --- /dev/null +++ b/metocean_stats/plots/climate.py @@ -0,0 +1,365 @@ +import matplotlib.pyplot as plt +import matplotlib.colors as mcolors +from matplotlib.ticker import AutoMinorLocator +from ..tables import climate +from ..stats import general +import numpy as np +import calendar +import re + + +#################################### TO BE ADJUSTED FOR DISCRETE COLORMAP ##################################### +def plot_heatmap_profiles_yearly(df, rad_colname='current_speed_', cb_label='Current speed [m/s]', yaxis_direction='down', method='mean', output_file='heatmap_profile.pdf'): + """ + This function plots a heatmap of yearly vertical profiles + + Parameters + ---------- + df: pd.DataFrame + Should have datetime as index and + column name in format of level and m at the end, for example 12.3m + rad_colname: string + Prefix of the column names of interest (e.g. current_speed_) + yaxis_direction: string + Can be 'up' for increasing y-axis (height) and 'down' for decreasing y-axis (depth) + method: string + A percentile, e.g. P25, P50, P75, 30%, 40% etc. + Some others are also allowed: count (number of data points), and min, mean, max. + cb_label: string + Label of color bar + + Returns + ------- + Figure + + Authors + ------- + Written by Dung M.
Nguyen and clio-met + """ + df1=climate.table_yearly_1stat_vertical_levels(df, rad_colname=rad_colname, method=method, output_file='') + levs = [str(re.search(r'\d+(\.\d+)?', s).group()) for s in df1.columns] + # Delete the last line of the dataframe (All years) + df1 = df1.iloc[:-1,:] + + min_value = min(df1.min()) + max_value = max(df1.max()) + + norm = mcolors.Normalize(vmin=min_value, vmax=max_value) + cmap = plt.cm.get_cmap('viridis',int((max_value-min_value)*1000)) + + fig, ax = plt.subplots(figsize=(12, 7)) + cax = ax.imshow(df1.T, aspect='auto', cmap=cmap, norm=norm) + cbar = fig.colorbar(cax, ax=ax) + cbar.ax.tick_params(labelsize=14) # Adjust labelsize as desired + cbar.set_label(cb_label, size=14) # Adjust size as desired + ax.set_yticks(ticks=range(len(levs)), labels=levs) + if len(df1.index)>10: + ax.set_xticks(ticks=range(len(df1.index))[::5], labels=df1.index[::5]) + else: + ax.set_xticks(ticks=range(len(df1.index)), labels=df1.index) + ax.set_xlabel('Years', fontsize=14) + ax.set_ylabel('z [m]', fontsize=14) + ax.tick_params(axis='x', labelsize=14) + ax.tick_params(axis='y', labelsize=14) + + if yaxis_direction=='up': + ax.invert_yaxis() + ax.xaxis.set_minor_locator(AutoMinorLocator()) + plt.tight_layout() + if output_file != "": + plt.savefig(output_file,dpi=250,facecolor='white',bbox_inches='tight') + plt.close() + + return fig + + + + +def plot_yearly_stripes(df,var_name='Hs',method='mean',ylabel='Y label',output_file='figure.pdf'): + """ + This function calculates a yearly statistic for one variable + and plots the result as a curve with colored stripes in the background + + Parameters + ---------- + df: pd.DataFrame with index as datetime + var_name: string + Variable name (column name) + method: string + Can be either 'min','mean', 'max', or a percentile (P99, 99%,...) + ylabel: string + Label of y axis + output_file: string + Figure name + + Returns + ------- + Figure + + Authors + ------- + Written by Dung M.
Nguyen and clio-met + Inspired by Ed Hawkins' warming stripes https://en.wikipedia.org/wiki/Warming_stripes + """ + + df2=climate.table_yearly_stats(df,var=var_name,percentiles=method,output_file="") + df2=df2.iloc[:-1,0] + fig, ax = plt.subplots(figsize=(10, 5)) + colors = plt.cm.RdBu_r((df2 - df2.min()) / (df2.max() - df2.min())) + ax.bar(df2.index.to_numpy(), height=100, width=1.0, color=colors, bottom=df2.min()-1.5) + ax.set_xlim(df2.index.min()-0.5, df2.index.max()+0.5) + ax.set_ylim(df2.min()-0.5, df2.max()+0.5) + ax.plot(df2.index,df2,'k-',linewidth=5) + ax.plot(df2.index,df2,'w-',linewidth=2) + ax.scatter(df2.index,df2,marker='o',s=50,c='w',edgecolors='k',zorder=100) + props = dict(boxstyle='round', facecolor='white', alpha=0.5) + plt.text(.01, .97, 'Min-Max range = '+str(round(df2.max()-df2.min(),1)), ha='left', va='top', transform=ax.transAxes, bbox=props, fontsize=14) + ax.set_ylabel(ylabel, fontsize=14) + xpos=df2.index[::5].to_list() + labs=[str(int(l)) for l in xpos] + ax.set_xticks(ticks=xpos, labels=labs) + ax.tick_params(axis='x', labelsize=14) + ax.tick_params(axis='y', labelsize=14) + plt.tight_layout() + if output_file != "": + plt.savefig(output_file,dpi=250,facecolor='white',bbox_inches='tight') + plt.close() + + return fig + + +def plot_heatmap_monthly_yearly(df,var='air_temperature_2m',method='mean',cb_label='Mean Temperature [°C]',output_file='figure.pdf'): + """ + This function plots the monthly statistic for every year + + Parameters + ---------- + df: pd.DataFrame with index of datetime + var: string + Variable name + method: string + Can be either 'min','p1','p5','p50','mean','p95','p99' or 'max' + cb_label: string + Title of the color bar + output_file: string + Figure name + + Returns + ------- + Figure + + Authors + ------- + Written by Dung M.
Nguyen and clio-met + Inspired by Ed Hawkins' warming stripes https://en.wikipedia.org/wiki/Warming_stripes + """ + # get month names + months_names=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec','Year'] + + df2=general.stats_monthly_every_year(df,var=var,method=[method]) + df3=climate.table_yearly_stats(df,var=var,percentiles=[method],output_file="") + + years=np.unique(df2['year'].to_numpy()) + months=np.unique(df2['month'].to_numpy()) + + # Select first column + df2_1st=df2.iloc[:,0] + min_value = np.floor(df2_1st.min()) + max_value = np.ceil(df2_1st.max()) + norm = mcolors.Normalize(vmin=min_value, vmax=max_value) + + arr=df2_1st.to_numpy().reshape(len(years),len(months)).T + # Add the whole year statistic + arr=np.concatenate([arr,df3.iloc[:-1,0].to_numpy().flatten()[np.newaxis,:]],axis=0) + del df2_1st,df2,df3 + + fig, ax = plt.subplots(figsize=(12, 6)) + cmap = plt.cm.get_cmap('viridis',20) + cax = ax.imshow(arr, aspect='auto', cmap=cmap, norm=norm) + cbar = fig.colorbar(cax, ax=ax) + cbar.ax.tick_params(labelsize=14) + cbar.set_label(cb_label, size=14) + ax.set_xticks(ticks=range(len(years))[::5], labels=[str(y) for y in years.tolist()][::5]) + ax.set_yticks(ticks=range(len(months_names)), labels=months_names) + ax.set_xlabel('Years', fontsize=14) + ax.set_ylabel('Months', fontsize=14) + ax.tick_params(axis='x', labelsize=14) + ax.tick_params(axis='y', labelsize=14) + ax.xaxis.set_minor_locator(AutoMinorLocator()) + ax.axhline(y=11.5,linewidth=1, color='k') + plt.tight_layout() + if output_file != "": + plt.savefig(output_file,dpi=250,facecolor='white',bbox_inches='tight') + plt.close() + + return fig + + + +def plot_yearly_vertical_profiles(df, rad_colname='current_speed_', method='mean', yaxis_direction='down', xlabel='Current speed [m/s]', output_file='yearly_profile.pdf'): + """ + This function plots yearly vertical profiles for the chosen statistic + + Parameters + ---------- + df: pd.DataFrame with index of datetime + rad_colname: string + Prefix of the variable of interest (e.g. current_speed_) + method: string + A percentile e.g. P25, P50, P75, 30%, 40% etc. + or min, mean, max. + yaxis_direction: string + Can be 'up' for increasing y-axis (height) and 'down' for decreasing y-axis (depth) + xlabel: string + Label of x-axis + output_file: string + Name of the figure file + + Returns + ------- + Figure + + Authors + ------- + Written by Dung M.
Nguyen and clio-met + """ + df1=climate.table_yearly_1stat_vertical_levels(df, rad_colname=rad_colname, method=method, output_file='') + levs = np.array([str(re.search(r'\d+(\.\d+)?', s).group()) for s in df1.columns]) + # Delete the last line of the dataframe (All years) + df1=df1.iloc[:-1,:] + years = [str(yr) for yr in df1.index.to_list()] + n_years = len(df1) + + # To get a discrete colormap + cmap = plt.get_cmap('viridis', n_years) + norm = mcolors.BoundaryNorm(boundaries=np.arange(0,n_years+1,1), ncolors=n_years) + + fig, ax = plt.subplots(figsize=(10, 5)) + for y in range(n_years): + ax.plot(df1.iloc[y,:].to_numpy(),levs,c=cmap(y)) + sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm) + sm.set_array([]) + cbar = plt.colorbar(sm, ax=ax) + if n_years>10: + cbar.set_ticks(np.arange(0,n_years,1)[::5]+0.5) + cbar.set_ticklabels(years[::5]) + else: + cbar.set_ticks(np.arange(0,n_years,1)+0.5) + cbar.set_ticklabels(years) + cbar.set_label('Year', size=14) + cbar.ax.tick_params(labelsize=14) + if yaxis_direction=='down': + ax.invert_yaxis() + ax.set_ylim(levs[-1],levs[0]) + else: + ax.set_ylim(levs[0],levs[-1]) + ax.set_ylabel('z [m]', fontsize=14) + ax.set_xlabel(xlabel, fontsize=14) + ax.tick_params(axis='x', labelsize=14) + ax.tick_params(axis='y', labelsize=14) + plt.grid() + plt.tight_layout() + if output_file != "": + plt.savefig(output_file,dpi=250,facecolor='white',bbox_inches='tight') + plt.close() + + return fig + + + + + +def plot_linear_regression(df,var='air_temperature_2m',time='Jan',stat='mean',method=['Least-Squares','Theil-Sen'],confidence_interval=0.95,ylabel='2-m T [°C]',output_figure=None): + """ + This function plots the Ordinary Least Squares linear regression line and/or + the median, lower and upper slopes (within the confidence interval) using the Theil-Sen method + for monthly or yearly time series as a function of the year + + Parameters + ---------- + df: pd.DataFrame with index of datetime + var: string + Variable name + time: string + Can be a month ('Jan', 'Feb', 'January', ...) or 'Year' for the yearly statistic + stat: string + Can be 'min', 'p5', 'p90', '50%', 'mean', 'max'. Default is 'mean' + method: list of string + By default 'Least-Squares', 'Theil-Sen' + confidence_interval: float + To be specified for the Theil-Sen method, between 0.5 and 1. Default is 0.95 + ylabel: string + Label of the y-axis containing unit within [] + output_figure: string + Name of the output figure file + + Returns + ------- + Figure + Time series plot with the fitted regression line(s) + For Theil-Sen, the median, lower, and upper slope lines are drawn + The slopes are reported in the legend in [unit]/yr + + Authors + ------- + Written by Dung M.
Nguyen and clio-met + """ + months_names = calendar.month_abbr[1:] + + df1,df_monthly,df_yearly=climate.table_linear_regression(df,var=var,stat=stat,method=method,confidence_interval=confidence_interval,intercept=True,output_file=None) + if time=='Year': + df2=df1.iloc[-1,:] # last row + yval=df_yearly.to_numpy() + else: + t=time[0:3] + ix=np.where(np.array(months_names)==t)[0][0]+1 + df2=df1.loc[t] + yval=df_monthly[df_monthly['month']==ix].iloc[:,0].to_numpy() + + del df1 + + unit = (ylabel.split('['))[1].split(']')[0] + + xval=df.index.year.unique().to_numpy() + + fig, ax = plt.subplots(figsize=(10, 8)) + + ax.plot(xval,yval,marker='.',linewidth=0.5,color='k') + if 'Least-Squares' in method: + y_ls=df2['LS_slope']*xval+df2['LS_intercept'] + ax.plot(xval,y_ls,color='b',linewidth=1,label='Least Squares, slope = '+str(round(df2['LS_slope'],3))+' '+unit+'/yr') + del y_ls + + if 'Theil-Sen' in method: + y_ts=df2['TS_slope']*xval+df2['TS_intercept'] + x0 = (xval.min() + xval.max()) / 2.0 + y0 = df2['TS_slope'] * x0 + df2['TS_intercept'] + # Compute intercepts so the lo/hi slope lines cross at (x0, y0) + b_l = y0 - df2['TS_slope_lower'] * x0 + b_u = y0 - df2['TS_slope_upper'] * x0 + y_ts_l=df2['TS_slope_lower']*xval+b_l + y_ts_u=df2['TS_slope_upper']*xval+b_u + ax.plot(xval,y_ts,color='r',linewidth=1,linestyle='solid',label='Theil–Sen, slope = '+str(round(df2['TS_slope'],3))+' '+unit+'/yr') + ax.plot(xval,y_ts_l,color='r',linewidth=1,linestyle='-.',label='Theil–Sen lower, slope = '+str(round(df2['TS_slope_lower'],3))+' '+unit+'/yr') + ax.plot(xval,y_ts_u,color='r',linewidth=1,linestyle='--',label='Theil–Sen upper, slope = '+str(round(df2['TS_slope_upper'],3))+' '+unit+'/yr') + del y_ts,y_ts_l,y_ts_u,x0,y0,b_u,b_l + + ax.set_xlim(xval[0],xval[-1]) + + ax.set_xticks(ticks=xval[::5], labels=xval[::5],fontsize=14) + ax.tick_params(axis='y', labelsize=14) + ax.set_xlabel('Years',fontsize=14) + ax.set_ylabel(ylabel,fontsize=14) + + plt.legend() + ax.grid(True) + plt.tight_layout() + if output_figure is not None: + plt.savefig(output_figure,dpi=250,facecolor='white',bbox_inches='tight') + plt.close() + + return fig + + + + diff --git a/metocean_stats/plots/extreme.py b/metocean_stats/plots/extreme.py index ccb65cf..42a953c 100644 --- a/metocean_stats/plots/extreme.py +++ b/metocean_stats/plots/extreme.py @@ -1106,6 +1106,6 @@ def plot_cca_profiles(data,var='current_speed_',month=None,percentile=None,retur ax.set_xlim(lev[0],lev[-1]) plt.grid(color='lightgray',linestyle=':') plt.tight_layout() - plt.savefig(output_file) + if output_file != "": plt.savefig(output_file,dpi=250,facecolor='white',bbox_inches='tight') return fig diff --git a/metocean_stats/plots/general.py b/metocean_stats/plots/general.py index ca01fe1..da2eaf5 100644 --- a/metocean_stats/plots/general.py +++ b/metocean_stats/plots/general.py @@ -720,7 +720,7 @@ def plot_tidal_levels(data, var='tide',start_time=None , end_time=None ,output_f plt.plot(data,color='lightgrey') plt.axhline(y=df.loc[df['Tidal Level'] == 'HAT'].values[0][1], color='r', linestyle='--',label='HAT') plt.axhline(y=df.loc[df['Tidal Level'] == 'MSL'].values[0][1], color='k', linestyle='-',label='MSL') - plt.axhline(y=df.loc[df['Tidal Level'] == 'LAT'].values[0][1], color='r', linestyle='--',label='LAT') + plt.axhline(y=df.loc[df['Tidal Level'] == 'LAT'].values[0][1], color='r', linestyle='-.',label='LAT') plt.ylabel('tidal elevation [m]') plt.grid() @@ -734,6
+734,7 @@ def plot_tidal_levels(data, var='tide',start_time=None , end_time=None ,output_f plt.tight_layout() # Save the figure plt.savefig(output_file) + plt.close() return fig diff --git a/metocean_stats/plots/verification.py b/metocean_stats/plots/verification.py index c4c98ce..39dccb1 100644 --- a/metocean_stats/plots/verification.py +++ b/metocean_stats/plots/verification.py @@ -359,7 +359,5 @@ def plotting(var_ref,var_comp,maxx,index): print('The option you have sent in is invalid.') if output_file != "": plt.savefig(output_file,dpi=200,facecolor='white',bbox_inches='tight') - - plt.show() plt.close() return fig \ No newline at end of file diff --git a/metocean_stats/stats/extreme.py b/metocean_stats/stats/extreme.py index d6b8a87..202dfdc 100644 --- a/metocean_stats/stats/extreme.py +++ b/metocean_stats/stats/extreme.py @@ -725,7 +725,7 @@ def joint_distribution_Hs_Tp(data,var_hs='hs',var_tp='tp',periods=[1,10,100,1000 start = 1 x = mean_hs[start:] y = variance_lnTp[start:] - parameters, covariance = curve_fit(aux_funcs.Gauss4, x, y)#,maxfev=10000) + parameters, covariance = curve_fit(aux_funcs.Gauss4, x, y, maxfev=10000) b1 = 0.005 b2 = parameters[0] b3 = parameters[1] diff --git a/metocean_stats/stats/general.py b/metocean_stats/stats/general.py index 56b4eaa..92c30f4 100644 --- a/metocean_stats/stats/general.py +++ b/metocean_stats/stats/general.py @@ -11,6 +11,7 @@ from scipy.signal import find_peaks from .aux_funcs import convert_latexTab_to_csv +from ..tables import general as tg def calculate_scatter(data,var1, step_var1, var2, step_var2, from_origin=False, labels_lower=False): ''' @@ -109,175 +110,6 @@ def scatter_diagram(data: pd.DataFrame, var1: str, step_var1: float, var2: str, return hi -def table_var_sorted_by_hs(data, var, var_hs='hs', output_file='var_sorted_by_Hs.txt'): - """ - The function is written by dung-manh-nguyen and KonstantinChri. 
- This will sort variable var e.g., 'tp' by 1 m interval og hs - then calculate min, percentile 5, mean, percentile 95 and max - data : panda series - var : variable - output_file: extension .txt for latex table or .csv for csv table - """ - Hs = data[var_hs] - Var = data[var] - binsHs = np.arange(0.,math.ceil(np.max(Hs))+0.1) # +0.1 to get the last one - temp_file = output_file.split('.')[0] - - Var_binsHs = {} - for j in range(len(binsHs)-1) : - Var_binsHs[str(int(binsHs[j]))+'-'+str(int(binsHs[j+1]))] = [] - - N = len(Hs) - for i in range(N): - for j in range(len(binsHs)-1) : - if binsHs[j] <= Hs.iloc[i] < binsHs[j+1] : - Var_binsHs[str(int(binsHs[j]))+'-'+str(int(binsHs[j+1]))].append(Var.iloc[i]) - - with open(temp_file, 'w') as f: - f.write('\\begin{tabular}{l p{1.5cm}|p{1.5cm} p{1.5cm} p{1.5cm} p{1.5cm} p{1.5cm}}' + '\n') - f.write(r'& & \multicolumn{5}{c}{'+var+'} \\\\' + '\n') - f.write(r'Hs & Entries & Min & 5\% & Mean & 95\% & Max \\\\' + '\n') - f.write(r'\hline' + '\n') - - for j in range(len(binsHs)-1) : - Var_binsHs_temp = Var_binsHs[str(int(binsHs[j]))+'-'+str(int(binsHs[j+1]))] - - Var_min = round(np.min(Var_binsHs_temp), 1) - Var_P5 = round(np.percentile(Var_binsHs_temp , 5), 1) - Var_mean = round(np.mean(Var_binsHs_temp), 1) - Var_P95 = round(np.percentile(Var_binsHs_temp, 95), 1) - Var_max = round(np.max(Var_binsHs_temp), 1) - - hs_bin_temp = str(int(binsHs[j]))+'-'+str(int(binsHs[j+1])) - f.write(hs_bin_temp + ' & '+str(len(Var_binsHs_temp))+' & '+str(Var_min)+' & '+str(Var_P5)+' & '+str(Var_mean)+' & '+str(Var_P95)+' & '+str(Var_max)+' \\\\' + '\n') - hs_bin_temp = str(int(binsHs[-1]))+'-'+str(int(binsHs[-1]+1)) # +1 for one empty row - f.write(hs_bin_temp + ' & 0 & - & - & - & - & - \\\\' + '\n') - - # annual row - Var_min = round(np.min(Var), 1) - Var_P5 = round(np.percentile(Var , 5), 1) - Var_mean = round(np.mean(Var), 1) - Var_P95 = round(np.percentile(Var, 95), 1) - Var_max = round(np.max(Var), 1) - - hs_bin_temp = str(int(binsHs[0]))+'-'+str(int(binsHs[-1]+1)) # +1 for one empty row - f.write(hs_bin_temp + ' & '+str(len(Var))+' & '+str(Var_min)+' & '+str(Var_P5)+' & '+str(Var_mean)+' & '+str(Var_P95)+' & '+str(Var_max)+' \\\\' + '\n') - f.write(r'\hline' + '\n') - f.write(r'\end{tabular}' + '\n') - - if output_file.split('.')[1] == 'csv': - convert_latexTab_to_csv(temp_file, output_file) - os.remove(temp_file) - else: - os.rename(temp_file, output_file) - - - return - -def table_monthly_percentile(data,var,output_file='var_monthly_percentile.txt'): - """ - The function is written by dung-manh-nguyen and KonstantinChri. 
- this function will sort variable var e.g., hs by month and calculate percentiles - data : panda series - var : variable - output_file: extension .txt for latex table or .csv for csv table - """ - - Var = data[var] - varName = data[var].name - Var_month = Var.index.month - M = Var_month.values - temp_file = output_file.split('.')[0] - - months = calendar.month_name[1:] # eliminate the first insane one - for i in range(len(months)) : - months[i] = months[i][:3] # get the three first letters - - monthlyVar = {} - for i in range(len(months)) : - monthlyVar[months[i]] = [] # create empty dictionaries to store data - - - for i in range(len(Var)) : - m_idx = int(M[i]-1) - monthlyVar[months[m_idx]].append(Var.iloc[i]) - - - with open(temp_file, 'w') as f: - f.write(r'\\begin{tabular}{l | p{1.5cm} p{1.5cm} p{1.5cm} p{1.5cm} p{1.5cm}}' + '\n') - f.write(r'& \multicolumn{5}{c}{' + varName + '} \\\\' + '\n') - f.write(r'Month & 5\% & 50\% & Mean & 95\% & 99\% \\\\' + '\n') - f.write(r'\hline' + '\n') - - for j in range(len(months)) : - Var_P5 = round(np.percentile(monthlyVar[months[j]],5),1) - Var_P50 = round(np.percentile(monthlyVar[months[j]],50),1) - Var_mean = round(np.mean(monthlyVar[months[j]]),1) - Var_P95 = round(np.percentile(monthlyVar[months[j]],95),1) - Var_P99 = round(np.percentile(monthlyVar[months[j]],99),1) - f.write(months[j] + ' & '+str(Var_P5)+' & '+str(Var_P50)+' & '+str(Var_mean)+' & '+str(Var_P95)+' & '+str(Var_P99)+' \\\\' + '\n') - - # annual row - f.write('Annual & '+str(Var_P5)+' & '+str(Var_P50)+' & '+str(Var_mean)+' & '+str(Var_P95)+' & '+str(Var_P99)+' \\\\' + '\n') - - f.write(r'\hline' + '\n') - f.write(r'\end{tabular}' + '\n') - - if output_file.split('.')[1] == 'csv': - convert_latexTab_to_csv(temp_file, output_file) - os.remove(temp_file) - else: - os.rename(temp_file, output_file) - - return - - -def table_monthly_min_mean_max(data, var,output_file='montly_min_mean_max.txt') : - """ - The function is written by dung-manh-nguyen and KonstantinChri. - It calculates monthly min, mean, max based on monthly maxima. - data : panda series - var : variable - output_file: extension .txt for latex table or .csv for csv table - """ - - var = data[var] - temp_file = output_file.split('.')[0] - months = calendar.month_name[1:] # eliminate the first insane one - for i in range(len(months)) : - months[i] = months[i][:3] # get the three first letters - - monthly_max = var.resample('M').max() # max in every month, all months - minimum = monthly_max.groupby(monthly_max.index.month).min() # min, sort by month - mean = monthly_max.groupby(monthly_max.index.month).mean() # mean, sort by month - maximum = monthly_max.groupby(monthly_max.index.month).max() # max, sort by month - - with open(temp_file, 'w') as f : - f.write(r'\\begin{tabular}{l | c c c }' + '\n') - f.write(r'Month & Minimum & Mean & Maximum \\\\' + '\n') - f.write(r'\hline' + '\n') - for i in range(len(months)): - f.write(months[i] + ' & ' + str(minimum.values[i]) + ' & ' + str(round(mean.values[i],1)) + ' & ' + str(maximum.values[i]) + ' \\\\' + '\n') - - ## annual row - annual_max = var.resample('Y').max() - min_year = annual_max.min() - mean_year = annual_max.mean() - max_year = annual_max.max() - f.write('Annual Max. 
& ' + str(min_year) + ' & ' + str(round(mean_year,1)) + ' & ' + str(max_year) + ' \\\\' + '\n') - - f.write(r'\hline' + '\n') - f.write(r'\end{tabular}' + '\n') - - - if output_file.split('.')[1] == 'csv': - convert_latexTab_to_csv(temp_file, output_file) - os.remove(temp_file) - else: - os.rename(temp_file, output_file) - - return - def Cmax(Hs,Tm,depth): @@ -425,74 +257,6 @@ scatter(df,var1,var2,location,regression_line,qqplot=True): return fig -def calculate_weather_window(data: pd.DataFrame, var: str,threshold=5, window_size=12): - """ - Calculate the mean and percentiles of consecutive periods where a variable - stays below a certain threshold within a given window size. - - Parameters: - data (pd.DataFrame): The DataFrame containing the time series data. - var (str): The name of the variable in the DataFrame. - threshold (float, optional): The threshold value for the variable. - Defaults to 5. - window_size (int, optional): The size of the window in hours. - Defaults to 12. - - Returns: - tuple: A tuple containing the mean and percentiles of consecutive periods - where the variable stays below the threshold. - """ - #data=data['1958-02-04' :'1958-02-23'] - df = (data[var] < threshold).astype(int) - dt = (data.index[1]-data.index[0]).total_seconds()/3600 - #consecutive_periods = df.rolling(int(window_size/dt)).sum() > 0 - #consecutive_periods = consecutive_periods.astype(int)*df - # Find consecutive sequences of ones - consecutive_periods = (df == 1).astype(int).groupby((df != 1).cumsum()).cumsum() - - # Find indices of zeros between sequences of five or more consecutive ones - #indices_zeros = df[df == 0].groupby(consecutive_periods).filter(lambda x: (x == 0).sum() >= window_size/dt).index - - - plt.plot(data[var][:],'r') - plt.axhline( y=threshold, color='k', linestyle='--') - plt.plot(consecutive_periods,'go') - #plt.plot(data[var].where(consecutive_periods==0)[:],'ro') - #plt.plot(data[var].where(consecutive_periods==1)[:],'go') - plt.grid() - plt.show() - #breakpoint() - # add all periods with zero waiting time (consecutive_periods==1) - # counter_list_no_waiting = [window_size]*int((np.sum(consecutive_periods)*dt)//window_size) - # add all periods with waiting time (consecutive_periods==0) - counter = 0 - counter_list_with_waiting = [[] for _ in range(12)] - for i in range(len(consecutive_periods)-1): - if consecutive_periods.iloc[i]==0 and consecutive_periods.iloc[i+1]==0: - counter = counter + dt - elif consecutive_periods.iloc[i]==0 and consecutive_periods.iloc[i+1]==1: - counter = counter + dt - counter_list_with_waiting[consecutive_periods.index[i].month-1].append(counter) - counter = 0 - counter_list = counter_list_with_waiting #+ counter_list_no_waiting - - mean = np.zeros(12) - p10 = np.zeros(12) - p50 = np.zeros(12) - p90 = np.zeros(12) - - for j in range(len(counter_list)): # 12 months - counter_list_days = np.array(counter_list[j])/24 - mean[j] = np.mean(counter_list_days) - p10[j] = np.percentile(counter_list_days, 10) - p50[j] = np.percentile(counter_list_days, 50) - #p75 = np.percentile(counter_list_days, 75) - p90[j] = np.percentile(counter_list_days, 90) - #breakpoint() - return mean, p10, p50,p90 def weather_window_length(time_series,threshold,op_duration,timestep,month=None): """ @@ -681,6 +445,7 @@ def nb_hours_below_threshold(df,var,thr_arr): del years,years_unique,delta_t return nbhr_arr + def linfitef(x, y, stdx: float=1.0, stdy: float=1.0) -> tuple[float, float]: """ Perform a linear fit considering uncertainties in both variables.
@@ -821,4 +586,63 @@ def find_h_g(Xk,Tm): else: tidal_type = 'Tidal type = diurnal' - return tidal_type \ No newline at end of file + return tidal_type + + + +def stats_monthly_every_year(df,var='HS',method=["P25","mean","P75","P99","max"]): + """ + This function sorts data by year and by month and calculates the desired statistics + + Parameters + ---------- + df: dataframe with index of datetime + Contains the data + var: string + Variable name + method: list of strings + Statistics: can be 'min', 'mean', 'max', 'P99', '99%', ... + + Returns + ------- + pd.DataFrame containing the statistic for every month of every year + + Authors + ------- + Written by Dung M. Nguyen and clio-met + """ + + percentiles = tg._percentile_str_to_pd_format(method) + + res = pd.DataFrame() + + for m in percentiles: + ll=[] + for yr in range(df.index.year[0],df.index.year[-1]+1): + for mn in range(1,13): + a=df[(df.index.year==yr) & (df.index.month==mn)][var] + if m[-1]=='%': # percentile + ll.append(a.describe(percentiles=np.arange(0,1,0.01))[m]) + if m=='mean': + ll.append(a.mean()) + if m=='min': + ll.append(a.min()) + if m=='max': + ll.append(a.max()) + del a + if len(ll)!=0: + res[m]=ll + del ll + + years=[] + months=[] + for yr in range(df.index.year[0],df.index.year[-1]+1): + for mn in range(1,13): + years.append(yr) + months.append(mn) + + res['year']=years + res['month']=months + del years,months + + return res \ No newline at end of file diff --git a/metocean_stats/tables/__init__.py b/metocean_stats/tables/__init__.py index 283bbee..551b8db 100644 --- a/metocean_stats/tables/__init__.py +++ b/metocean_stats/tables/__init__.py @@ -8,4 +8,5 @@ from .dir import * from .extreme import * from .general import * +from .climate import * from .spectra import * diff --git a/metocean_stats/tables/climate.py b/metocean_stats/tables/climate.py new file mode 100644 index 0000000..f5ed626 --- /dev/null +++ b/metocean_stats/tables/climate.py @@ -0,0 +1,214 @@ +import pandas as pd +from ..stats import general as sg +from . import general +import numpy as np +import calendar +from scipy.stats import theilslopes, linregress, kendalltau + + + +def table_yearly_stats(data:pd.DataFrame,var:str,percentiles:list[str]=["P25","mean","P75","P99","max"],output_file="table_yearly_stats.csv"): + """ + Group dataframe by years and calculate percentiles, min, mean, or max. + + Parameters + ------------ + data: pd.DataFrame + The data containing column var, and with a datetime index. + var: string + The column to calculate percentiles of. + percentiles: list of strings + List of percentiles, e.g. P25, P50, P75, 30%, 40% etc. + Some others are also allowed: count (number of data points), and min, mean, max.
output_file: string + Name of the output file + + Returns + ------- + pd.DataFrame and output_file if name specified + + Authors + ------- + Modified from table_monthly_percentile by clio-met + """ + series = data[var] + percentiles = general._percentile_str_to_pd_format(percentiles) + series.index = pd.to_datetime(series.index) + year_labels=series.index.year.unique().tolist()+["All years"] + table = [] + for m,g in series.groupby(series.index.year): + table.append(g.describe(percentiles=np.arange(0,1,0.01))[percentiles]) + table.append(series.describe(percentiles=np.arange(0,1,0.01))[percentiles]) + table = pd.DataFrame(table,year_labels) + if output_file != "": + table.to_csv(output_file) + return table + + +def table_yearly_1stat_vertical_levels(df, rad_colname='current_speed_', method='mean', output_file='table_yearly_stats_all_levels.csv'): + """ + This function calculates yearly statistics for all vertical levels + + Parameters + ---------- + df: pd.DataFrame + Should have datetime as index and + column name in format of depth and m at the end, for example 12.3m + rad_colname: string + Common part of the variable name of interest + output_file: string + Name of the output file + method: string + Can be a percentile, e.g. P25, P50, P75, 30%, 40% etc. + Some others are also allowed: count (number of data points), and min, mean, max. + Only one should be specified. + + Returns + ------- + result: pd.DataFrame + + Authors + ------- + Written by Dung M. Nguyen and clio-met + """ + filter_col = [col for col in df if col.startswith(rad_colname)] + df=df[filter_col] + result = pd.DataFrame(columns = filter_col) + for i in range(len(filter_col)): + result[filter_col[i]]=table_yearly_stats(df,var=filter_col[i],percentiles=[method],output_file="") + + result=result.dropna(axis=1, how='any') + if output_file != "": + result.to_csv(output_file) + + return result + + +def table_linear_regression(df,var='air_temperature_2m',stat='mean',method=['Least-Squares','Theil-Sen','Kendall-tau'],confidence_interval=0.95,intercept=True,output_file=None): + + """ + This function calculates linear regression parameters using Ordinary Least Squares, + the Theil-Sen method, and the Kendall tau test, for each month and for the year + + Parameters + ---------- + df: pd.DataFrame with index of datetime + var: string + Variable name + stat: string + Can be 'min', 'p5', 'P90', '50%', 'mean', 'max'. Default is 'mean' + method: list of string + By default 'Least-Squares', 'Theil-Sen', 'Kendall-tau' + confidence_interval: float + To be specified for the Theil-Sen method, between 0.5 and 1. Default is 0.95 + intercept: Boolean + True to get intercepts of the regression lines (for Least-Squares and Theil-Sen). Default is True + output_file: string + Name of the output file + + Returns + ------- + tuple of pd.DataFrame: the regression table, the monthly statistics, and the yearly statistics + For Least-Squares: slope, intercept, and coefficient of determination R^2 + For Theil-Sen: median slope, intercept, smallest, and highest slope + For Kendall-tau: the tau and the p-value + + Authors + ------- + Written by Dung M.
Nguyen and clio-met + """ + + df2=sg.stats_monthly_every_year(df,var=var,method=[stat]) + df3=table_yearly_stats(df,var=var,percentiles=[stat],output_file="") + + months_names = calendar.month_abbr[1:] + months_names=months_names+['Year'] + + df_out=pd.DataFrame() + + if 'Least-Squares' in method: + slop_ls=[] + inter_ls=[] + r2val_ls=[] + # Loop over the months + for mon in range(1,13): + y=df2[df2['month']==mon].iloc[:,0].values + x=df2[df2['month']==mon]['year'].values + slope, interc, r_value, p_value, std_err = linregress(x.astype(float), y) + slop_ls.append(slope) + inter_ls.append(interc) + r2val_ls.append(r_value**2) + del slope,interc,r_value,p_value,std_err,x,y + # For the yearly means + y=df3.iloc[:-1,0].values + x=df3.index[:-1].values + slope, interc, r_value, p_value, std_err = linregress(x.astype(float), y) + del x,y + slop_ls.append(slope) + inter_ls.append(interc) + r2val_ls.append(r_value**2) + del slope,interc,r_value,p_value,std_err + df_out['LS_slope']=slop_ls + if intercept: + df_out['LS_intercept']=inter_ls + df_out['LS_r_square']=r2val_ls + del slop_ls,inter_ls,r2val_ls + + if 'Theil-Sen' in method: + slop_ts=[] + inter_ts=[] + slopl_ts=[] + slopu_ts=[] + # Loop over the months + for mon in range(1,13): + y=df2[df2['month']==mon].iloc[:,0].values + x=df2[df2['month']==mon]['year'].values + slope_theil_sen, interc, lower, upper = theilslopes(y, x.astype(float), confidence_interval) + slop_ts.append(slope_theil_sen) + inter_ts.append(interc) + slopl_ts.append(lower) + slopu_ts.append(upper) + del slope_theil_sen,interc,lower,upper + # For the yearly means + y=df3.iloc[:-1,0].values + x=df3.index[:-1].values + slope_theil_sen, interc, lower, upper = theilslopes(y, x.astype(float), confidence_interval) + slop_ts.append(slope_theil_sen) + inter_ts.append(interc) + slopl_ts.append(lower) + slopu_ts.append(upper) + del slope_theil_sen,interc,lower,upper + df_out['TS_slope']=slop_ts + if intercept: + df_out['TS_intercept']=inter_ts + df_out['TS_slope_lower']=slopl_ts + df_out['TS_slope_upper']=slopu_ts + del slop_ts,inter_ts,slopl_ts,slopu_ts + + if 'Kendall-tau' in method: + tau_test=[] + tau_p_value=[] + # Loop over the months + for mon in range(1,13): + y=df2[df2['month']==mon].iloc[:,0].values + x=df2[df2['month']==mon]['year'].values + tau, p_value = kendalltau(x.astype(float), y) + tau_test.append(tau) + tau_p_value.append(p_value) + del tau,p_value + # For the yearly means + y=df3.iloc[:-1,0].values + x=df3.index[:-1].values + tau, p_value = kendalltau(x.astype(float), y) + tau_test.append(tau) + tau_p_value.append(p_value) + df_out['Kendall_tau']=tau_test + df_out['Kendall_p_value']=tau_p_value + + df_out['index']=months_names + df_out=df_out.set_index('index') + + if output_file is not None: + df_out.to_csv(output_file, index=True) + + return df_out,df2,df3.iloc[:-1,:] \ No newline at end of file diff --git a/metocean_stats/tables/general.py b/metocean_stats/tables/general.py index d0789d4..835abb5 100644 --- a/metocean_stats/tables/general.py +++ b/metocean_stats/tables/general.py @@ -359,7 +359,7 @@ def table_monthly_non_exceedance(data: pd.DataFrame, var: str, step_var: float, pd.DataFrame: Monthly non-exceedance table with percentage of time each data level occurs in each month.
""" -# Define bins + # Define bins bins = np.arange(int(data[var].min()), data[var].max() + step_var, step_var).tolist() labels = [f'<{num}' for num in [round(bin, 2) for bin in bins]] @@ -369,7 +369,6 @@ def table_monthly_non_exceedance(data: pd.DataFrame, var: str, step_var: float, # Group by month and var bin, then count occurrences grouped = data.groupby([data.index.month, var+'-level'], observed=True).size().unstack(fill_value=0) - # Calculate percentage of time each data bin occurs in each month percentage_by_month = grouped.div(grouped.sum(axis=1), axis=0) * 100 @@ -385,13 +384,6 @@ def table_monthly_non_exceedance(data: pd.DataFrame, var: str, step_var: float, cumulative_percentage.loc['P99'] = data.groupby(data.index.month, observed=True)[var].quantile(0.99) cumulative_percentage.loc['Maximum'] = data.groupby(data.index.month, observed=True)[var].max() cumulative_percentage['Year'] = cumulative_percentage.mean(axis=1)[:-6] - #cumulative_percentage['Year'].iloc[-7] = data[var].min() - #cumulative_percentage['Year'].iloc[-6] = data[var].mean() - #cumulative_percentage['Year'].iloc[-5] = data[var].quantile(0.50) - #cumulative_percentage['Year'].iloc[-4] = data[var].quantile(0.75) - #cumulative_percentage['Year'].iloc[-3] = data[var].quantile(0.95) - #cumulative_percentage['Year'].iloc[-2] = data[var].quantile(0.99) - #cumulative_percentage['Year'].iloc[-1] = data[var].max() cumulative_percentage.loc[cumulative_percentage.index[-7], 'Year'] = data[var].min() cumulative_percentage.loc[cumulative_percentage.index[-6], 'Year'] = data[var].mean() @@ -519,6 +511,10 @@ def table_monthly_percentile(data:pd.DataFrame, percentiles : list of strings List of percentiles, e.g. P25, P50, P75, 30%, 40% etc. Some others are also allowed: count (number of data points), and min, mean, max. 
+ + Returns + ------- + pd.DataFrame """ series = data[var] percentiles = _percentile_str_to_pd_format(percentiles) @@ -638,21 +634,7 @@ def monthly_directional_percentiles( return monthly_tables -# For one variable -#def table_monthly_weather_window(data: pd.DataFrame, var: str,threshold=5, window_size=12, timestep=3, output_file: str = None): -# results = [] -# months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] -# for month in range(1, 13): -# avg_duration, p10, p50, p90 = stats.weather_window_length(time_series=data[var],month=month ,threshold=threshold,op_duration=window_size,timestep=timestep) -# results.append((p10,p50, avg_duration, p90)) -# results_df = pd.DataFrame(results, columns=['P10', 'P50', 'Mean', 'P90'], index=months).T.round(2) -# if output_file: -# # Save results to CSV -# results_df.to_csv('monthly_weather_window_results.csv') -# -# return results_df - -# For multivariable + def table_monthly_weather_window(data: pd.DataFrame, var: str, threshold: float, window_size=12, timestep=3, output_file: str = None): # var should be a list of variables, and threshold should be a list of thresholds # more outputs than table_monthly_weather_window diff --git a/tests/test_climate.py b/tests/test_climate.py new file mode 100644 index 0000000..c06bf7f --- /dev/null +++ b/tests/test_climate.py @@ -0,0 +1,51 @@ +import os +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt + +from metocean_stats import plots +from metocean_stats.plots.climate import * +from metocean_stats.stats.aux_funcs import readNora10File + +# Define TimeSeries-object for NORA3 +dirname = os.path.dirname(__file__) +ds = readNora10File(os.path.join(dirname, 'data/NORA_test.txt')) +ds_ocean = pd.read_csv(os.path.join(dirname, 'data/NorkystDA_test.csv'),comment='#',index_col=0, parse_dates=True) +depth = ['0m', '1m', '2.5m', '5m', '10m', '15m', '20m', '25m', '30m', '40m', '50m', '75m', '100m', '150m', '200m'] + + + +def test_plot_stripes(): + fig = plots.plot_yearly_stripes(ds,var_name='HS',method='mean',ylabel='Hs [m]',output_file='') + # Check that the output is a Matplotlib Figure + assert isinstance(fig, plt.Figure), "The output is not a Matplotlib Figure." + + +def test_plot_heatmap_monthly_yearly(): + fig = plots.plot_heatmap_monthly_yearly(ds,var='T2m',method='mean',cb_label='2-m temperature [°C]',output_file='') + # Check that the output is a Matplotlib Figure + assert isinstance(fig, plt.Figure), "The output is not a Matplotlib Figure." + + +def test_plot_yearly_depth_profiles(): + fig = plots.plot_yearly_vertical_profiles(ds_ocean, rad_colname='current_speed_', method='mean', yaxis_direction='down', xlabel='Current speed [m/s]', output_file='') + # Check that the output is a Matplotlib Figure + assert isinstance(fig, plt.Figure), "The output is not a Matplotlib Figure." + + +def test_plot_yearly_depth_profiles_wind(): + fig = plots.plot_yearly_vertical_profiles(ds, rad_colname='W', method='mean', yaxis_direction='up', xlabel='Wind speed [m/s]', output_file='') + # Check that the output is a Matplotlib Figure + assert isinstance(fig, plt.Figure), "The output is not a Matplotlib Figure." + + +def test_plot_linear_regression(): + fig = plots.plot_linear_regression(ds,var='W10',time='Year',stat='P90',method=['Least-Squares','Theil-Sen'],confidence_interval=0.95,ylabel='Wind speed [m/s]',output_figure='') + # Check that the output is a Matplotlib Figure + assert isinstance(fig, plt.Figure), "The output is not a Matplotlib Figure." 
+ + +def test_plot_heatmap_profiles_yearly(): + fig = plots.plot_heatmap_profiles_yearly(ds,rad_colname='W',cb_label='Wind speed [m/s]',yaxis_direction='up',method='P80',output_file='') + # Check that the output is a Matplotlib Figure + assert isinstance(fig, plt.Figure), "The output is not a Matplotlib Figure." \ No newline at end of file diff --git a/tests/test_plots.py b/tests/test_plots.py index 906b5eb..a754b94 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -4,6 +4,7 @@ import matplotlib.pyplot as plt from metocean_stats import plots +from metocean_stats.plots.climate import * from metocean_stats.stats.aux_funcs import readNora10File from .data import synthetic_dataset @@ -245,19 +246,19 @@ def test_plot_monthly_return_periods_cur_pot(ds=ds_ocean): else: raise ValueError("FigValue is not correct") -#def test_threshold_sensitivity(ds=ds): -# extreme_stats.threshold_sensitivity(data=ds.data, var='hs', -# thresholds=[1,1.5]) +# #def test_threshold_sensitivity(ds=ds): +# # extreme_stats.threshold_sensitivity(data=ds.data, var='hs', +# # thresholds=[1,1.5]) -#def test_joint_distribution_Hs_Tp(ds=ds): -# extreme_stats.joint_distribution_Hs_Tp(df=ds.data, file_out='test.png') -# os.remove('test.png') +# #def test_joint_distribution_Hs_Tp(ds=ds): +# # extreme_stats.joint_distribution_Hs_Tp(df=ds.data, file_out='test.png') +# # os.remove('test.png') -#def test_mean_profile(ds=ds): -# profile_stats.mean_profile(data = ds.data, vars = ['wind_speed_10m','wind_speed_20m','wind_speed_50m','wind_speed_100m','wind_speed_250m','wind_speed_500m','wind_speed_750m'],height_levels=[10,20,50,100,250,500,750], perc = [25,75], output_file=False) +# #def test_mean_profile(ds=ds): +# # profile_stats.mean_profile(data = ds.data, vars = ['wind_speed_10m','wind_speed_20m','wind_speed_50m','wind_speed_100m','wind_speed_250m','wind_speed_500m','wind_speed_750m'],height_levels=[10,20,50,100,250,500,750], perc = [25,75], output_file=False) -#def test_profile_shear(ds=ds): -# profile_stats.profile_shear(data = ds.data, vars = ['wind_speed_10m','wind_speed_20m','wind_speed_50m','wind_speed_100m','wind_speed_250m','wind_speed_500m','wind_speed_750m'],height_levels=[10,20,50,100,250,500,750], z=[20,250], perc = [25,75], output_file=False) +# #def test_profile_shear(ds=ds): +# # profile_stats.profile_shear(data = ds.data, vars = ['wind_speed_10m','wind_speed_20m','wind_speed_50m','wind_speed_100m','wind_speed_250m','wind_speed_500m','wind_speed_750m'],height_levels=[10,20,50,100,250,500,750], z=[20,250], perc = [25,75], output_file=False) def test_plot_nb_hours_below_threshold(ds=ds): output_file = 'test_plot_nb_hr_below_t.png' @@ -282,7 +283,7 @@ def test_plot_multi_diagnostic_with_uncertainty(ds=ds): def test_plot_multi_joint_distribution_Hs_Tp_var3(ds=ds): - output_file = 'test_mulit_joint_distribution_Hs_Tp_var3.png' + output_file = 'test_multi_joint_distribution_Hs_Tp_var3.png' fig = plots.plot_multi_joint_distribution_Hs_Tp_var3(ds,var_hs='HS',var_tp='TP',var3='W10',var3_units='m/s',periods=[100],var3_bin=10,threshold_min=100,output_file=output_file) if os.path.exists(output_file): os.remove(output_file) @@ -424,4 +425,5 @@ def test_plot_spectra_2d(): output_file = 'test_plot_dir_mean_2dspectrum.png' fig = plots.plot_spectra_2d(data=ds_synthetic_spectra, var='SPEC', method='monthly_mean', output_file=output_file) if os.path.exists(output_file): - os.remove(output_file) \ No newline at end of file + os.remove(output_file) + diff --git a/tests/test_tables.py b/tests/test_tables.py index b0f4c08..b15a52b
100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -3,6 +3,7 @@ import numpy as np from metocean_stats import tables +from metocean_stats.tables.climate import * from metocean_stats.stats.aux_funcs import readNora10File from .data import synthetic_dataset @@ -587,8 +588,21 @@ def test_table_cca_profile(): else: raise ValueError("Shape is not correct") + +def test_table_linear_regression(): + output_file='test_table_linear_regression.csv' + df, _, _ = tables.table_linear_regression(df=ds,var='HS',stat='mean',method=['Least-Squares','Theil-Sen','Kendall-tau'],confidence_interval=0.95,intercept=True,output_file=output_file) + if os.path.exists(output_file): + os.remove(output_file) + if df.shape == (13, 9): + pass + else: + raise ValueError("Linear regression table shape is not correct") + + def test_table_monthly_freq_1dspectrum(): output_file='test_table_monthly_freq_1dspectrum.csv' df = tables.table_monthly_freq_1dspectrum(data=ds_synthetic_spectra,var='SPEC',output_file=output_file) if os.path.exists(output_file): os.remove(output_file) +
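For quick reference, the sketch below shows how the climate functions added by this patch fit together, in the same spirit as the docs/index.rst examples above. It is not part of the patch itself: the synthetic hourly DataFrame (seed, date range, Weibull noise with an imposed trend) is illustrative only; any DataFrame with a DatetimeIndex and the relevant columns should work the same way. Note that the Theil-Sen lower/upper lines drawn by ``plot_linear_regression`` are pivoted about the midpoint of the series (intercepts recomputed so all three lines cross there), so they bracket the median-slope line rather than sharing its intercept.

.. code-block:: python

    import numpy as np
    import pandas as pd

    from metocean_stats import plots, tables

    # Synthetic hourly Hs series with a weak upward trend (illustrative only)
    rng = np.random.default_rng(0)
    idx = pd.date_range('1990-01-01', '2020-12-31 23:00', freq='h')
    trend = np.linspace(0.0, 0.3, len(idx))
    df = pd.DataFrame({'HS': 2.0 + trend + rng.weibull(1.5, len(idx))}, index=idx)

    # Stripes plot of the yearly mean (empty output_file: no figure is saved)
    fig = plots.plot_yearly_stripes(df, var_name='HS', method='mean',
                                    ylabel='Hs [m]', output_file='')

    # Regression table: one row per month plus a 'Year' row; the function also
    # returns the monthly and yearly statistics it was computed from
    table, monthly, yearly = tables.table_linear_regression(
        df, var='HS', stat='mean',
        method=['Least-Squares', 'Theil-Sen', 'Kendall-tau'],
        confidence_interval=0.95, intercept=True, output_file=None)
    print(table[['LS_slope', 'TS_slope', 'Kendall_tau']])

    # Yearly time series with both fitted regression lines
    fig = plots.plot_linear_regression(df, var='HS', time='Year', stat='mean',
                                       method=['Least-Squares', 'Theil-Sen'],
                                       confidence_interval=0.95,
                                       ylabel='Hs [m]', output_figure=None)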