Skip to content

Commit 00731f5

Browse files
committed
bugfixes
1 parent c591909 commit 00731f5

File tree

3 files changed

+75
-50
lines changed

3 files changed

+75
-50
lines changed

sgdml/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2323
# SOFTWARE.
2424

25-
__version__ = '0.5.2.dev2'
25+
__version__ = '0.5.2.dev3'
2626

2727
MAX_PRINT_WIDTH = 100
2828
LOG_LEVELNAME_WIDTH = 7 # do not modify

sgdml/cli.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ def create( # noqa: C901
729729

730730
if sigs is None:
731731
log.info(
732-
'Kernel hyper-parameter sigma was automatically set to range \'10:10:100\'.'
732+
'Kernel hyper-parameter sigma (length scale) was automatically set to range \'10:10:100\'.'
733733
)
734734
sigs = list(range(10, 100, 10)) # default range
735735

@@ -919,14 +919,23 @@ def save_progr_callback(
919919
has_converged_once = False
920920

921921
for i, task_file_name in enumerate(task_file_names):
922-
if n_tasks > 1:
923-
if i > 0:
924-
print()
925-
print(ui.color_str('Task {:d} of {:d}'.format(i + 1, n_tasks), bold=True))
926922

927923
task_file_path = os.path.join(task_dir, task_file_name)
928924
with np.load(task_file_path, allow_pickle=True) as task:
929925

926+
if n_tasks > 1:
927+
if i > 0:
928+
print()
929+
930+
n_train = len(task['idxs_train'])
931+
n_valid = len(task['idxs_valid'])
932+
ui.print_two_column_str(
933+
ui.color_str('Task {:d} of {:d}'.format(i + 1, n_tasks), bold=True),
934+
'{:,} + {:,} points (training + validation), sigma (length scale): {}'.format(
935+
n_train, n_valid, task['sig']
936+
),
937+
)
938+
930939
model_file_name = io.model_file_name(task, is_extended=False)
931940
model_file_path = os.path.join(task_dir, model_file_name)
932941

@@ -1625,9 +1634,16 @@ def test(
16251634
e_rmse_pct = (e_rmse / e_err['rmse'] - 1.0) * 100
16261635
f_rmse_pct = (f_rmse / f_err['rmse'] - 1.0) * 100
16271636

1628-
# if func_called_directly and n_models == 1:
16291637
if is_test and n_models == 1:
1630-
print(ui.color_str('\nTest errors (MAE/RMSE)', bold=True))
1638+
n_train = len(model['idxs_train'])
1639+
n_valid = len(model['idxs_valid'])
1640+
print()
1641+
ui.print_two_column_str(
1642+
ui.color_str('Test errors (MAE/RMSE)', bold=True),
1643+
'{:,} + {:,} points (training + validation), sigma (length scale): {}'.format(
1644+
n_train, n_valid, model['sig']
1645+
),
1646+
)
16311647

16321648
r_unit = 'unknown unit'
16331649
e_unit = 'unknown unit'
@@ -1805,7 +1821,7 @@ def select(model_dir, overwrite, model_file=None, command=None, **kwargs): # no
18051821
sig_col = [row[0] for row in rows]
18061822
if best_sig == min(sig_col) or best_sig == max(sig_col):
18071823
log.warning(
1808-
'The optimal sigma lies on the boundary of the search grid.\n'
1824+
'The optimal sigma (length scale) lies on the boundary of the search grid.\n'
18091825
+ 'Model performance might improve if the search grid is extended in direction sigma {} {:d}.'.format(
18101826
'<' if best_idx == 0 else '>', best_sig
18111827
)
@@ -1992,7 +2008,7 @@ def _add_argument_dir_with_file_type(parser, type, or_file=False):
19922008
metavar=('<s1>', '<s2>'),
19932009
dest='sigs',
19942010
type=io.parse_list_or_range,
1995-
help='integer list and/or range <start>:[<step>:]<stop> for the kernel hyper-parameter sigma',
2011+
help='integer list and/or range <start>:[<step>:]<stop> for the kernel hyper-parameter sigma (length scale)',
19962012
nargs='+',
19972013
)
19982014

sgdml/train.py

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def _assemble_kernel_mat_wkr(
144144

145145
else: # Sequential indexing
146146
K_j = j * dim_i if j < n_train else n_train * dim_i + (j % n_train)
147-
blk_j = slice(K_j, K_j + dim_i) if j < n_train else slice(K_j, K_j+1) ######
147+
blk_j = slice(K_j, K_j + dim_i) if j < n_train else slice(K_j, K_j + 1)
148148
keep_idxs_3n = slice(None) # same as [:]
149149

150150
# Note: The modulo-operator wraps around the index pointer on the training points when
@@ -179,7 +179,7 @@ def _assemble_kernel_mat_wkr(
179179
j < n_train
180180
): # This column only contains second and first derivative constraints.
181181

182-
#for i in range(j if exploit_sym else 0, n_train):
182+
# for i in range(j if exploit_sym else 0, n_train):
183183
for i in range(0, n_train):
184184

185185
blk_i = slice(i * dim_i, (i + 1) * dim_i)
@@ -233,23 +233,23 @@ def _assemble_kernel_mat_wkr(
233233

234234
K_fe = -np.einsum('ik,jki -> j', K_fe, rj_d_desc_perms)
235235

236-
E_off_i = n_train * dim_i#, K.shape[1] - n_train
236+
E_off_i = n_train * dim_i # , K.shape[1] - n_train
237237
K[E_off_i + i, blk_j] = K_fe
238238

239239
else:
240240

241241
if use_E_cstr:
242242

243-
#rj_d_desc = desc_func.d_desc_from_comp(R_d_desc[j % n_train, :, :])[0][
243+
# rj_d_desc = desc_func.d_desc_from_comp(R_d_desc[j % n_train, :, :])[0][
244244
# :, :
245-
#] # convert descriptor back to full representation
245+
# ] # convert descriptor back to full representation
246246

247-
#rj_d_desc_perms = np.reshape(
247+
# rj_d_desc_perms = np.reshape(
248248
# np.tile(rj_d_desc.T, n_perms)[:, tril_perms_lin], (-1, dim_d, n_perms)
249-
#)
249+
# )
250250

251-
E_off_i = n_train * dim_i # Account for 'alloc_extra_rows'!.
252-
#blk_j_full = slice((j % n_train) * dim_i, ((j % n_train) + 1) * dim_i)
251+
E_off_i = n_train * dim_i # Account for 'alloc_extra_rows'!.
252+
# blk_j_full = slice((j % n_train) * dim_i, ((j % n_train) + 1) * dim_i)
253253
# for i in range((j % n_train) if exploit_sym else 0, n_train):
254254
for i in range(0, n_train):
255255

@@ -259,9 +259,12 @@ def _assemble_kernel_mat_wkr(
259259
order='F',
260260
)
261261

262-
ri_d_desc = desc_func.d_desc_from_comp(R_d_desc[i, :, :])[0] # convert descriptor back to full representation
262+
ri_d_desc = desc_func.d_desc_from_comp(R_d_desc[i, :, :])[
263+
0
264+
] # convert descriptor back to full representation
263265
ri_d_desc_perms = np.reshape(
264-
np.tile(ri_d_desc.T, n_perms)[:, tril_perms_lin], (-1, dim_d, n_perms)
266+
np.tile(ri_d_desc.T, n_perms)[:, tril_perms_lin],
267+
(-1, dim_d, n_perms),
265268
)
266269

267270
diff_ab_perms = R_desc[j % n_train, :] - ri_desc_perms
@@ -956,7 +959,7 @@ def train( # noqa: C901
956959
self.log.debug('Iterative solver not installed.')
957960
use_analytic_solver = True
958961

959-
#use_analytic_solver = False # remove me!
962+
# use_analytic_solver = False # remove me!
960963

961964
if use_analytic_solver:
962965

@@ -1063,10 +1066,10 @@ def train( # noqa: C901
10631066
if E_train_mean is None
10641067
else E_train_mean
10651068
)
1066-
#if c is None:
1069+
# if c is None:
10671070
# # Something does not seem right. Turn off energy predictions for this model, only output force predictions.
10681071
# model['use_E'] = False
1069-
#else:
1072+
# else:
10701073
# model['c'] = c
10711074

10721075
model['c'] = c
@@ -1115,8 +1118,8 @@ def _recov_int_const(
11151118
If inconsistent/corrupted energy labels are detected
11161119
in the provided dataset.
11171120
ValueError
1118-
If different scales in energy vs. force labels are
1119-
detected in the provided dataset.
1121+
If potentially inconsistent scales in energy vs.
1122+
force labels are detected in the provided dataset.
11201123
"""
11211124

11221125
gdml_predict = GDMLPredict(
@@ -1156,16 +1159,22 @@ def _recov_int_const(
11561159

11571160
if np.sign(e_fact) == -1:
11581161
self.log.warning(
1159-
'The provided dataset contains gradients instead of force labels (flipped sign). Please correct!\n'
1160-
+ ui.color_str('Note:', bold=True)
1161-
+ 'Note: The energy prediction accuracy of the model will thus neither be validated nor tested in the following steps!'
1162+
'It looks like the provided dataset may contain gradients instead of force labels (flipped sign).\n\n'
1163+
+ ui.color_str('Troubleshooting tips:\n', bold=True)
1164+
+ ui.wrap_indent_str(
1165+
'(1) ',
1166+
'Verify the sign of your force labels.',
1167+
)
1168+
+ '\n'
1169+
+ ui.wrap_indent_str(
1170+
'(2) ', 'This issue might very well just be a symptom of using too few training data and your labels are correct.'
1171+
)
11621172
)
1163-
#return None
11641173

11651174
if corrcoef < 0.95:
11661175
self.log.warning(
1167-
'Inconsistent energy labels detected!\n'
1168-
+ 'The predicted energies for the training data are only weakly correlated with the reference labels (correlation coefficient {:.2f}) which indicates that the issue is most likely NOT just a unit conversion error.\n\n'.format(
1176+
'Potentially inconsistent energy labels detected!\n'
1177+
+ 'The predicted energies for the training data are only weakly correlated with the reference labels (correlation coefficient {:.2f}). Note that correlation is independent of scale, which indicates that the issue is most likely not just a unit conversion error.\n\n'.format(
11691178
corrcoef
11701179
)
11711180
+ ui.color_str('Troubleshooting tips:\n', bold=True)
@@ -1175,37 +1184,37 @@ def _recov_int_const(
11751184
)
11761185
+ '\n'
11771186
+ ui.wrap_indent_str(
1178-
'(2) ', 'Verify the consistency between energy and force labels.'
1187+
'(2) ', 'This issue might very well just be a symptom of using too few training data and your labels are correct.'
11791188
)
11801189
+ '\n'
1181-
+ ui.wrap_indent_str(' - ', 'Correspondence correct?')
1190+
+ ui.wrap_indent_str(
1191+
'(3) ', 'Verify the consistency between energy and force labels.'
1192+
)
11821193
+ '\n'
1183-
+ ui.wrap_indent_str(' - ', 'Same level of theory?')
1194+
+ ui.wrap_indent_str(' - ', 'Correspondence between force and energy labels correct?')
11841195
+ '\n'
1185-
+ ui.wrap_indent_str(' - ', 'Accuracy of forces?')
1196+
+ ui.wrap_indent_str(' - ', 'Accuracy of forces (convergence of your ab-initio calculations)?')
1197+
+ '\n'
1198+
+ ui.wrap_indent_str(' - ', 'Was the same level of theory used to compute forces and energies?')
11861199
+ '\n'
11871200
+ ui.wrap_indent_str(
1188-
'(3) ',
1201+
'(4) ',
11891202
'Is the training data spread too broadly (i.e. weakly sampled transitions between example clusters)?',
11901203
)
11911204
+ '\n'
11921205
+ ui.wrap_indent_str(
1193-
'(4) ', 'Are there duplicate geometries in the training data?'
1206+
'(5) ', 'Are there duplicate geometries in the training data?'
11941207
)
11951208
+ '\n'
11961209
+ ui.wrap_indent_str(
1197-
'(5) ', 'Are there any corrupted data points (e.g. parsing errors)?'
1210+
'(6) ', 'Are there any corrupted data points (e.g. parsing errors)?'
11981211
)
1199-
+ '\n\n'
1200-
+ ui.color_str('Note:', bold=True)
1201-
+ ' The energy prediction accuracy of the model will thus neither be validated nor tested in the following steps!'
12021212
)
1203-
#return None
12041213

12051214
if np.abs(e_fact - 1) > 1e-1:
12061215
self.log.warning(
1207-
'Different scales in energy vs. force labels detected!\n'
1208-
+ 'The integrated forces differ from the energy labels by factor ~{:.2f}, meaning that the trained model will likely fail to predict energies accurately.\n\n'.format(
1216+
'Potentially inconsistent scales in energy vs. force labels detected!\n'
1217+
+ 'The integrated force predictions differ from the reference energy labels by factor ~{:.2f} (for the training data), meaning that this model will likely fail to predict energies accurately in real-world use.\n\n'.format(
12091218
e_fact
12101219
)
12111220
+ ui.color_str('Troubleshooting tips:\n', bold=True)
@@ -1214,14 +1223,14 @@ def _recov_int_const(
12141223
)
12151224
+ '\n'
12161225
+ ui.wrap_indent_str(
1217-
'(2) ',
1226+
'(2) ', 'This issue might very well just be a symptom of using too few training data and your labels are correct.'
1227+
)
1228+
+ '\n'
1229+
+ ui.wrap_indent_str(
1230+
'(3) ',
12181231
'Is the training data spread too broadly (i.e. weakly sampled transitions between example clusters)?',
12191232
)
1220-
+ '\n\n'
1221-
+ ui.color_str('Note:', bold=True)
1222-
+ ' The energy prediction accuracy of the model will thus neither be validated nor tested in the following steps!'
12231233
)
1224-
#return None
12251234

12261235
# Least squares estimate for integration constant.
12271236
return np.sum(E_ref - E_pred) / E_ref.shape[0]

0 commit comments

Comments
 (0)