39
39
from scipy import linalg
40
40
from scipy .optimize import minimize
41
41
from scipy .special import comb
42
- from sklearn .covariance import empirical_covariance
43
- from sklearn .covariance .empirical_covariance_ import log_likelihood
42
+ from sklearn .covariance import empirical_covariance , log_likelihood
44
43
from sklearn .linear_model import LassoLars
45
44
from sklearn .utils import Bunch , check_array
46
45
from sklearn .utils .extmath import fast_logdet
51
50
def mk_all_ugs (n_dim ):
52
51
"""Utility for generating all possible graphs."""
53
52
nedges = int (comb (n_dim , 2 ))
54
- m = 2 ** nedges
53
+ m = 2 ** nedges
55
54
56
- ind = np .array ([list (binary_repr (x , width = len (binary_repr (m - 1 )))) for x in range (m )]).astype (int )
55
+ ind = np .array (
56
+ [list (binary_repr (x , width = len (binary_repr (m - 1 )))) for x in range (m )]
57
+ ).astype (int )
57
58
ord = np .argsort (ind .sum (axis = 1 ))
58
59
ind = ind [ord ]
59
60
@@ -98,7 +99,12 @@ def score_blankets(blankets, X, alphas=(0.01, 0.5, 1)):
98
99
X_mb = np .zeros ((X .shape [0 ], 1 ))
99
100
100
101
y_mb = X [:, i ]
101
- score = np .sum ([LassoLars (alpha = alpha ).fit (X_mb , y_mb ).score (X_mb , y_mb ) for alpha in alphas ])
102
+ score = np .sum (
103
+ [
104
+ LassoLars (alpha = alpha ).fit (X_mb , y_mb ).score (X_mb , y_mb )
105
+ for alpha in alphas
106
+ ]
107
+ )
102
108
103
109
scores .append (score )
104
110
scores_all .append (scores )
@@ -109,7 +115,12 @@ def score_blankets(blankets, X, alphas=(0.01, 0.5, 1)):
109
115
110
116
111
117
def _get_graphs (blankets , scores , n_dim , n_resampling = 200 ):
112
- idx = np .array ([np .random .choice (scores .shape [1 ], p = scores [i ], size = n_resampling ) for i in range (n_dim )])
118
+ idx = np .array (
119
+ [
120
+ np .random .choice (scores .shape [1 ], p = scores [i ], size = n_resampling )
121
+ for i in range (n_dim )
122
+ ]
123
+ )
113
124
114
125
graphs_ = np .array ([blankets [i ][idx [i ]] for i in range (n_dim )]).transpose (1 , 0 , 2 )
115
126
# symmetrise with AND operator -> product
@@ -268,7 +279,11 @@ def compute_score(X, G, P, S, GWprior=None, score_method="bic"):
268
279
269
280
logdetHdiag = sum (np .log (- diagH ))
270
281
lognormconst = dof * np .log (2 * np .pi ) / 2 + logh - logdetHdiag / 2.0
271
- score = lognormconst - GWprior .lognormconst - n_samples * n_dim * np .log (2 * np .pi ) / 2
282
+ score = (
283
+ lognormconst
284
+ - GWprior .lognormconst
285
+ - n_samples * n_dim * np .log (2 * np .pi ) / 2
286
+ )
272
287
GWpost .lognormconst = lognormconst
273
288
274
289
elif score_method == "laplace" :
@@ -294,7 +309,11 @@ def compute_score(X, G, P, S, GWprior=None, score_method="bic"):
294
309
# neg Hessian will be posdef
295
310
logdetH = 2 * sum (np .log (np .diag (linalg .cholesky (- H ))))
296
311
lognormconst = dof * np .log (2 * np .pi ) / 2 + logh - logdetH / 2.0
297
- score = lognormconst - GWprior .lognormconst - n_samples * n_dim * np .log (2 * np .pi ) / 2
312
+ score = (
313
+ lognormconst
314
+ - GWprior .lognormconst
315
+ - n_samples * n_dim * np .log (2 * np .pi ) / 2
316
+ )
298
317
GWpost .lognormconst = lognormconst
299
318
300
319
GWpost .score = score
@@ -315,7 +334,9 @@ def GWishartScore(X, G, d0=3, S0=None, score_method="bic", mode="covsel"):
315
334
noData = np .zeros ((0 , n_dim ))
316
335
317
336
P0 , S_noData = GWishartFit (noData , G , GWprior )
318
- GWtemp = compute_score (noData , G , P0 , S_noData , GWprior = GWprior , score_method = score_method )
337
+ GWtemp = compute_score (
338
+ noData , G , P0 , S_noData , GWprior = GWprior , score_method = score_method
339
+ )
319
340
GWprior .lognormconst = GWtemp .lognormconst
320
341
321
342
# Compute the map precision matrix P
@@ -344,13 +365,17 @@ def bayesian_graphical_lasso(
344
365
alphas = np .logspace (- 2 , 0 , 20 )
345
366
346
367
# get a series of Markov blankets for vaiours alphas
347
- mdl = GraphicalLasso (assume_centered = assume_centered , tol = tol , max_iter = max_iter , verbose = False )
368
+ mdl = GraphicalLasso (
369
+ assume_centered = assume_centered , tol = tol , max_iter = max_iter , verbose = False
370
+ )
348
371
precisions = [mdl .set_params (alpha = a ).fit (X ).precision_ for a in alphas ]
349
372
mblankets = markov_blankets (precisions , tol = tol , unique = True )
350
373
351
374
normalized_scores = score_blankets (mblankets , X = X , alphas = [0.01 , 0.5 , 1 ])
352
375
353
- graphs = _get_graphs (mblankets , normalized_scores , n_dim = n_dim , n_resampling = n_resampling )
376
+ graphs = _get_graphs (
377
+ mblankets , normalized_scores , n_dim = n_dim , n_resampling = n_resampling
378
+ )
354
379
355
380
nonzeros_all = [np .triu (g , 1 ) + np .eye (n_dim , dtype = bool ) for g in graphs ]
356
381
@@ -361,7 +386,10 @@ def bayesian_graphical_lasso(
361
386
# Find non-zero elements of upper triangle of G
362
387
# make sure diagonal is non-zero
363
388
# G = nonzeros_all[1] # probably can discard if all zeros?
364
- res = [GWishartScore (X , G , d0 = d0 , S0 = S0 , mode = mode , score_method = scoring ) for G in nonzeros_all ]
389
+ res = [
390
+ GWishartScore (X , G , d0 = d0 , S0 = S0 , mode = mode , score_method = scoring )
391
+ for G in nonzeros_all
392
+ ]
365
393
366
394
top_n = [x .P for x in sorted (res , key = lambda x : x .score )[::- 1 ][:top_n ]]
367
395
return np .mean (top_n , axis = 0 )
@@ -439,7 +467,12 @@ def __init__(
439
467
top_n = 1 ,
440
468
):
441
469
super (GraphicalLasso , self ).__init__ (
442
- alpha = alpha , tol = tol , max_iter = max_iter , verbose = verbose , assume_centered = assume_centered , mode = mode
470
+ alpha = alpha ,
471
+ tol = tol ,
472
+ max_iter = max_iter ,
473
+ verbose = verbose ,
474
+ assume_centered = assume_centered ,
475
+ mode = mode ,
443
476
)
444
477
self .alphas = alphas
445
478
self .n_resampling = n_resampling
0 commit comments