@@ -21,11 +21,11 @@
 import emcee
 import multiprocessing
 import numpy as np
-from scipy.linalg import lapack
 
 from bayesian_inference import common_base
 from bayesian_inference import data_IO
 from bayesian_inference import emulation
+from bayesian_inference import log_posterior
 
 logger = logging.getLogger(__name__)
 
@@ -75,13 +75,13 @@ def run_mcmc(config, closure_index=-1):
     # NOTE: We use `get_context` here to avoid having to globally specify the context. Plus, it then should be fine
     # to repeatedly call this function. (`set_context` can only be called once - otherwise, it's a runtime error).
     ctx = multiprocessing.get_context('spawn')
-    with ctx.Pool() as pool:
+    with ctx.Pool(initializer=log_posterior.initialize_pool_variables, initargs=[min, max, emulation_config, emulation_results, experimental_results, emulator_cov_unexplained]) as pool:
 
         # Construct sampler (we create a dummy daughter class from emcee.EnsembleSampler, to add some logging info)
         # Note: we pass the emulators and experimental data as args to the log_posterior function
         logger.info('Initializing sampler...')
-        sampler = LoggingEnsembleSampler(config.n_walkers, ndim, _log_posterior,
-                                         args=[min, max, emulation_config, emulation_results, experimental_results, emulator_cov_unexplained],
+        sampler = LoggingEnsembleSampler(config.n_walkers, ndim, log_posterior.log_posterior,
+                                         #args=[min, max, emulation_config, emulation_results, experimental_results, emulator_cov_unexplained],
                                          pool=pool)
 
         # Generate random starting positions for each walker
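The substantive change in this hunk: rather than shipping the emulators and experimental data to the workers as `args` on every log-posterior call (emcee pickles its function wrapper, `args` included, for each `pool.map`), the pool now receives them through an `initializer`, so each spawned worker unpickles them exactly once. This matters with the 'spawn' start method, since workers inherit nothing from the parent and everything crosses a pickle boundary. The new `bayesian_inference.log_posterior` module is not part of this diff, so the following is only a self-contained toy of the same pattern; its internals (the `_shared` dict, the placeholder likelihood) are illustrative assumptions, not the module's actual contents:

```python
# Toy sketch of the Pool-initializer pattern used above. Only the entry-point
# names mirror the diff; everything else here is hypothetical.
import multiprocessing

import numpy as np

_shared = {}  # per-worker globals, filled once by the pool initializer

def initialize_pool_variables(min_bounds, max_bounds, payload):
    """Runs once in each worker process: stash large read-only objects in
    module-level state, so they are pickled once per worker rather than
    once per task."""
    _shared['min'] = np.asarray(min_bounds)
    _shared['max'] = np.asarray(max_bounds)
    _shared['payload'] = payload

def log_prob(x):
    """Toy log-probability that reads the shared state (stands in for
    log_posterior.log_posterior, which takes only the sample as argument)."""
    x = np.asarray(x)
    if np.any(x <= _shared['min']) or np.any(x >= _shared['max']):
        return -np.inf
    return -0.5 * float(np.sum(x**2))

if __name__ == '__main__':
    ctx = multiprocessing.get_context('spawn')
    with ctx.Pool(initializer=initialize_pool_variables,
                  initargs=[[-5.0, -5.0], [5.0, 5.0], np.ones(1_000_000)]) as pool:
        print(pool.map(log_prob, [np.zeros(2), np.array([6.0, 0.0])]))
        # [-0.0, -inf]
```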
@@ -183,112 +183,6 @@ def map_parameters(posterior, method='quantile'):
 
     return map_parameters
 
-#---------------------------------------------------------------
-def _log_posterior(X, min, max, emulation_config, emulation_results, experimental_results, emulator_cov_unexplained):
-    """
-    Function to evaluate the log-posterior for a given set of input parameters.
-
-    This function is called by https://emcee.readthedocs.io/en/stable/user/sampler/
-
-    :param X input ndarray of parameter space values
-    :param min list of minimum boundaries for each emulator parameter
-    :param max list of maximum boundaries for each emulator parameter
-    :param config emulation_configuration object
-    :param emulation_results dict of emulation groups
-    :param experimental_results arrays of experimental results
-    """
-
-    # Convert to 2darray of shape (n_samples, n_parameters)
-    X = np.array(X, copy=False, ndmin=2)
-
-    # Initialize log-posterior array, which we will populate and return
-    log_posterior = np.zeros(X.shape[0])
-
-    # Check if any samples are outside the parameter bounds, and set log-posterior to -inf for those
-    inside = np.all((X > min) & (X < max), axis=1)
-    log_posterior[~inside] = -np.inf
-
-    # Evaluate log-posterior for samples inside parameter bounds
-    n_samples = np.count_nonzero(inside)
-    n_features = experimental_results['y'].shape[0]
-    if n_samples > 0:
-
-        # Get experimental data
-        data_y = experimental_results['y']
-        data_y_err = experimental_results['y_err']
-
-        # Compute emulator prediction
-        # Returns dict of matrices of emulator predictions:
-        #     emulator_predictions['central_value'] -- (n_samples, n_features)
-        #     emulator_predictions['cov'] -- (n_samples, n_features, n_features)
-        emulator_predictions = emulation.predict(X[inside], emulation_config,
-                                                 emulation_group_results=emulation_results,
-                                                 emulator_cov_unexplained=emulator_cov_unexplained)
-
-        # Construct array to store the difference between emulator prediction and experimental data
-        # (using broadcasting to subtract each data point from each emulator prediction)
-        assert data_y.shape[0] == emulator_predictions['central_value'].shape[1]
-        dY = emulator_predictions['central_value'] - data_y
-
-        # Construct the covariance matrix
-        # TODO: include full experimental data covariance matrix -- currently we only include uncorrelated data uncertainty
-        #-------------------------
-        covariance_matrix = np.zeros((n_samples, n_features, n_features))
-        covariance_matrix += emulator_predictions['cov']
-        covariance_matrix += np.diag(data_y_err**2)
-
-        # Compute log likelihood at each point in the sample
-        # We take constant priors, so the log-likelihood is just the log-posterior
-        # (since above we set the log-posterior to -inf for samples outside the parameter bounds)
-        log_posterior[inside] += list(map(_loglikelihood, dY, covariance_matrix))
-
-    return log_posterior
-
-#---------------------------------------------------------------
-def _loglikelihood(y, cov):
-    """
-    Evaluate the multivariate-normal log-likelihood for difference vector `y`
-    and covariance matrix `cov`:
-
-        log_p = -1/2*[(y^T).(C^-1).y + log(det(C))] + const.
-
-    The likelihood is NOT NORMALIZED, since this does not affect MCMC.
-    The normalization const = -n/2*log(2*pi), where n is the dimensionality.
-
-    Arguments `y` and `cov` MUST be np.arrays with dtype == float64 and shapes
-    (n) and (n, n), respectively.  These requirements are NOT CHECKED.
-
-    The calculation follows algorithm 2.1 in Rasmussen and Williams (Gaussian
-    Processes for Machine Learning).
-
-    """
-    # Compute the Cholesky decomposition of the covariance.
-    # Use bare LAPACK function to avoid scipy.linalg wrapper overhead.
-    L, info = lapack.dpotrf(cov, clean=False)
-
-    if info < 0:
-        raise ValueError(
-            'lapack dpotrf error: '
-            'the {}-th argument had an illegal value'.format(-info)
-        )
-    elif info < 0:
-        raise np.linalg.LinAlgError(
-            'lapack dpotrf error: '
-            'the leading minor of order {} is not positive definite'
-            .format(info)
-        )
-
-    # Solve for alpha = cov^-1.y using the Cholesky decomp.
-    alpha, info = lapack.dpotrs(L, y)
-
-    if info != 0:
-        raise ValueError(
-            'lapack dpotrs error: '
-            'the {}-th argument had an illegal value'.format(-info)
-        )
-
-    return -.5*np.dot(y, alpha) - np.log(L.diagonal()).sum()
-
 ####################################################################################################################
 class LoggingEnsembleSampler(emcee.EnsembleSampler):
     '''
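For reference, the `_log_posterior` removed above (relocated into the new module) implements a flat prior by masking: samples outside the `[min, max]` box get log-posterior `-inf` up front, and the emulator is queried only for the samples inside. A minimal self-contained sketch of that pattern, with a toy quadratic standing in for the emulator-plus-Gaussian-likelihood step:

```python
# Illustration of the bounds-masking pattern in _log_posterior: assign -inf
# outside the prior box, evaluate the (toy) likelihood only for points inside.
import numpy as np

def log_posterior_masked(X, lo, hi):
    # Convert to 2D array of shape (n_samples, n_parameters)
    X = np.atleast_2d(np.asarray(X, dtype=float))
    logp = np.zeros(X.shape[0])
    # Flat prior: -inf outside the box, constant inside
    inside = np.all((X > lo) & (X < hi), axis=1)
    logp[~inside] = -np.inf
    if np.count_nonzero(inside) > 0:
        # Stand-in for emulation.predict(...) + the Gaussian likelihood
        logp[inside] = -0.5 * np.sum(X[inside]**2, axis=1)
    return logp

print(log_posterior_masked([[0.5, 0.5], [2.0, 0.0]], lo=[-1.0, -1.0], hi=[1.0, 1.0]))
# [-0.25  -inf]
```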
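Likewise, the removed `_loglikelihood` evaluates the unnormalized multivariate-normal log-likelihood `log_p = -1/2*[(y^T).(C^-1).y + log(det(C))] + const.` via a Cholesky factorization (Rasmussen and Williams, algorithm 2.1), calling raw LAPACK `dpotrf`/`dpotrs` to skip scipy's wrapper overhead. Note that, as written, its second error branch re-tests `info < 0`, so the `info > 0` (not-positive-definite) case falls through both checks. An equivalent sketch using scipy's higher-level wrappers, which call the same LAPACK routines underneath and raise `LinAlgError` themselves for a non-positive-definite matrix:

```python
# Readability-first equivalent of the removed _loglikelihood (a sketch, not the
# new module's actual code), using scipy.linalg's Cholesky wrappers.
import numpy as np
from scipy.linalg import cho_factor, cho_solve

def loglikelihood(y, cov):
    """
    Unnormalized multivariate-normal log-likelihood:
        log_p = -1/2*[(y^T).(C^-1).y + log(det(C))] + const.
    Raises LinAlgError if cov is not positive definite.
    """
    # cho_factor wraps the same LAPACK dpotrf used by the removed code
    L, lower = cho_factor(cov, lower=True)
    # cho_solve wraps dpotrs: alpha = C^{-1}.y
    alpha = cho_solve((L, lower), y)
    # log det C = 2*sum(log diag L), so -1/2*log det C = -sum(log diag L)
    return -0.5 * np.dot(y, alpha) - np.sum(np.log(np.diag(L)))

# Quick check against a well-conditioned random covariance
rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4))
cov = A @ A.T + 4.0 * np.eye(4)
print(loglikelihood(rng.normal(size=4), cov))
```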