\section{Monte-Carlo Integration}%
\label{sec:mcint}

Consider a function
\(f\colon \vb{x}\in\Omega\subset\mathbb{R}^n\mapsto\mathbb{R}\) and a
probability density
\(\rho\colon \vb{x}\in\Omega\mapsto\mathbb{R}_{\geq 0}\) with
\(\int_{\Omega}\rho(\vb{x})\dd{\vb{x}} = 1\). By multiplying \(f\)
with a \(1\) in the fashion of \cref{eq:baseintegral}, the integral
of \(f\) over \(\Omega\) can be interpreted as the expected value
\(\EX{F/\Rho}\) of the random variable \(F/\Rho\) under the
distribution \(\rho\). This observation is the key to most
Monte-Carlo methods.

\begin{equation}
  \label{eq:baseintegral}
  I = \int_\Omega f(\vb{x}) \dd{\vb{x}} = \int_\Omega
  \qty[\frac{f(\vb{x})}{\rho(\vb{x})}] \rho(\vb{x}) \dd{\vb{x}} = \EX{\frac{F}{\Rho}}
\end{equation}

The expected value \(\EX{F/\Rho}\) can be approximated by taking the
mean of \(F/\Rho\) over a large but finite number \(N\) of samples
\(\{\vb{x}_i\}_{i\in\overline{1,N}}\sim\rho\) (distributed according
to \(\rho\)).

\begin{equation}
  \label{eq:approxexp}
  \EX{\frac{F}{\Rho}} \approx
  \frac{1}{N}\sum_{i=1}^N\frac{f(\vb{x_i})}{\rho(\vb{x_i})}
  \xrightarrow{N\rightarrow\infty} I
\end{equation}
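
The estimator \cref{eq:approxexp} translates almost verbatim into
code. The following minimal sketch (not part of the thesis code,
assuming only NumPy; \texttt{mc\_integrate} and \texttt{sample\_rho}
are placeholder names) illustrates the idea:

\begin{verbatim}
import numpy as np

def mc_integrate(f, sample_rho, rho, n=100_000, rng=np.random.default_rng(0)):
    """Estimate the integral of f over Omega via Eq. (approxexp).

    sample_rho(rng, n) must return n points drawn from the density rho.
    """
    x = sample_rho(rng, n)          # samples distributed according to rho
    weights = f(x) / rho(x)         # realisations of F/P
    return weights.mean()           # (1/N) sum f(x_i)/rho(x_i)

# toy example: integrate f(x) = x**2 over [0, 1] with uniform rho = 1
estimate = mc_integrate(
    f=lambda x: x**2,
    sample_rho=lambda rng, n: rng.uniform(0.0, 1.0, n),
    rho=lambda x: np.ones_like(x),
)
print(estimate)  # ~ 1/3
\end{verbatim}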

The convergence of \cref{eq:approxexp} follows from the expected
value \cref{eq:evalue-mean} and variance \cref{eq:variance-mean} of
the mean \(\overline{X} = \frac{1}{N}\sum_i X_i\) of \(N\)
uncorrelated random variables \(\{X_i\}_{i\in\overline{1,N}}\) with
the same distribution, expected value \(\EX{X_i}=\mathbb{E}\) and
variance \(\sigma_i^2 = \sigma^2\).

\begin{gather}
  \EX{\overline{X}} = \frac{1}{N}\sum_i\EX{X_i} = \mathbb{E} \label{eq:evalue-mean}\\
  \sigma^2_{\overline{X}} = \sum_i\frac{\sigma_i^2}{N^2} =
  \frac{\sigma^2}{N} \label{eq:variance-mean}
\end{gather}
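
The \(\sigma^2/N\) scaling of \cref{eq:variance-mean} is easily
checked numerically, as in this small sketch (again assuming NumPy,
not part of the thesis code):

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)
sigma2 = 1.0 / 12.0                     # variance of U(0, 1)
for n in (10, 100, 1000):
    # 10_000 independent means of n uniform samples each
    means = rng.uniform(0.0, 1.0, (10_000, n)).mean(axis=1)
    print(n, means.var(), sigma2 / n)   # empirical vs. sigma^2 / N
\end{verbatim}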

Because \(\frac{\sigma^2}{N}\xrightarrow{N\rightarrow\infty} 0\),
\cref{eq:approxexp} indeed converges to \(I\). For finite \(N\) the
value of \cref{eq:approxexp} varies around \(I\) with the variance
\(\VAR{F/\Rho}\cdot N^{-1}\), where \(\VAR{F/\Rho}\) is given
in \cref{eq:varI} and can be estimated as in \cref{eq:varI-approx}.

\begin{align}
  \VAR{\frac{F}{\Rho}} &= \int_\Omega \qty[I -
  \frac{f(\vb{x})}{\rho(\vb{x})}]^2 \rho({\vb{x}}) \dd{\vb{x}} =
  \int_\Omega \qty[\qty(\frac{f(\vb{x})}{\rho(\vb{x})})^2 -
  I^2]\rho({\vb{x}}) \dd{\vb{x}} \label{eq:varI}
  \\
  &\approx \frac{1}{N - 1}\sum_i \qty[I -
  \frac{f(\vb{x_i})}{\rho(\vb{x_i})}]^2 \label{eq:varI-approx}
\end{align}
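
In practice, \cref{eq:varI-approx} with \(I\) replaced by the sample
mean provides a statistical error estimate alongside the integral, as
in the following sketch (assumptions as above):

\begin{verbatim}
import numpy as np

def mc_integrate_with_error(f, sample_rho, rho, n=100_000,
                            rng=np.random.default_rng(0)):
    """Return (estimate, standard error) for the MC integral."""
    x = sample_rho(rng, n)
    weights = f(x) / rho(x)                 # realisations of F/P
    estimate = weights.mean()
    var_fp = weights.var(ddof=1)            # Eq. (varI-approx), I -> sample mean
    return estimate, np.sqrt(var_fp / n)    # error shrinks like 1/sqrt(N)

est, err = mc_integrate_with_error(
    f=lambda x: np.sin(x),
    sample_rho=lambda rng, n: rng.uniform(0.0, np.pi, n),
    rho=lambda x: np.full_like(x, 1.0 / np.pi),
)
print(est, "+/-", err)  # exact value: 2
\end{verbatim}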

The name of the game now is to reduce \(\VAR{F/\Rho}\) in order to
speed up the convergence of \cref{eq:approxexp} and achieve higher
accuracy with fewer function evaluations. Variance reduction can be
accomplished by choosing a suitable \(\rho\) (importance sampling),
by transforming the integral to another variable, by a combination of
both approaches, or by subdividing the integration volume into
several sub-volumes of different size while keeping the sample size
constant in all sub-volumes (stratified sampling). Combining ideas
from importance sampling and stratified sampling leads to the \vegas\
algorithm~\cite{Lepage:19781an}, which approximates the optimal
distribution of importance sampling by adaptively subdividing the
integration volume into a grid.
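
A minimal sketch of stratified sampling in one dimension (assuming
NumPy; the integrand and stratum boundaries are illustrative
placeholders): every sub-interval, however large, receives the same
number of samples.

\begin{verbatim}
import numpy as np

def stratified_mc(f, edges, m, rng=np.random.default_rng(0)):
    """Stratified MC in 1D: the same number of samples m in every stratum.

    edges: increasing array of stratum boundaries, possibly non-uniform.
    """
    total, var = 0.0, 0.0
    for a, b in zip(edges[:-1], edges[1:]):
        y = f(rng.uniform(a, b, m))             # uniform samples in [a, b]
        total += (b - a) * y.mean()             # |Omega_k| * mean of f
        var += (b - a) ** 2 * y.var(ddof=1) / m
    return total, np.sqrt(var)

# steep toy integrand; finer strata where f changes rapidly
f = lambda x: np.exp(-5.0 * x)
edges = np.array([0.0, 0.05, 0.1, 0.2, 0.4, 0.7, 1.0])
print(stratified_mc(f, edges, m=2_000))         # exact: (1 - e^{-5}) / 5
\end{verbatim}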

In contrast to many other numerical integration algorithms (trapezoid
rule, Simpson's rule), whose error usually scales like
\(N^{-\frac{k}{n}}\) with some \(k\in\mathbb{N}\) in \(n\) dimensions,
the error of \cref{eq:approxexp} scales like \(N^{-\frac{1}{2}}\)
independently of the dimensionality of the integration
volume. Because integrals in particle physics usually have a high
dimensionality, Monte-Carlo integration is well suited there. When
implementing Monte-Carlo methods, the random samples can be obtained
through hardware or software random number generators (RNGs). Most
implementations utilize software RNGs because they supply
pseudo-random numbers in a reproducible way, which facilitates
debugging and the comparison of results.
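
As a minimal illustration of such reproducibility (using NumPy's
pseudo-random generator; not part of the thesis code), fixing the
seed makes every run produce the identical sample stream:

\begin{verbatim}
import numpy as np

# two generators with the same seed yield identical pseudo-random streams
a = np.random.default_rng(seed=42).uniform(size=5)
b = np.random.default_rng(seed=42).uniform(size=5)
assert np.array_equal(a, b)   # reproducible, hence comparable between runs
\end{verbatim}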

\subsection{Naive Monte-Carlo Integration and Change of Variables}
\label{sec:naivechange}

The simplest choice for \(\rho\) is given
by \cref{eq:simplep}, the uniform distribution.

\begin{equation}
  \label{eq:simplep}
  \rho(\vb{x}) = \frac{1}{\int_{\Omega}1\dd{\vb{x'}}} =
  \frac{1}{\abs{\Omega}}
\end{equation}

With this distribution, \cref{eq:approxexp}
becomes \cref{eq:approxexp-uniform}. In other words, \(I\) is just
the mean of \(f\) in \(\Omega\), henceforth called \(\bar{f}\),
multiplied by the volume.

\begin{equation}
  \label{eq:approxexp-uniform}
  \EX{\frac{F}{\Rho}} \approx
  \frac{\abs{\Omega}}{N}\sum_{i=1}^N f(\vb{x_i}) = \abs{\Omega}\cdot\bar{f}
\end{equation}

The variance \(\VAR{I}=\VAR{F/\Rho}\) is now given
by \cref{eq:approxvar-I}. Note that the factor \(\abs{\Omega}\) gets
squared when approximating the integral by the sum.

\begin{equation}
  \label{eq:approxvar-I}
  \VAR{I} = \abs{\Omega}\int_\Omega f(\vb{x})^2 \dd{\vb{x}} -
  I^2 \equiv \abs{\Omega}\cdot\sigma_f^2 \approx
  \frac{\abs{\Omega}^2}{N-1}\sum_{i}\qty[f(\vb{x}_i) - \bar{f}]^2
\end{equation}
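
A minimal sketch of this naive estimator, including the error
estimate from \cref{eq:approxvar-I} and a simple loop that grows the
sample size until a target accuracy \(\varepsilon\) is met (assuming
NumPy; the integrand is a placeholder, not the cross section used
below):

\begin{verbatim}
import numpy as np

def naive_mc(f, a, b, eps=1e-3, n=1_000, rng=np.random.default_rng(0)):
    """Uniform-sampling MC on [a, b], doubling N until the error < eps."""
    vol = b - a                                   # |Omega| in one dimension
    while True:
        y = f(rng.uniform(a, b, n))
        estimate = vol * y.mean()                 # |Omega| * f-bar
        error = vol * np.sqrt(y.var(ddof=1) / n)  # sqrt(Var(I) / N)
        if error < eps:
            return estimate, error, n
        n *= 2                                    # need more samples

# placeholder integrand standing in for 2*pi*sin(theta)*dsigma/dOmega
print(naive_mc(lambda x: np.sin(x) ** 3, 0.0, np.pi))  # exact: 4/3
\end{verbatim}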

Applying this method to the integration of
\(2\pi\sin(\theta)\cdot\dv{\sigma}{\Omega}\) (see \cref{eq:crossec})
over a \(\theta\) interval equivalent to \(\eta\in [-2.5, 2.5]\), with
a target accuracy of \(\varepsilon=10^{-3}\), results in
\result{xs/python/xs_mc} with a sample size of
\result{xs/python/xs_mc_N}.

Changing variables and integrating \cref{eq:xs-eta} over \(\eta\)
with the same target accuracy yields~\result{xs/python/xs_mc_eta} with
a sample size of just~\result{xs/python/xs_mc_eta_N}. The dramatic
reduction in variance and sample size can be understood qualitatively
by studying \cref{fig:xs-int-comp}. The differential cross section in
terms of \(\eta\)~(\cref{fig:xs-int-eta}) is less steep than the
differential cross section in terms of
\(\theta\)~(\cref{fig:xs-int-theta}) and takes on significant values
over most of the integration interval.
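
The same mechanism can be reproduced with a toy integrand (assuming
NumPy; this is not the cross section itself): rewriting the integral
in a variable in which the integrand is flat drives the sample
variance, and with it the required sample size, down.

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
a, n = 10.0, 100_000
c = 1.0 - np.exp(-a)                      # exact integral of exp(-a x) is c / a

# steep integrand sampled directly in x: large variance
x = rng.uniform(0.0, 1.0, n)
direct = np.exp(-a * x)
print(direct.mean(), direct.std(ddof=1) / np.sqrt(n))

# substitute x(u) = -log(1 - c u) / a; the integrand becomes flat in u
u = rng.uniform(0.0, 1.0, n)
x_of_u = -np.log(1.0 - c * u) / a         # change of variables x(u)
jac = c / (a * (1.0 - c * u))             # dx/du
transformed = np.exp(-a * x_of_u) * jac   # constant -> (near) zero variance
print(transformed.mean(), transformed.std(ddof=1) / np.sqrt(n))
\end{verbatim}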

\begin{figure}[ht]
  \centering
  \begin{subfigure}[c]{.49\textwidth}
    \plot{xs/xs_integrand}
    \caption[\(2\pi\dv{\sigma}{\theta}\) with integration
    boundaries]{\label{fig:xs-int-theta} The integrand arising from
      the differential cross section \(\dv{\sigma}{\theta}\) with the
      integration borders visualized as gray lines.}
  \end{subfigure}
  \begin{subfigure}[c]{.49\textwidth}
    \plot{xs/xs_integrand_eta}
    \caption[Differential cross section for \(\qqgg\) with integration
    boundaries]{\label{fig:xs-int-eta} The differential cross section
      \(\dv{\sigma}{\eta}\) (see \cref{eq:xs-eta}) scaled by \(2\pi\)
      with the integration borders visualized as gray lines.}
  \end{subfigure}
  \caption{\label{fig:xs-int-comp} Comparison of two parametrisations
    of the differential cross section.}
\end{figure}

\subsection{Integration with \vegas}
\label{sec:mcintvegas}

Stratified sampling gives optimal results when the variance in every
sub-volume is the same~\cite{Lepage:19781an}. In importance sampling
the optimal probability distribution is given
by \cref{eq:optimalrho}, where \(f \geq 0\) on \(\Omega\) is presumed
without loss of generality. When applying \vegas\ to multi-dimensional
integrals, \cref{eq:optimalrho} is usually modified to factorize into
distributions for each variable.

\begin{equation}
  \label{eq:optimalrho}
  \rho(\vb{x}) = \frac{f(\vb{x})}{\int_\Omega f(\vb{y})\dd{\vb{y}}}
\end{equation}
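
The effect of approximating \cref{eq:optimalrho} can be seen in a
small sketch (assuming NumPy; not part of the thesis code): drawing
the samples from a density that roughly follows the integrand already
shrinks the variance, while sampling from \(\rho\propto f\) exactly
would make every single sample yield \(I\).

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
n = 100_000
f = lambda x: x ** 2                     # exact integral over [0, 1]: 1/3

# uniform sampling, rho = 1
x = rng.uniform(0.0, 1.0, n)
w = f(x)
print(w.mean(), w.std(ddof=1) / np.sqrt(n))

# importance sampling with rho(x) = 2x, which roughly follows f
x = np.sqrt(rng.uniform(0.0, 1.0, n))    # inverse-CDF sampling of rho
w = x / 2.0                              # f(x) / rho(x) simplifies to x / 2
print(w.mean(), w.std(ddof=1) / np.sqrt(n))   # same mean, smaller error
\end{verbatim}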

The idea behind \vegas\ is to subdivide \(\Omega\) into hypercubes,
to define \(\rho\) as a step function on those hypercubes and to
approximate \cref{eq:optimalrho} iteratively, instead of trying to
minimize the variance directly~\cite{Lepage:19781an}. In the end, the
samples are concentrated where \(f\) takes on the highest values and
changes most rapidly. This is done by subdividing the hypercubes into
smaller chunks based on their contribution to the integral and then
varying the hypercube borders until all hypercubes contain the same
number of chunks. Note that no prior knowledge about the integrand is
required. The probability density used by \vegas\ is given
in \cref{eq:vegasrho}, with \(K\) being the number of hypercubes and
\(\Omega_i\) being the hypercubes themselves.

\begin{equation}
  \label{eq:vegasrho}
  \rho(\vb{x}) = \frac{1}{K}\cdot
  \begin{cases}
    \frac{1}{\abs{\Omega_i}} & \vb{x}\in\Omega_i,\; i\in\overline{1,K} \\
    0 & \text{otherwise}
  \end{cases}
\end{equation}
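
A toy sketch of the one-dimensional adaptation step (assuming NumPy;
this simplified rebalancing omits the damping and smoothing of the
real \vegas\ algorithm~\cite{Lepage:19781an}): increments that
contribute much to the integral are subdivided more finely, so that
after a few iterations every increment contributes roughly equally.

\begin{verbatim}
import numpy as np

def adapt_grid(f, a, b, k=50, iters=5, m=200, rng=np.random.default_rng(0)):
    """Toy 1D VEGAS-style grid: equalize each increment's contribution."""
    edges = np.linspace(a, b, k + 1)
    for _ in range(iters):
        widths = np.diff(edges)
        # estimated contribution of every increment to the integral of |f|
        contrib = np.array([
            w * np.abs(f(rng.uniform(lo, lo + w, m))).mean()
            for lo, w in zip(edges[:-1], widths)
        ])
        # redistribute the edges so each new increment holds an equal share
        cum = np.concatenate(([0.0], np.cumsum(contrib)))
        targets = np.linspace(0.0, cum[-1], k + 1)
        edges = np.interp(targets, cum, edges)
    return edges

def vegas_estimate(f, edges, m, rng=np.random.default_rng(1)):
    """Stratified estimate on the adapted grid (m samples per increment)."""
    total = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        total += (hi - lo) * f(rng.uniform(lo, hi, m)).mean()
    return total

f = lambda x: np.exp(-50.0 * (x - 0.5) ** 2)      # sharply peaked toy integrand
edges = adapt_grid(f, 0.0, 1.0)
print(vegas_estimate(f, edges, m=100))            # exact: ~ 0.2507
\end{verbatim}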

\begin{figure}[ht]
  \centering \plot{xs/xs_integrand_vegas}
  \caption[\(2\pi\dv{\sigma}{\theta}\) scaled to increments found by
  \vegas\ ]{\label{fig:xs-int-vegas} The same integrand as
    in \cref{fig:xs-int-theta} with \vegas-generated increments and
    weighting applied (\(f/\rho\)).}
\end{figure}

In one dimension the hypercubes become simple interval
increments. Applying \vegas\ to \cref{eq:crossec} with
\result{xs/python/xs_mc_θ_vegas_K} increments yields
\result{xs/python/xs_mc_θ_vegas} with
\result{xs/python/xs_mc_θ_vegas_N} samples. This result is comparable
to the one obtained by parameter transformation
in \cref{sec:naivechange}. The sample count \(N\) refers to the final
integration and not to the total number of evaluations of \(f\); it
is mentioned here to illustrate the effect of using stratified
sampling. The resulting increments and the weighted integrand
\(f/\rho\) are depicted in \cref{fig:xs-int-vegas}, along with the
original integrand, and it is intuitively clear that the variance is
reduced. Smaller increments correspond to higher sample density and
lower weights, flattening out the integrand.

Generally the result improves with more increments, but at the cost
of more \vegas\ iterations. The intermediate values of those
iterations can be accumulated to improve the accuracy of the end
result~\cite[197]{Lepage:19781an}.
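
One common way to accumulate the iterations, sketched below, is an
inverse-variance weighted mean of the intermediate estimates
(assuming NumPy; the per-iteration values are placeholders, not
thesis results):

\begin{verbatim}
import numpy as np

def combine_iterations(estimates, errors):
    """Inverse-variance weighted mean of per-iteration MC estimates."""
    est = np.asarray(estimates)
    w = 1.0 / np.asarray(errors) ** 2            # weight = 1 / sigma_i^2
    combined = np.sum(w * est) / np.sum(w)
    return combined, np.sqrt(1.0 / np.sum(w))    # combined error

# placeholder per-iteration results (estimate, statistical error)
print(combine_iterations([0.251, 0.249, 0.250], [0.004, 0.003, 0.002]))
\end{verbatim}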

The \vegas\ algorithm can be adapted to \(n\) dimensions by using a
grid of hypercubes instead of intervals and applying the algorithm
along each axis with a slightly altered weighting
mechanism~\cite[197]{Lepage:19781an}.
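
For the multi-dimensional case, G.~P.~Lepage's publicly available
\texttt{vegas} Python package implements this adaptive scheme; a
minimal usage sketch with a placeholder integrand:

\begin{verbatim}
import vegas  # pip install vegas

# two-dimensional placeholder integrand; exact integral over the unit square: 2/3
def f(x):
    return x[0] ** 2 + x[1] ** 2

integ = vegas.Integrator([[0.0, 1.0], [0.0, 1.0]])
result = integ(f, nitn=10, neval=10_000)   # 10 iterations, ~10^4 evaluations each
print(result.mean, result.sdev)            # weighted average over iterations
\end{verbatim}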

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../document"
%%% End: