From: Andrew Lorimer Date: Mon, 9 Sep 2019 01:31:58 +0000 (+1000) Subject: [methods] organise preamble of main doc X-Git-Tag: yr12~38 X-Git-Url: https://git.lorimer.id.au/notes.git/diff_plain/5c2e93960f48f37d65fa730c97f9138722e16c6e [methods] organise preamble of main doc --- diff --git a/methods/methods-collated.pdf b/methods/methods-collated.pdf index 9c6cbbb..12df5ce 100644 Binary files a/methods/methods-collated.pdf and b/methods/methods-collated.pdf differ diff --git a/methods/methods-collated.tex b/methods/methods-collated.tex index 2257b41..b064130 100644 --- a/methods/methods-collated.tex +++ b/methods/methods-collated.tex @@ -1,4 +1,4 @@ -\documentclass[a4paper, twocolumn]{article} +\documentclass[a4paper]{article} \usepackage[dvipsnames, table]{xcolor} \usepackage{adjustbox} \usepackage{amsmath} @@ -19,6 +19,7 @@ \usepackage{pgfplots} \usepackage{pst-plot} \usepackage{standalone} +\usepackage{subfiles} \usepackage{tabularx} \usepackage{tabu} \usepackage{tcolorbox} @@ -41,7 +42,7 @@ } \newcommand{\midarrow}{\tikz \draw[-triangle 90] (0,0) -- +(.1,0);} \usepgflibrary{arrows.meta} -\pgfplotsset{compat=1.8} +\pgfplotsset{compat=1.6} \psset{dimen=monkey,fillstyle=solid,opacity=.5} \def\object{% \psframe[linestyle=none,fillcolor=blue](-2,-1)(2,1) @@ -96,6 +97,8 @@ \date{} \maketitle +\begin{multicols}{2} + \section{Functions} @@ -365,281 +368,8 @@ For \(x^n\), parity of \(n \equiv\) parity of function \input{circ-functions} \input{calculus} + \subfile{statistics-ref} + \end{multicols} - \section{Statistics} - - \subsection*{Probability} - - \begin{align*} - \Pr(A \cup B) &= \Pr(A) + \Pr(B) - \Pr(A \cap B) \\ - \Pr(A \cap B) &= \Pr(A|B) \times \Pr(B) \\ - \Pr(A|B) &= \frac{\Pr(A \cap B)}{\Pr(B)} \\ - \Pr(A) &= \Pr(A|B) \cdot \Pr(B) + \Pr(A|B^{\prime}) \cdot \Pr(B^{\prime}) - \end{align*} - - Mutually exclusive \(\implies \Pr(A \cap B) = 0\) \\ - - Independent events: - \begin{flalign*} - \quad \Pr(A \cap B) &= \Pr(A) \times \Pr(B)& \\ - \Pr(A|B) &= \Pr(A) \\ - 
\Pr(B|A) &= \Pr(B) - \end{flalign*} - - \subsection*{Combinatorics} - - \begin{itemize} - \item Arrangements \({}^{n}P_{k} = \frac{n!}{(n-k)!}\) - \item \colorbox{important}{Combinations} \({n \choose k} = \frac{n!}{k!(n-k)!}\) - \item Note \({n \choose k} = {n \choose n-k}\) - \end{itemize} - - \subsection*{Distributions} - - \subsubsection*{Mean \(\mu\)} - - \textbf{Mean} \(\mu\) or \textbf{expected value} \(E(X)\) - - \begin{align*} - E(X) &= \frac{\Sigma \left[ x \cdot f(x) \right]}{\Sigma f} \tag{\(f =\) absolute frequency} \\ - &= \sum_{i=1}^n \left[ x_i \cdot \Pr(X=x_i) \right] \tag{discrete}\\ - &= \int_\textbf{X} (x \cdot f(x)) \> dx - \end{align*} - - \subsubsection*{Mode} - - Most popular value (has highest probability of all \(X\) values). Multiple modes can exist if \(>1 \> X\) values have equal-highest probability. Number must exist in distribution. - - \subsubsection*{Median} - - If the cumulative sum \(> 0.5\), then value of \(X\) that is reached is the median of \(X\). If the cumulative sum \(= 0.5\), then \(m\) is halfway between this value and the next. To find \(m\), add probabilities of the values of \(X\) from smallest to largest until the sum reaches 0.5. 
- - \[ m = X \> \text{such that} \> \int_{-\infty}^{m} f(x) dx = 0.5 \] - - \subsubsection*{Variance \(\sigma^2\)} - - \begin{align*} - \operatorname{Var}(X) &= \sum_{i=1}^n p_i (x_i-\mu)^2 \\ - &= \sum (x-\mu)^2 \times \Pr(X=x) \\ - &= \sum x^2 \times p(x) - \mu^2 \\ - &= \operatorname{E}(X^2) - [\operatorname{E}(X)]^2 \\ - &= E\left[(X-\mu)^2\right] - \end{align*} - - \subsubsection*{Standard deviation \(\sigma\)} - - \begin{align*} - \sigma &= \operatorname{sd}(X) \\ - &= \sqrt{\operatorname{Var}(X)} - \end{align*} - - \subsection*{Binomial distributions} - - Conditions for a \textit{binomial distribution}: - \begin{enumerate} - \item Two possible outcomes: \textbf{success} or \textbf{failure} - \item \(\Pr(\text{success})\) is constant across trials (also denoted \(p\)) - \item Finite number \(n\) of independent trials - \end{enumerate} - - - \subsubsection*{Properties of \(X \sim \operatorname{Bi}(n,p)\)} - - \begin{align*} - \mu(X) &= np \\ - \operatorname{Var}(X) &= np(1-p) \\ - \sigma(X) &= \sqrt{np(1-p)} \\ - \Pr(X=x) &= {n \choose x} \cdot p^x \cdot (1-p)^{n-x} - \end{align*} - - \begin{cas} - Interactive \(\rightarrow\) Distribution \(\rightarrow\) \verb;binomialPdf; then input - \begin{description}[nosep, style=multiline, labelindent=0.5cm, leftmargin=3cm, font=\normalfont] - \item [x:] no. of successes - \item [numtrial:] no. 
of trials - \item [pos:] probability of success - \end{description} - \end{cas} - - \subsection*{Continuous random variables} - - A continuous random variable \(X\) has a pdf \(f\) such that: - - \begin{enumerate} - \item \(f(x) \ge 0 \forall x \) - \item \(\int^\infty_{-\infty} f(x) \> dx = 1\) - \end{enumerate} - - \begin{align*} - E(X) &= \int_\textbf{X} (x \cdot f(x)) \> dx \\ - \operatorname{Var}(X) &= E\left[(X-\mu)^2\right] - \end{align*} - - \[ \Pr(X \le c) = \int^c_{-\infty} f(x) \> dx \] - - - \subsection*{Two random variables \(X, Y\)} - - If \(X\) and \(Y\) are independent: - \begin{align*} - \operatorname{E}(aX+bY) & = a\operatorname{E}(X)+b\operatorname{E}(Y) \\ - \operatorname{Var}(aX \pm bY \pm c) &= a^2 \operatorname{Var}(X) + b^2 \operatorname{Var}(Y) - \end{align*} - - \subsection*{Linear functions \(X \rightarrow aX+b\)} - - \begin{align*} - \Pr(Y \le y) &= \Pr(aX+b \le y) \\ - &= \Pr\left(X \le \dfrac{y-b}{a}\right) \\ - &= \int^{\frac{y-b}{a}}_{-\infty} f(x) \> dx - \end{align*} - - \begin{align*} - \textbf{Mean:} && \operatorname{E}(aX+b) & = a\operatorname{E}(X)+b \\ - \textbf{Variance:} && \operatorname{Var}(aX+b) &= a^2 \operatorname{Var}(X) \\ - \end{align*} - - \subsection*{Expectation theorems} - - For some non-linear function \(g\), the expected value \(E(g(X))\) is not equal to \(g(E(X))\). - - \begin{align*} - E(X^2) &= \operatorname{Var}(X) + \left[E(X)\right]^2 \\ - E(X^n) &= \Sigma x^n \cdot p(x) \tag{non-linear} \\ - &\ne [E(X)]^n \\ - E(aX \pm b) &= aE(X) \pm b \tag{linear} \\ - E(b) &= b \tag{\(\forall b \in \mathbb{R}\)}\\ - E(X+Y) &= E(X) + E(Y) \tag{two variables} - \end{align*} - - \subsection*{Sample mean} - - Approximation of the \textbf{population mean} determined experimentally. 
- - \[ \overline{x} = \dfrac{\Sigma x}{n} \] - - where - \begin{description}[nosep, labelindent=0.5cm] - \item \(n\) is the size of the sample (number of sample points) - \item \(x\) is the value of a sample point - \end{description} - - \begin{cas} - \begin{enumerate}[leftmargin=3mm] - \item Spreadsheet - \item In cell A1:\\ \path{mean(randNorm(sd, mean, sample size))} - \item Edit \(\rightarrow\) Fill \(\rightarrow\) Fill Range - \item Input range as A1:An where \(n\) is the number of samples - \item Graph \(\rightarrow\) Histogram - \end{enumerate} - \end{cas} - - \subsubsection*{Sample size of \(n\)} - - \[ \overline{X} = \sum_{i=1}^n \frac{x_i}{n} = \dfrac{\sum x}{n} \] - - Sample mean is distributed with mean \(\mu\) and sd \(\frac{\sigma}{\sqrt{n}}\) (approaches these values for increasing sample size \(n\)). - - For a new distribution with mean of \(n\) trials, \(\operatorname{E}(X^\prime) = \operatorname{E}(X), \quad \operatorname{sd}(X^\prime) = \dfrac{\operatorname{sd}(X)}{\sqrt{n}}\) - - \begin{cas} - - \begin{itemize} - \item Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); where \verb;n; is the number of samples. Show histogram with Histogram key in top left - \item To calculate parameters of a dataset: Calc \(\rightarrow\) One-variable - \end{itemize} - - \end{cas} - - \subsection*{Normal distributions} - - - \[ Z = \frac{X - \mu}{\sigma} \] - - Normal distributions must have area (total prob.) of 1 \(\implies \int^\infty_{-\infty} f(x) \> dx = 1\) \\ - \(\text{mean} = \text{mode} = \text{median}\) - - \begin{warning} - Always express \(z\) as +ve. Express confidence \textit{interval} as ordered pair. 
- \end{warning} - - \pgfmathdeclarefunction{gauss}{2}{% - \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}% - } - \pgfkeys{/pgf/decoration/.cd, - distance/.initial=10pt - } \pgfdeclaredecoration{add dim}{final}{ - \state{final}{% - \pgfmathsetmacro{\dist}{5pt*\pgfkeysvalueof{/pgf/decoration/distance}/abs(\pgfkeysvalueof{/pgf/decoration/distance})} - \pgfpathmoveto{\pgfpoint{0pt}{0pt}} - \pgfpathlineto{\pgfpoint{0pt}{2*\dist}} - \pgfpathmoveto{\pgfpoint{\pgfdecoratedpathlength}{0pt}} - \pgfpathlineto{\pgfpoint{\pgfdecoratedpathlength}{2*\dist}} - \pgfsetarrowsstart{latex} - \pgfsetarrowsend{latex} - \pgfpathmoveto{\pgfpoint{0pt}{\dist}} - \pgfpathlineto{\pgfpoint{\pgfdecoratedpathlength}{\dist}} - \pgfusepath{stroke} - \pgfpathmoveto{\pgfpoint{0pt}{0pt}} - \pgfpathlineto{\pgfpoint{\pgfdecoratedpathlength}{0pt}} - }} - \tikzset{dim/.style args={#1,#2}{decoration={add dim,distance=#2}, - decorate, - postaction={decorate,decoration={text along path, - raise=#2, - text align={align=center}, - text={#1}}}}} - \begin{figure*}[hb] - \centering - \begin{tikzpicture} - \begin{axis}[every axis plot post/.style={ - mark=none,domain=-3:3,samples=50,smooth}, - axis x line=bottom, - axis y line=left, - enlargelimits=upper, - x=\textwidth/10, - ytick={0.55}, - yticklabels={\(\frac{1}{\sigma \sqrt{2\pi}}\)}, - xtick={-2,-1,0,1,2}, - x tick label style = {font=\footnotesize}, - xticklabels={\((\mu-2\sigma)\), \((\mu-\sigma)\), \(\mu\), \((\mu+\sigma)\), \((\mu+2\sigma)\)}, - xlabel={\(x\)}, - every axis x label/.style={at={(current axis.right of origin)},anchor=north west}, - every axis y label/.style={at={(axis description cs:-0.02,0.2)}, anchor=south west, rotate=90}, - ylabel={\(\Pr(X=x)\)}] - \addplot {gauss(0,0.75)}; - \fill[red!30] (-3,0) -- plot[id=f3,domain=-3:3,samples=50] function {1/(0.75*sqrt(2*pi))*exp(-((x)^2)/(2*0.75^2))} -- (3,0) -- cycle; - \fill[darkgray!30] (3,0) -- plot[id=f3,domain=-3:3,samples=50] function 
{1/(0.75*sqrt(2*pi))*exp(-x*x*0.5/(0.75*0.75))} -- (3,0) -- cycle; - \fill[lightgray!30] (-2,0) -- plot[id=f3,domain=-2:2,samples=50] function {1/(0.75*sqrt(2*pi))*exp(-x*x*0.5/(0.75*0.75))} -- (2,0) -- cycle; - \fill[white!30] (-1,0) -- plot[id=f3,domain=-1:1,samples=50] function {1/(0.75*sqrt(2*pi))*exp(-x*x*0.5/(0.75*0.75))} -- (1,0) -- cycle; - \begin{scope}[<->] - \draw (-1,0.35) -- (1,0.35) node [midway, fill=white] {68.3\%}; - \draw (-2,0.25) -- (2,0.25) node [midway, fill=white] {95.5\%}; - \draw (-3,0.15) -- (3,0.15) node [midway, fill=white] {99.7\%}; - \end{scope} - \begin{scope}[-, dashed, gray] - \draw (-1,0) -- (-1, 0.35); - \draw (1,0) -- (1, 0.35); - \draw (-2,0) -- (-2, 0.25); - \draw (2,0) -- (2, 0.25); - \draw (-3,0) -- (-3, 0.15); - \draw (3,0) -- (3, 0.15); - \end{scope} - \end{axis} - \begin{axis}[every axis plot post/.append style={ - mark=none,domain=-3:3,samples=50,smooth}, - axis x line=bottom, - enlargelimits=upper, - x=\textwidth/10, - xtick={-2,-1,0,1,2}, - axis x line shift=30pt, - hide y axis, - x tick label style = {font=\footnotesize}, - xlabel={\(Z\)}, - every axis x label/.style={at={(axis description cs:1,-0.25)},anchor=south west}] - \addplot {gauss(0,0.75)}; - \end{axis} - \end{tikzpicture} - \end{figure*} - \end{document} +\end{document}