1\documentclass[a4paper]{article} 2\usepackage[a4paper,margin=2cm]{geometry} 3\usepackage{array} 4\usepackage{amsmath} 5\usepackage{amssymb} 6\usepackage{tcolorbox} 7\usepackage{fancyhdr} 8\usepackage{pgfplots} 9\usepackage{tabularx} 10\usepackage{keystroke} 11\usepackage{listings} 12\usepackage{xcolor} % used only to show the phantomed stuff 13\definecolor{cas}{HTML}{e6f0fe} 14\usepackage{mathtools} 15 16\pagestyle{fancy} 17\fancyhead[LO,LE]{Unit 3 Methods --- Statistics} 18\fancyhead[CO,CE]{Andrew Lorimer} 19 20\setlength\parindent{0pt} 21 22\begin{document} 23 24 \title{Statistics} 25 \author{} 26 \date{} 27 %\maketitle 28 29 \section{Probability} 30 31 \subsection*{Probability theorems} 32 33 \begin{align*} 34 \textbf{Union:} &&\Pr(A \cup B) &= \Pr(A) + \Pr(B) - \Pr(A \cap B) \\ 35 \textbf{Multiplication theorem:} &&\Pr(A \cap B) &= \Pr(A|B) \times \Pr(B) \\ 36 \textbf{Conditional:} &&\Pr(A|B) &= \frac{\Pr(A \cap B)}{\Pr(B)} \\ 37 \textbf{Law of total probability:} &&\Pr(A) &= \Pr(A|B) \cdot \Pr(B) + \Pr(A|B^{\prime}) \cdot \Pr(B^{\prime}) \\ 38 \end{align*} 39 40 Mutually exclusive \(\implies \Pr(A \cap B) = 0\) \\ 41 42 Independent events: 43 \begin{flalign*} 44 \quad \Pr(A \cap B) &= \Pr(A) \times \Pr(B)& \\ 45 \Pr(A|B) &= \Pr(A) \\ 46 \Pr(B|A) &= \Pr(B) 47 \end{flalign*} 48 49 \subsection*{Discrete random distributions} 50 51 Any experiment or activity involving chance will have a probability associated with each result or \textit{outcome}. If the outcomes have a reference to \textbf{discrete numeric values} (outcomes that can be counted), and the result is unknown, then the activity is a \textit{discrete random probability distribution}. 
52 53 \subsubsection*{Discrete probability distributions} 54 55 If an activity has outcomes whose probability values are all between zero and one inclusive ($\implies 0 \le p(x) \le 1$), and for which the sum of all outcome probabilities is unity ($\implies \sum p(x) = 1$), then it is called a \textit{probability distribution} or \textit{probability mass function}. 56 57 \begin{itemize} 58 \item \textbf{Probability distribution graph} - a series of points on a Cartesian axis representing results of outcomes. $\Pr(X=x)$ is on $y$-axis, $x$ is on $x$-axis. 59 \item \textbf{Mean $\mu$} or \textbf{expected value} \(E(X)\) - measure of central tendency. Also known as \textit{balance point}. Centre of a symmetrical distribution. 60 \begin{align*} 61 \overline{x} = \mu = E(X) &= \frac{\Sigma \left[ x \cdot f(x) \right]}{\Sigma f} \tag{where \(f =\) absolute frequency} \\ 62 &= \sum_{i=1}^n \left[ x_i \cdot \Pr(X=x_i) \right] \tag{for \(n\) values of \(x\)}\\ 63 &= \int_{-\infty}^{\infty} (x\cdot f(x)) \> dx \tag{for pdf \(f\)} 64 \end{align*} 65 \item \textbf{Mode} - most popular value (has highest probability of \(X\) values). Multiple modes can exist if \(>1 \> X\) values have equal-highest probability. Number must exist in distribution. 66 \item \textbf{Median \(m\)} - the value of \(x\) such that \(\Pr(X \le m) = \Pr(X \ge m) = 0.5\). If the cumulative probability \(\Pr(X \le x)\) first exceeds 0.5 at some value of \(X\), then that value is the median of \(X\). If the cumulative probability equals exactly 0.5 at some value, then \(m\) is halfway between this value and the next. To find \(m\), add the probabilities of the values of \(X\) from smallest to largest until the sum reaches 0.5. 67 \[ m = X \> \text{such that} \> \int_{-\infty}^{m} f(x) \> dx = 0.5 \] 68 \item \textbf{Variance $\sigma^2$} - measure of spread of data around the mean. Not the same magnitude as the original data. 
For distribution \(x_1 \mapsto p_1, x_2 \mapsto p_2, \dots, x_n \mapsto p_n\): 69 \begin{align*} 70 \sigma^2=\operatorname{Var}(X) &= \sum_{i=1}^n p_i (x_i-\mu)^2 \\ 71 &= \sum (x-\mu)^2 \times \Pr(X=x) \\ 72 &= \sum x^2 \times p(x) - \mu^2 73 \end{align*} 74 \item \textbf{Standard deviation $\sigma$} - measure of spread in the original magnitude of the data. Found by taking square root of the variance: 75 \begin{align*} 76 \sigma &= \operatorname{sd}(X) \\ 77 &= \sqrt{\operatorname{Var}(X)} 78 \end{align*} 79 \end{itemize} 80 81 \subsubsection*{Expectation theorems} 82 83 For a non-linear function \(g\), the expected value \(E(g(X))\) is in general not equal to \(g(E(X))\). 84 85 \begin{align*} 86 E(X^n) &= \Sigma x^n \cdot p(x) \tag{non-linear function} \\ 87 &\ne [E(X)]^n \\ 88 E(aX \pm b) &= aE(X) \pm b \tag{linear function} \\ 89 E(b) &= b \tag{for constant \(b \in \mathbb{R}\)}\\ 90 E(X+Y) &= E(X) + E(Y) \tag{for two random variables} 91 \end{align*} 92 93 94 \section{Binomial Theorem} 95 96 \begin{align*} 97 (x+y)^n &= {n \choose 0} x^n y^0 + {n \choose 1} x^{n-1}y^1 + {n \choose 2} x^{n-2}y^2 + \dots + {n \choose n-1}x^1 y^{n-1} + {n \choose n} x^0 y^n \\ 98 &= \sum_{k=0}^n {n \choose k} x^{n-k} y^k \\ 99 &= \sum_{k=0}^n {n \choose k} x^k y^{n-k} 100 \end{align*} 101 102 \subsubsection*{Patterns} 103 \begin{enumerate} 104 \item powers of \(x\) decrease \(n \rightarrow 0\) 105 \item powers of \(y\) increase \(0 \rightarrow n\) 106 \item coefficients are given by \(n\)th row of Pascal's Triangle where \(n=0\) has one term 107 \item Number of terms in \((x+a)^n\) expanded \& simplified is \(n+1\) 108 \end{enumerate} 109 110 \subsubsection*{Combinatorics} 111 112 \[ \text{Binomial coefficient:} \quad ^n\text{C}_r = {n \choose r} \] 113 114 \begin{itemize} 115 \item Arrangements \(^n\text{P}_r = \frac{n!}{(n-r)!}\) 116 \item Combinations \({n \choose r} = \frac{n!}{r!(n-r)!}\) 117 \item Note \({n \choose r} = {n \choose n-r}\) 118 \end{itemize} 119 120 \colorbox{cas}{On 
CAS:} (soft keyboard) \keystroke{\(\downarrow\)} \(\rightarrow\) \keystroke{Advanced} \(\rightarrow\) \verb;nCr(n,r); 121 122 \subsubsection*{Pascal's Triangle} 123 124 \begin{tabular}{>{$}l<{$\hspace{12pt}}*{13}{c}} 125 n=\cr0&&&&&&&1&&&&&&\\ 126 1&&&&&&1&&1&&&&&\\ 127 2&&&&&1&&2&&1&&&&\\ 128 3&&&&1&&3&&3&&1&&&\\ 129 4&&&1&&4&&6&&4&&1&&\\ 130 5&&1&&5&&10&&10&&5&&1&\\ 131 6&1&&6&&15&&20&&15&&6&&1 132 \end{tabular} 133 134 \section{Binomial distributions} 135 136 (the distribution of the number of successes in a sequence of Bernoulli trials) 137 138 \begin{align*} 139 \text{Defined by} \quad X &\sim \operatorname{Bi}(n,p) \\ 140 \implies \Pr(X=x) &= {n \choose x} p^x (1-p)^{n-x} \\ 141 &= {n \choose x} p^x q^{n-x} 142 \end{align*} 143 144 where: 145 \begin{description} 146 \item \(n\) is the number of trials 147 \item There are two possible outcomes: \(S\) or \(F\) 148 \item \(\Pr(\text{success}) = p\) 149 \item \(\Pr(\text{failure}) = 1-p = q\) 150 \end{description} 151 152 \subsection*{Conditions for a binomial variable/distribution} 153 \begin{enumerate} 154 \item Two possible outcomes: \textbf{success} or \textbf{failure} 155 \item \(\Pr(\text{success})\) is constant across trials (also denoted \(p\)) 156 \item Finite number \(n\) of independent trials 157 \end{enumerate} 158 159 \subsection*{\colorbox{cas}{Solve on CAS}} 160 161 Main \(\rightarrow\) Interactive \(\rightarrow\) Distribution \(\rightarrow\) \verb;binomialPDf; 162 163 \hspace{2em} Input \verb;x; (no. of successes), \verb;numtrial; (no. 
of trials), \verb;pos; (probability of success) 164 165 \subsection*{Properties of \(X \sim \operatorname{Bi}(n,p)\)} 166 167 \begin{align*} 168 \textbf{Mean} \hspace{-4cm} &&\mu(X) &= np \\ 169 \textbf{Variance} \hspace{-4cm} &&\sigma^2(X) &= np(1-p) \\ 170 \textbf{s.d.} \hspace{-4cm} &&\sigma(X) &= \sqrt{np(1-p)} 171 \end{align*} 172 173 \subsection*{Applications of binomial distributions} 174 175 \[ \Pr(X \ge a) = 1 - \Pr(X < a) \] 176 177 \section{Continuous probability} 178 179 \subsection*{Continuous random variables} 180 181 \begin{itemize} 182 \item a variable that can take any real value in an interval 183 \end{itemize} 184 185 \subsection*{Probability density functions} 186 187 \begin{itemize} 188 \item area under curve \( = 1 \implies \int_{-\infty}^{\infty} f(x) \> dx = 1\) 189 \item \(f(x) \ge 0 \> \forall x\) 190 \item pdfs may be linear 191 \item must show sections where \(f(x) = 0\) (use open/closed circles) 192 \end{itemize} 193 194 \[ \Pr(a \le X \le b) = \int^b_a f(x) \> dx \] 195 196\end{document}