\documentclass[a4paper]{article}
\usepackage[a4paper,margin=2cm]{geometry}
\usepackage{array}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{tcolorbox}
\usepackage{fancyhdr}
\usepackage{pgfplots}
\usepackage{tabularx}
\usepackage{keystroke}
\usepackage{listings}
\usepackage{xcolor}% used only to show the phantomed stuff
\definecolor{cas}{HTML}{e6f0fe}

\pagestyle{fancy}
\fancyhead[LO,LE]{Unit 3 Methods Statistics}
\fancyhead[CO,CE]{Andrew Lorimer}

\setlength\parindent{0pt}

\begin{document}

\title{Statistics}
\author{}
\date{}
\maketitle

\section{Probability}

  \[\Pr(A \cup B) = \Pr(A) + \Pr(B) - \Pr(A \cap B) \]
  \[\Pr(A \cap B) = 0 \tag{mutually exclusive} \]

\section{Conditional probability}

  \[\Pr(A|B) = \frac{\Pr(A \cap B)}{\Pr(B)}\quad \text{where }\Pr(B) \ne 0 \]

  \[\Pr(A) = \Pr(A|B) \cdot \Pr(B) + \Pr(A|B^{\prime}) \cdot \Pr(B^{\prime}) \tag{law of total probability} \]

  \[\Pr(A \cap B) = \Pr(A|B) \times \Pr(B) \tag{multiplication theorem} \]

  For independent events:

\begin{itemize}
\item \(\Pr(A \cap B) = \Pr(A) \times \Pr(B)\)
\item \(\Pr(A|B) = \Pr(A)\)
\item \(\Pr(B|A) = \Pr(B)\)
\end{itemize}

\subsection{Discrete random distributions}

  Any experiment or activity involving chance will have a probability associated with each result or \textit{outcome}. If the outcomes have a reference to \textbf{discrete numeric values} (outcomes that can be counted), and the result is unknown, then the activity is a \textit{discrete random probability distribution}.

\subsubsection{Discrete probability distributions}

  If an activity has outcomes whose probability values are all non-negative and no greater than one ($\implies 0 \le p(x) \le 1$), and for which the sum of all outcome probabilities is unity ($\implies \sum p(x) = 1$), then it is called a \textit{probability distribution} or \textit{probability mass} function.

\begin{itemize}
\item \textbf{Probability distribution graph} - a series of points on Cartesian axes representing results of outcomes. $\Pr(X=x)$ is on the $y$-axis, $x$ is on the $x$-axis.
\item \textbf{Mean $\mu$} or \textbf{expected value} \(E(X)\) - measure of central tendency. Also known as \textit{balance point}. Centre of a symmetrical distribution.
\begin{align*}
\overline{x} = \mu = E(X) &= \frac{\Sigma(xf)}{\Sigma(f)} \\
 &= \sum_{i=1}^n (x_i \cdot \Pr(X=x_i)) \\
 &= \int_{-\infty}^{\infty} x\cdot f(x) \> dx \quad \text{(for pdf } f \text{)} \\
 &= \sum_{x=-\infty}^{\infty} x \cdot \Pr(X=x) \quad \text{(for discrete } X \text{)}
\end{align*}
\item \textbf{Mode} - most popular value (has highest probability of \(X\) values). Multiple modes can exist if more than one value of \(X\) has the equal-highest probability. The mode must be a value that exists in the distribution.
\item \textbf{Median \(m\)} - the value of \(x\) such that \(\Pr(X \le m) = \Pr(X \ge m) = 0.5\). To find it, add up the probabilities in increasing order of \(x\). If the cumulative probability first exceeds \(0.5\) at some value of \(X\), then that value is the median of \(X\). If the cumulative probability is exactly \(0.5\) at some value, then \(m\) is halfway between this value and the next.
  \[ m = X \> \text{such that} \> \int_{-\infty}^{m} f(x) \> dx = 0.5 \]
\item \textbf{Variance $\sigma^2$} - measure of spread of data around the mean. Not the same magnitude as the original data. For distribution \(x_1 \mapsto p_1, x_2 \mapsto p_2, \dots, x_n \mapsto p_n\):
\begin{align*}
\sigma^2=\operatorname{Var}(X) &= \sum_{i=1}^n p_i (x_i-\mu)^2 \\
 &= \sum (x-\mu)^2\times \Pr(X=x) \\
 &= \sum x^2\times p(x) - \mu^2
\end{align*}
\item \textbf{Standard deviation $\sigma$} - measure of spread in the original magnitude of the data.
Found by taking the square root of the variance: $\sigma =\operatorname{sd}(X)=\sqrt{\operatorname{Var}(X)}$
\end{itemize}

\subsubsection{Expectation theorems}

\begin{align*}
 E(aX \pm b) &= aE(X) \pm b \\
 E(z) &= z \quad \text{(for constant } z \text{)} \\
 E(X+Y) &= E(X) + E(Y) \\
 E(X^n) &= \Sigma x^n \cdot p(x) \\
 &\ne [E(X)]^n
\end{align*}


\section{Binomial Theorem}

\begin{align*}
 (x+y)^n &= \binom{n}{0} x^n y^0 + \binom{n}{1} x^{n-1}y^1 + \binom{n}{2} x^{n-2}y^2 + \dots + \binom{n}{n-1}x^1 y^{n-1} + \binom{n}{n} x^0 y^n \\
 &= \sum_{k=0}^n \binom{n}{k} x^{n-k} y^k \\
 &= \sum_{k=0}^n \binom{n}{k} x^k y^{n-k}
\end{align*}

\begin{enumerate}
\item powers of \(x\) decrease \(n \rightarrow 0\)
\item powers of \(y\) increase \(0 \rightarrow n\)
\item coefficients are given by the \(n\)th row of Pascal's Triangle, where row \(n=0\) has one term
\item Number of terms in \((x+y)^n\) expanded \& simplified is \(n+1\)
\end{enumerate}

  Combinations: \({}^n\text{C}_r = \binom{n}{r}\) (binomial coefficient)
\begin{itemize}
\item Arrangements \({}^n\text{P}_r = \frac{n!}{(n-r)!}\)
\item Combinations \(\binom{n}{r} = \frac{n!}{r!(n-r)!}\)
\item Note \(\binom{n}{r} = \binom{n}{n-r}\)
\end{itemize}

\subsubsection{Pascal's Triangle}

\begin{tabular}{>{$}l<{$\hspace{12pt}}*{13}{c}}
 n=\\
0&&&&&&&1&&&&&&\\
1&&&&&&1&&1&&&&&\\
2&&&&&1&&2&&1&&&&\\
3&&&&1&&3&&3&&1&&&\\
4&&&1&&4&&6&&4&&1&&\\
5&&1&&5&&10&&10&&5&&1&\\
6&1&&6&&15&&20&&15&&6&&1
\end{tabular}

\colorbox{cas}{On CAS:} (soft keys) \keystroke{\(\downarrow\)} \(\rightarrow\) \keystroke{Advanced} \(\rightarrow\) \verb;nCr(n,r);

\section{Binomial distributions}

  (based on Bernoulli trials)

\begin{align*}
\Pr(X=x) &= \binom{n}{x} p^x (1-p)^{n-x} \\
 &= \binom{n}{x} p^x q^{n-x} \quad \text{where } q = 1-p
\end{align*}

\begin{enumerate}
\item Two possible outcomes: \textbf{success} or \textbf{failure}
\item \(\Pr(\text{success})\) is
constant across trials (also denoted \(p\))
\item Finite number \(n\) of independent trials
\end{enumerate}

  If these conditions are met, then the number of successes \(X\) is a binomial random variable. This variable is said to have a \textit{binomial probability distribution}.

\begin{itemize}
\item \(n\) is the number of trials
\item There are two possible outcomes: \(S\) or \(F\)
\item \(\Pr(\text{success}) = p\)
\item \(\Pr(\text{failure}) = 1-p = q\)
\item Shorthand notation: \(X \sim \operatorname{Bi}(n,p)\)
\end{itemize}

\colorbox{cas}{On CAS:} Main \(\rightarrow\) Interactive \(\rightarrow\) Distribution \(\rightarrow\) \verb;binomialPDf; \\
  Input \verb;x; (no.\ of successes), \verb;numtrial; (no.\ of trials), \verb;pos; (probability of success)

\subsection{Applications of binomial distributions}

  \[\Pr(X \ge a) = 1 - \Pr(X < a) \]

\subsection{Expected value of a binomial distribution}

  \[ E(X) = np \quad \text{for } X \sim \operatorname{Bi}(n,p) \]

\subsection{Variance}

  \[\sigma^2(X) = np(1-p) \]

\subsection{Standard deviation}

  \[\sigma(X) = \sqrt{np(1-p)} \]

\end{document}