Andrew's git - notes.git/blob - methods/statistics.tex

   1\documentclass[a4paper]{article}
   2\usepackage[a4paper,margin=2cm]{geometry}
   3\usepackage{array}
   4\usepackage{amsmath}
   5\usepackage{amssymb}
   6\usepackage{tcolorbox}
   7\usepackage{fancyhdr}
   8\usepackage{pgfplots}
   9\usepackage{tabularx}
  10\usepackage{keystroke}
  11\usepackage{listings}
  12\usepackage{xcolor} % provides \definecolor and \colorbox for the CAS highlight boxes
  13\definecolor{cas}{HTML}{e6f0fe}
  14
  15\pagestyle{fancy}
  16\fancyhead[LO,LE]{Unit 3 Methods Statistics}
  17\fancyhead[CO,CE]{Andrew Lorimer}
  18
  19\setlength\parindent{0pt}
  20
  21\begin{document}
  22
  23  \title{Statistics}
  24  \author{}
  25  \date{}
  26  \maketitle
  27
  28  \section{Probability}
  29
  30  \[ \Pr(A \cup B) = \Pr(A) + \Pr(B) - \Pr(A \cap B) \]
  31  \[ \Pr(A \cap B) = 0 \tag{mutually exclusive} \]
  32
  33  \section{Conditional probability}
  34
  35  \[ \Pr(A|B) = \frac{\Pr(A \cap B)}{\Pr(B)} \quad \text{where } \Pr(B) \ne 0 \]
  36  
  37  \[ \Pr(A) = \Pr(A|B) \cdot \Pr(B) + \Pr(A|B^{\prime}) \cdot \Pr(B^{\prime}) \tag{law of total probability} \]
  38  
  39  \[ \Pr(A \cap B) = \Pr(A|B) \times \Pr(B) \tag{multiplication theorem} \]
  40
  41  For independent events:
  42  
  43  \begin{itemize}
  44    \item \(\Pr(A \cap B) = \Pr(A) \times \Pr(B)\)
  45    \item \(\Pr(A|B) = \Pr(A)\)
  46    \item \(\Pr(B|A) = \Pr(B)\)
  47  \end{itemize}
  48
  49  \subsection{Discrete random distributions}
  50
  51  Any experiment or activity involving chance will have a probability associated with each result or \textit{outcome}. If the outcomes have a reference to \textbf{discrete numeric values} (outcomes that can be counted), and the result is unknown, then the activity is a \textit{discrete random probability distribution}.
  52
  53  \subsubsection{Discrete probability distributions}
  54  
  55  If an activity has outcomes whose probability values all lie between zero and one inclusive ($\implies 0 \le p(x) \le 1$), and for which the sum of all outcome probabilities is unity ($\implies \sum p(x) = 1$), then it is called a \textit{probability distribution} or \textit{probability mass function}.
  56
  57  \begin{itemize}
  58    \item \textbf{Probability distribution graph} - a series of points on a cartesian axis representing results of outcomes. $\Pr(X=x)$ is on $y$-axis, $x$ is on $x$ axis.
  59    \item \textbf{Mean $\mu$} or \textbf{expected value} \(E(X)\) - measure of central tendency. Also known as \textit{balance point}. Centre of a symmetrical distribution.
  60      \begin{align*}
  61        \overline{x} = \mu = E(X) &= \frac{\Sigma(xf)}{\Sigma(f)} \\
  62        &= \sum_{i=1}^n (x_i \cdot P(X=x_i)) \\
  63        &= \int_{-\infty}^{\infty} x\cdot f(x) \> dx \quad \text{(for pdf } f \text{)} \\
  64        &= \sum_{x=-\infty}^{\infty} x \cdot \Pr(X=x) \quad \text{(for discrete } X \text{ with unbounded support)}
  65      \end{align*}
  66    \item \textbf{Mode} - most popular value (has highest probability of \(X\) values). Multiple modes can exist if \(>1 \> X\) value have equal-highest probability. Number must exist in distribution.
  67    \item \textbf{Median \(m\)} - the value of \(x\) such that \(\Pr(X \le m) = \Pr(X \ge m) = 0.5\). If the cumulative probability first exceeds \(0.5\) at some value of \(X\), then that value is the median of \(X\). If the cumulative probability equals exactly \(0.5\) at some value, then \(m\) is halfway between this value and the next.
  68      \[ m = X \> \text{such that} \> \int_{-\infty}^{m} f(x) dx = 0.5 \]
  69    \item \textbf{Variance $\sigma^2$} - measure of spread of data around the mean. Not the same magnitude as the original data. For distribution \(x_1 \mapsto p_1, x_2 \mapsto p_2, \dots, x_n \mapsto p_n\):
  70      \begin{align*}
  71        \sigma^2=\operatorname{Var}(X) &= \sum_{i=1}^n p_i (x_i-\mu)^2 \\
  72        &= \sum (x-\mu)^2 \times \Pr(X=x) \\
  73        &= \sum x^2 \times p(x) - \mu^2
  74      \end{align*}
  75    \item \textbf{Standard deviation $\sigma$} - measure of spread in the original magnitude of the data. Found by taking square root of the variance: $\sigma =\operatorname{sd}(X)=\sqrt{\operatorname{Var}(X)}$
  76  \end{itemize}
  77
  78  \subsubsection{Expectation theorems}
  79
  80  \begin{align*}
  81    E(aX \pm b) &= aE(X) \pm b \\
  82    E(z) &= z \\
  83    E(X+Y) &= E(X) + E(Y) \\
  84    E(X^n) &= \Sigma x^n \cdot p(x) \\
  85    &\ne [E(X)]^n
  86  \end{align*}
  87
  88
  89  \section{Binomial Theorem}
  90
  91  \begin{align*}
  92    (x+y)^n &= {n \choose 0} x^n y^0 + {n \choose 1} x^{n-1}y^1 + {n \choose 2} x^{n-2}y^2 + \dots + {n \choose n-1}x^1 y^{n-1} + {n \choose n} x^0 y^n \\
  93    &= \sum_{k=0}^n {n \choose k} x^{n-k} y^k \\
  94    &= \sum_{k=0}^n {n \choose k} x^k y^{n-k}
  95  \end{align*}
  96
  97  \begin{enumerate}
  98    \item powers of \(x\) decrease \(n \rightarrow 0\)
  99    \item powers of \(y\) increase \(0 \rightarrow n\)
 100    \item coefficients are given by \(n\)th row of Pascal's Triangle where \(n=0\) has one term
 101    \item Number of terms in \((x+y)^n\) expanded \& simplified is \(n+1\)
 102  \end{enumerate}
 103
 104  Combinations: \(^n\text{C}_r = {n \choose r}\) (binomial coefficient) 
 105  \begin{itemize}
 106    \item Arrangements \(^n\text{P}_r = \frac{n!}{(n-r)!}\)
 107    \item Combinations \({n \choose r} = \frac{n!}{r!(n-r)!}\)
 108    \item Note \({n \choose r} = {n \choose n-r}\)
 109  \end{itemize}
 110
 111  \subsubsection{Pascal's Triangle}
 112
 113  \begin{tabular}{>{$}l<{$\hspace{12pt}}*{13}{c}}
 114    n=\cr0&&&&&&&1&&&&&&\\
 115    1&&&&&&1&&1&&&&&\\
 116    2&&&&&1&&2&&1&&&&\\
 117    3&&&&1&&3&&3&&1&&&\\
 118    4&&&1&&4&&6&&4&&1&&\\
 119    5&&1&&5&&10&&10&&5&&1&\\
 120    6&1&&6&&15&&20&&15&&6&&1
 121  \end{tabular}
 122
 123  \colorbox{cas}{On CAS:} (soft keys) \keystroke{\(\downarrow\)} \(\rightarrow\) \keystroke{Advanced} \(\rightarrow\) \verb;nCr(n,r);
 124
 125  \section{Binomial distributions}
 126
 127  (built from repeated Bernoulli trials; a Bernoulli distribution is the special case \(n=1\))
 128
 129  \begin{align*}
 130    \Pr(X=x) &= {n \choose x} p^x (1-p)^{n-x} \\
 131    &= {n \choose x} p^x q^{n-x}
 132  \end{align*}
 133
 134  \begin{enumerate}
 135    \item Two possible outcomes: \textbf{success} or \textbf{failure}
 136    \item \(\Pr(\text{success})\) is constant across trials (also denoted \(p\))
 137    \item Finite number \(n\) of independent trials
 138  \end{enumerate}
 139
 140  If these conditions are met, then it is a Binomial Random Variable. This variable is said to have a \textit{binomial probability distribution}.
 141
 142  \begin{itemize}
 143    \item \(n\) is the number of trials
 144    \item There are two possible outcomes: \(S\) or \(F\)
 145    \item \(\Pr(\text{success}) = p\)
 146    \item \(\Pr(\text{failure}) = 1-p = q\)
 147    \item Shorthand notation: \(X \sim \operatorname{Bi}(n,p)\)
 148  \end{itemize}
 149
 150  \colorbox{cas}{On CAS:} Main \(\rightarrow\) Interactive \(\rightarrow\) Distribution \(\rightarrow\) \verb;binomialPDf; \\
 151  Input \verb;x; (no. of successes), \verb;numtrial; (no. of trials), \verb;pos; (probability of success)
 152
 153\end{document}