% Unit 4 Specialist --- Statistics: revision notes.
\documentclass[a4paper]{article}

% --- Packages (load order kept as in the original) ---
\usepackage[a4paper, margin=2cm]{geometry}
\usepackage{array}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{tcolorbox}
\usepackage{fancyhdr}
\usepackage{pgfplots}
\usepackage{tabularx}
\usepackage{keystroke}
\usepackage{listings}
\usepackage{xcolor}% provides \definecolor for the colour below
\definecolor{cas}{HTML}{e6f0fe}% base colour of the ``On CAS'' box frames
\usepackage{mathtools}
\pgfplotsset{compat=1.16}

% --- Running header ---
\pagestyle{fancy}
\fancyhead[LO,LE]{Unit 4 Specialist --- Statistics}
\fancyhead[CO,CE]{Andrew Lorimer}

% No first-line paragraph indentation anywhere in the notes.
\setlength\parindent{0pt}

\begin{document}

\title{Statistics}
\author{}
\date{}
\maketitle

\section{Linear combinations of random variables}

\subsection*{Continuous random variables}

A continuous random variable \(X\) has a pdf \(f\) such that:
\begin{enumerate}
  \item \(f(x) \ge 0\) for all \(x\)
  \item \(\int^\infty_{-\infty} f(x) \> dx = 1\)
\end{enumerate}
\[
  \Pr(X \le c) = \int^c_{-\infty} f(x) \> dx
\]

\subsubsection*{Linear functions \(X \rightarrow aX+b\)}

% Dividing the inequality by a keeps its direction only when a > 0,
% so the condition is stated explicitly.
Let \(Y = aX + b\) with \(a > 0\). Then
\begin{align*}
  \Pr(Y \le y) &= \Pr(aX+b \le y) \\
               &= \Pr\left(X \le \dfrac{y-b}{a}\right) \\
               &= \int^{\frac{y-b}{a}}_{-\infty} f(x) \> dx
\end{align*}
% No trailing \\ on the last row: it would add an empty row to the display.
\begin{align*}
  \textbf{Mean:}     && \operatorname{E}(aX+b)   &= a\operatorname{E}(X)+b \\
  \textbf{Variance:} && \operatorname{Var}(aX+b) &= a^2\operatorname{Var}(X)
\end{align*}

\subsection*{Linear combination of two random variables}

\begin{align*}
  \textbf{Mean:}     && \operatorname{E}(aX+bY)   &= a\operatorname{E}(X)+b\operatorname{E}(Y) \\
  \textbf{Variance:} && \operatorname{Var}(aX+bY) &= a^2\operatorname{Var}(X) + b^2\operatorname{Var}(Y) \tag{if \(X\) and \(Y\) are independent}
\end{align*}

\section{Sample mean}

Approximation of the \textbf{population mean} determined experimentally.
% Sample mean of n observed sample points (\sum is the summation operator;
% \Sigma is only the Greek capital letter).
\[
  \overline{x} = \dfrac{\sum x}{n}
\]
where \(n\) is the size of the sample (number of sample points) and \(x\) is the value of a sample point.

% CAS steps: fill a spreadsheet column with means of random normal samples,
% then plot them as a histogram.
\begin{tcolorbox}[colframe=cas!75!black, title=On CAS]
  \begin{enumerate}
    \item Spreadsheet
    \item In cell A1: \verb;mean(randNorm(sd, mean, sample size));
    \item Edit \(\rightarrow\) Fill \(\rightarrow\) Fill Range
    \item Input range as A1:An where \(n\) is the number of samples
    \item Graph \(\rightarrow\) Histogram
  \end{enumerate}
\end{tcolorbox}

\subsubsection*{Sample size of \(n\)}

\[
  \overline{X} = \sum_{i=1}^n \frac{x_i}{n} = \dfrac{\sum x}{n}
\]
Sample mean is distributed with mean \(\mu\) and sd \(\frac{\sigma}{\sqrt{n}}\) (approaches these values for increasing sample size \(n\)).

% CAS steps: generate a random normal sample and summarise it.
\begin{tcolorbox}[colframe=cas!75!black, title=On CAS]
  \begin{itemize}
    \item Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); where \verb;n; is the number of samples. Show histogram with Histogram key in top left
    \item To calculate parameters of a dataset: Calc \(\rightarrow\) One-variable
  \end{itemize}
\end{tcolorbox}

\section{Normal distributions}

mean = mode = median
\[
  Z = \frac{X - \mu}{\sigma}
\]
Normal distributions must have area (total probability)
of 1 \(\implies \int^\infty_{-\infty} f(x) \> dx = 1\).
% gauss(#1, #2): normal density with mean #1 and standard deviation #2,
% evaluated at the current plot variable x.
\pgfmathdeclarefunction{gauss}{2}{%
  \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}%
}

% Two axes are overlaid on the same curve: the first is labelled in terms of
% mu and sigma, the second (shifted down by 30pt) shows the matching Z values.
% The curve is drawn with sigma = 1 so that the ticks at +-1 and +-2 really sit
% one and two standard deviations from the mean, and the single y tick marks
% the peak 1/sqrt(2*pi) ~ 0.3989.
\begin{center}
  \begin{tikzpicture}
    \pgfplotsset{set layers}
    \begin{axis}[
        every axis plot post/.append style={mark=none, domain=-3:3, samples=50, smooth},
        axis x line=bottom,
        axis y line=left,
        enlargelimits=upper,
        x=\textwidth/10,
        ytick={0.3989},
        yticklabels={\(\frac{1}{\sigma \sqrt{2\pi}}\)},
        xtick={-2,-1,0,1,2},
        x tick label style={font=\footnotesize},
        xticklabels={\((\mu-2\sigma)\), \((\mu-\sigma)\), \(\mu\), \((\mu+\sigma)\), \((\mu+2\sigma)\)},
        xlabel={\(x\)},
        every axis x label/.style={at={(current axis.right of origin)}, anchor=north west},
        every axis y label/.style={at={(axis description cs:-0.02,0.2)}, anchor=south west, rotate=90},
        % The height of the curve is the density f(x); Pr(X = x) is 0 for a
        % continuous random variable.
        ylabel={\(f(x)\)},
      ]
      \addplot {gauss(0,1)};
    \end{axis}
    % Second axis: same curve and x scale, default numeric tick labels (Z values).
    \begin{axis}[
        every axis plot post/.append style={mark=none, domain=-3:3, samples=50, smooth},
        axis x line=bottom,
        enlargelimits=upper,
        x=\textwidth/10,
        xtick={-2,-1,0,1,2},
        axis x line shift=30pt,
        hide y axis,
        x tick label style={font=\footnotesize},
        xlabel={\(Z\)},
        every axis x label/.style={at={(axis description cs:1,-0.25)}, anchor=south west},
      ]
      \addplot {gauss(0,1)};
    \end{axis}
  \end{tikzpicture}
\end{center}

\section{Central limit theorem}

If \(X\) is randomly distributed with mean \(\mu\) and sd \(\sigma\), then with an adequate sample size \(n\) the distribution of the sample mean \(\overline{X}\) is approximately normal with mean \(\operatorname{E}(\overline{X}) = \mu\) and \(\operatorname{sd}(\overline{X}) = \frac{\sigma}{\sqrt{n}}\).

\end{document}