% Unit 4 Specialist --- Statistics: summary notes.
% Preamble: page geometry, maths/plotting packages and the running header.
\documentclass[a4paper]{article}
\usepackage[a4paper, margin=2cm]{geometry}
\usepackage{array}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{tcolorbox}
\usepackage{fancyhdr}
\usepackage{pgfplots}
\usepackage{tabularx}
\usepackage{keystroke}
\usepackage{listings}
\usepackage{xcolor} % provides \definecolor and \colorbox
\definecolor{cas}{HTML}{e6f0fe} % background colour of the ``On CAS'' call-outs
\usepackage{mathtools}
\pgfplotsset{compat=1.16}

% Running header on every page.
\pagestyle{fancy}
\fancyhead[LO,LE]{Unit 4 Specialist --- Statistics}
\fancyhead[CO,CE]{Andrew Lorimer}

% Paragraphs are not indented.
\setlength\parindent{0pt}

\begin{document}

\title{Statistics}
\author{}
\date{}
\maketitle

\section{Linear combinations of random variables}

\subsection*{Continuous random variables}

A continuous random variable \(X\) has a pdf \(f\) such that:

\begin{enumerate}
  \item \(f(x) \ge 0 \quad \forall x\)
  \item \(\int^\infty_{-\infty} f(x) \> dx = 1\)
\end{enumerate}

\[\Pr(X \le c) = \int^c_{-\infty} f(x) \> dx \]

\subsubsection*{Linear functions \(X \rightarrow aX+b\)}

% Dividing the inequality by a keeps its direction only when a > 0,
% so the assumption is stated explicitly.
For \(Y = aX+b\) with \(a > 0\):
\begin{align*}
  \Pr(Y \le y) &= \Pr(aX+b \le y) \\
               &= \Pr\left(X \le \dfrac{y-b}{a}\right) \\
               &= \int^{\frac{y-b}{a}}_{-\infty} f(x) \> dx
\end{align*}
% Effect of the linear transformation on the mean and variance.
\begin{align*}
  \textbf{Mean:} && \operatorname{E}(aX+b) &= a\operatorname{E}(X)+b \\
  \textbf{Variance:} && \operatorname{Var}(aX+b) &= a^2\operatorname{Var}(X)
\end{align*}

\subsection*{Linear combination of two random variables}

% The variance rule carries its independence condition as the equation tag.
\begin{align*}
  \textbf{Mean:} && \operatorname{E}(aX+bY) &= a\operatorname{E}(X)+b\operatorname{E}(Y) \\
  \textbf{Variance:} && \operatorname{Var}(aX+bY) &= a^2\operatorname{Var}(X) + b^2\operatorname{Var}(Y) \tag{if \(X\) and \(Y\) are independent}
\end{align*}

\section{Sample mean}

Approximation of the \textbf{population mean} determined experimentally.
\[\overline{x} = \dfrac{\sum x}{n} \]

where \(n\) is the size of the sample (number of sample points) and \(x\) is the value of a sample point.

\subsubsection*{\colorbox{cas}{On CAS:}}

\begin{enumerate}
  \item Spreadsheet
  \item In cell A1: \verb;mean(randNorm(sd, mean, sample size));
  \item Edit \(\rightarrow\) Fill \(\rightarrow\) Fill Range
  \item Input range as A1:A\(n\) where \(n\) is the number of samples
  \item Graph \(\rightarrow\) Histogram
\end{enumerate}

\subsubsection*{Sample size of \(n\)}

\[\overline{X} = \sum_{i=1}^n \frac{x_i}{n} = \dfrac{\sum x}{n} \]

Sample mean is distributed with mean \(\mu\) and sd \(\frac{\sigma}{\sqrt{n}}\) (approaches these values for increasing sample size \(n\)).

\colorbox{cas}{On CAS:} Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); where \verb;n; is the number of samples. Show histogram with Histogram key in top left.

To calculate parameters of a dataset: Calc \(\rightarrow\) One-variable.

\section{Normal distributions}

mean = mode = median

\[ Z = \frac{X - \mu}{\sigma} \]

Normal distributions must have area (total prob.)
of 1 \(\implies \int^\infty_{-\infty} f(x) \> dx = 1\).

% gauss(mu, sigma): normal probability density written in terms of x,
% i.e. 1/(sigma*sqrt(2*pi)) * exp(-(x-mu)^2 / (2*sigma^2)).
%   first argument  -- mean (mu)
%   second argument -- standard deviation (sigma)
\pgfmathdeclarefunction{gauss}{2}{%
  \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}%
}

% The curve is drawn with mu = 0 and sigma = 1 so that every tick position
% agrees with its symbolic label: x = k corresponds to mu + k*sigma, and the
% peak height is 1/sqrt(2*pi), approximately 0.3989.
\begin{tikzpicture}
  \begin{axis}[
      every axis plot post/.append style={
        mark=none, domain=-3:3, samples=50, smooth}, % all plots: x from -3 to 3, 50 samples, smooth, no marks
      axis x line*=bottom, % no box around the plot, only x and y axis
      axis y line*=left,   % the * suppresses the arrow tips
      enlargelimits=upper,
      ytick={0.3989},
      yticklabels={\(\frac{1}{\sigma \sqrt{2\pi}}\)},
      xtick={-2,-1,0,1,2},
      xticklabels={\(\mu-2\sigma\), \(\mu-\sigma\), \(\mu\), \(\mu+\sigma\), \(\mu+2\sigma\)},
      xlabel={\(x\)},
      every axis x label/.style={at={(current axis.right of origin)},anchor=north west},
      ylabel={\(f(x)\)}] % vertical axis is the density f(x), not a point probability
    \addplot {gauss(0,1)};
  \end{axis}
\end{tikzpicture}

\section{Central limit theorem}

If \(X\) is randomly distributed with mean \(\mu\) and sd \(\sigma\), then with an adequate sample size \(n\) the distribution of the sample mean \(\overline{X}\) is approximately normal with mean \(\operatorname{E}(\overline{X}) = \mu\) and \(\operatorname{sd}(\overline{X}) = \frac{\sigma}{\sqrt{n}}\).

\end{document}