0b6f4ce416680b27d9f2e5c8b673b6c19a035e10
1\documentclass[a4paper]{article}
2\usepackage[a4paper, margin=2cm]{geometry}
3\usepackage{array}
4\usepackage{amsmath}
5\usepackage{amssymb}
6\usepackage{tcolorbox}
7\usepackage{fancyhdr}
8\usepackage{pgfplots}
9\usepackage{tabularx}
10\usepackage{keystroke}
11\usepackage{listings}
12\usepackage{xcolor} % used only to show the phantomed stuff
13\definecolor{cas}{HTML}{e6f0fe}
14\usepackage{mathtools}
15\pgfplotsset{compat=1.16}
16
17\pagestyle{fancy}
18\fancyhead[LO,LE]{Unit 4 Specialist --- Statistics}
19\fancyhead[CO,CE]{Andrew Lorimer}
20
21\setlength\parindent{0pt}
22
23\begin{document}
24
25 \title{Statistics}
26 \author{}
27 \date{}
28 \maketitle
29
30 \section{Linear combinations of random variables}
31
32 \subsection*{Continuous random variables}
33
34 A continuous random variable \(X\) has a pdf \(f\) such that:
35
36 \begin{enumerate}
37 \item \(f(x) \ge 0\) for all \(x\)
38 \item \(\int^\infty_{-\infty} f(x) \> dx = 1\)
39 \end{enumerate}
40
41 \[ \Pr(X \le c) = \int^c_{-\infty} f(x) \> dx \]
42
43 \subsubsection*{Linear functions \(X \rightarrow aX+b\)}
44
45 \begin{align*}
46 \Pr(Y \le y) &= \Pr(aX+b \le y) \tag{where \(Y = aX+b\)} \\
47 &= \Pr\left(X \le \dfrac{y-b}{a}\right) \tag{for \(a > 0\)} \\
48 &= \int^{\frac{y-b}{a}}_{-\infty} f(x) \> dx
49 \end{align*}
50
51 \begin{align*}
52 \textbf{Mean:} && \operatorname{E}(aX+b) & = a\operatorname{E}(X)+b \\
53 \textbf{Variance:} && \operatorname{Var}(aX+b) &= a^2 \operatorname{Var}(X)
54 \end{align*}
55
56 \subsection*{Linear combination of two random variables}
57
58 \begin{align*}
59 \textbf{Mean:} && \operatorname{E}(aX+bY) & = a\operatorname{E}(X)+b\operatorname{E}(Y) \\
60 \textbf{Variance:} && \operatorname{Var}(aX+bY) &= a^2 \operatorname{Var}(X) + b^2 \operatorname{Var}(Y) \tag{if \(X\) and \(Y\) are independent}
61 \end{align*}
62
63 \section{Sample mean}
64
65 The sample mean is an approximation of the \textbf{population mean}, determined experimentally from a sample.
66
67 \[ \overline{x} = \dfrac{\sum x}{n} \]
68
69 where \(n\) is the size of the sample (number of sample points) and \(x\) is the value of a sample point.
70
71 \begin{tcolorbox}[colframe=cas!75!black, title=On CAS]
72
73 \begin{enumerate}
74 \item Spreadsheet
75 \item In cell A1: \verb;mean(randNorm(sd, mean, sample size));
76 \item Edit \(\rightarrow\) Fill \(\rightarrow\) Fill Range
77 \item Input range as A1:An where \(n\) is the number of samples
78 \item Graph \(\rightarrow\) Histogram
79 \end{enumerate}
80 \end{tcolorbox}
81
82 \subsubsection*{Sample size of \(n\)}
83
84 \[ \overline{X} = \sum_{i=1}^n \frac{x_i}{n} = \dfrac{\sum x}{n} \]
85
86 Sample mean is distributed with mean \(\mu\) and sd \(\frac{\sigma}{\sqrt{n}}\) (approaches these values for increasing sample size \(n\)).
87
88 \begin{tcolorbox}[colframe=cas!75!black, title=On CAS]
89
90 \begin{itemize}
91 \item Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); where \verb;n; is the number of samples. Show histogram with Histogram key in top left
92 \item To calculate parameters of a dataset: Calc \(\rightarrow\) One-variable
93 \end{itemize}
94 \end{tcolorbox}
95
96 \section{Normal distributions}
97
98 mean = mode = median
99
100 \[ Z = \frac{X - \mu}{\sigma} \]
101
102 Normal distributions must have area (total prob.) of 1 \(\implies \int^\infty_{-\infty} f(x) \> dx = 1\)
% gauss(#1, #2): normal probability density with mean #1 and standard deviation #2,
% evaluated at the current value of `x`.
% `x` is not an argument of the function; it is read from the surrounding context,
% i.e. the plot variable when used as \addplot {gauss(...)}.
% The trailing % on each line stops the line ends from inserting spurious spaces.
103\pgfmathdeclarefunction{gauss}{2}{%
104 \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}%
105}
106
% Figure: normal density curve drawn over two stacked horizontal axes.
% The upper axis labels the ticks in terms of mu and sigma; the lower axis
% (shifted down by 30pt) shows the corresponding standard scores Z.
% The curve is plotted with sd 1 in plot units so that the ticks at +-1, +-2
% really are one and two standard deviations from the mean, and the y tick
% sits at the true peak height 1/sqrt(2*pi) ~ 0.3989.
107{\begin{center} \begin{tikzpicture}
108 \pgfplotsset{set layers}
% First axis: x scale in terms of mu and sigma, with the y axis and the curve.
109\begin{axis}[every axis plot post/.append style={
110 mark=none,domain=-3:3,samples=50,smooth},
111 axis x line=bottom,
112 axis y line=left,
113 enlargelimits=upper,
114 x=\textwidth/10,
% Peak of the density for sd 1 (matches the curve plotted below).
115 ytick={0.3989},
116 yticklabels={\(\frac{1}{\sigma \sqrt{2\pi}}\)},
117 xtick={-2,-1,0,1,2},
118 x tick label style = {font=\footnotesize},
119 xticklabels={\((\mu-2\sigma)\), \((\mu-\sigma)\), \(\mu\), \((\mu+\sigma)\), \((\mu+2\sigma)\)},
120 xlabel={\(x\)},
121 every axis x label/.style={at={(current axis.right of origin)},anchor=north west},
122 every axis y label/.style={at={(axis description cs:-0.02,0.2)}, anchor=south west, rotate=90},
% The curve is the density f(x); for a continuous variable Pr(X = x) is 0.
123 ylabel={\(f(x)\)}]
124 \addplot {gauss(0,1)};
125\end{axis}
% Second axis: same curve and x scale, y axis hidden, x axis shifted down to show Z.
% It must plot the same function as the first axis so that both are scaled identically.
126\begin{axis}[every axis plot post/.append style={
127 mark=none,domain=-3:3,samples=50,smooth},
128 axis x line=bottom,
129 enlargelimits=upper,
130 x=\textwidth/10,
131 xtick={-2,-1,0,1,2},
132 axis x line shift=30pt,
133 hide y axis,
134 x tick label style = {font=\footnotesize},
135 xlabel={\(Z\)},
136 every axis x label/.style={at={(axis description cs:1,-0.25)},anchor=south west}]
137 \addplot {gauss(0,1)};
138\end{axis}
139\end{tikzpicture}\end{center}}
140
141 \section{Central limit theorem}
142
143 If \(X\) is a random variable (with any distribution) with mean \(\mu\) and sd \(\sigma\), then with an adequate sample size \(n\) the distribution of the sample mean \(\overline{X}\) is approximately normal with mean \(\operatorname{E}(\overline{X}) = \mu\) and \(\operatorname{sd}(\overline{X}) = \frac{\sigma}{\sqrt{n}}\).
144
145\end{document}