Andy Sotheran 2019-04-24 20:10:30 +01:00
parent 48f63b5f50
commit 1a27685cfb
15 changed files with 605 additions and 323 deletions

View File: document.aux

@@ -31,14 +31,14 @@
 \newlabel{glossary}{{}{3}{Glossary}{section*.3}{}}
 \abx@aux@cite{1}
 \abx@aux@segm{0}{0}{1}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Introduction}{6}{section*.5}}
-\newlabel{introduction}{{}{6}{Introduction}{section*.5}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Problem Articulation}{8}{section*.6}}
-\newlabel{problem}{{}{8}{Problem Articulation}{section*.6}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Problem Statement}{8}{section*.7}}
-\newlabel{statement}{{}{8}{Problem Statement}{section*.7}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Stakeholders}{8}{section*.8}}
-\newlabel{stakeholders}{{}{8}{Stakeholders}{section*.8}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Introduction}{7}{section*.5}}
+\newlabel{introduction}{{}{7}{Introduction}{section*.5}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Problem Articulation}{9}{section*.6}}
+\newlabel{problem}{{}{9}{Problem Articulation}{section*.6}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Problem Statement}{9}{section*.7}}
+\newlabel{statement}{{}{9}{Problem Statement}{section*.7}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Stakeholders}{9}{section*.8}}
+\newlabel{stakeholders}{{}{9}{Stakeholders}{section*.8}{}}
 \abx@aux@cite{2}
 \abx@aux@segm{0}{0}{2}
 \abx@aux@cite{3}
@@ -46,27 +46,27 @@
 \abx@aux@cite{4}
 \abx@aux@segm{0}{0}{4}
 \abx@aux@segm{0}{0}{1}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Motivation}{9}{section*.9}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Technical Specification}{10}{section*.10}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Constraints}{11}{section*.11}}
-\newlabel{constraints}{{}{11}{Project Constraints}{section*.11}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Motivation}{10}{section*.9}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Technical Specification}{11}{section*.10}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Constraints}{12}{section*.11}}
+\newlabel{constraints}{{}{12}{Project Constraints}{section*.11}{}}
 \abx@aux@segm{0}{0}{3}
 \abx@aux@segm{0}{0}{1}
 \abx@aux@cite{5}
 \abx@aux@segm{0}{0}{5}
 \abx@aux@cite{6}
 \abx@aux@segm{0}{0}{6}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Literature Review}{12}{section*.12}}
-\newlabel{literature}{{}{12}{Literature Review}{section*.12}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Existing Tools}{12}{section*.13}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Related research}{12}{section*.14}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Literature Review}{13}{section*.12}}
+\newlabel{literature}{{}{13}{Literature Review}{section*.12}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Existing Tools}{13}{section*.13}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Related research}{13}{section*.14}}
 \abx@aux@cite{7}
 \abx@aux@segm{0}{0}{7}
 \abx@aux@cite{8}
 \abx@aux@segm{0}{0}{8}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data Collection}{13}{section*.15}}
-\newlabel{tweet_collection}{{}{13}{Data Collection}{section*.15}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Twitter and Twitter API}{13}{section*.16}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data Collection}{14}{section*.15}}
+\newlabel{tweet_collection}{{}{14}{Data Collection}{section*.15}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Twitter and Twitter API}{14}{section*.16}}
 \abx@aux@cite{9}
 \abx@aux@segm{0}{0}{9}
 \abx@aux@segm{0}{0}{7}
@@ -75,11 +75,11 @@
 \abx@aux@segm{0}{0}{6}
 \abx@aux@cite{10}
 \abx@aux@segm{0}{0}{10}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Tweepy Python Package}{14}{section*.17}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{14}{section*.18}}
-\newlabel{sentiment}{{}{14}{Sentiment Analysis}{section*.18}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Natural Language Processing}{14}{section*.19}}
-\newlabel{algorithms}{{}{14}{Natural Language Processing}{section*.19}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Tweepy Python Package}{15}{section*.17}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{15}{section*.18}}
+\newlabel{sentiment}{{}{15}{Sentiment Analysis}{section*.18}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Natural Language Processing}{15}{section*.19}}
+\newlabel{algorithms}{{}{15}{Natural Language Processing}{section*.19}{}}
 \abx@aux@cite{11}
 \abx@aux@segm{0}{0}{11}
 \abx@aux@cite{12}
@@ -95,10 +95,10 @@
 \abx@aux@segm{0}{0}{14}
 \abx@aux@cite{15}
 \abx@aux@segm{0}{0}{15}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Valence Aware Dictionary and sEntiment Reasoning}{15}{section*.20}}
-\newlabel{Vader}{{}{15}{Valence Aware Dictionary and sEntiment Reasoning}{section*.20}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Networks}{15}{section*.21}}
-\newlabel{networks}{{}{15}{Neural Networks}{section*.21}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Valence Aware Dictionary and sEntiment Reasoning}{16}{section*.20}}
+\newlabel{Vader}{{}{16}{Valence Aware Dictionary and sEntiment Reasoning}{section*.20}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Networks}{16}{section*.21}}
+\newlabel{networks}{{}{16}{Neural Networks}{section*.21}{}}
 \abx@aux@cite{16}
 \abx@aux@segm{0}{0}{16}
 \abx@aux@segm{0}{0}{15}
@@ -108,8 +108,8 @@
 \abx@aux@segm{0}{0}{18}
 \abx@aux@cite{19}
 \abx@aux@segm{0}{0}{19}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Recurrent Neural Network (RNN)}{16}{section*.22}}
-\newlabel{types}{{}{16}{Recurrent Neural Network (RNN)}{section*.22}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Recurrent Neural Network (RNN)}{17}{section*.22}}
+\newlabel{types}{{}{17}{Recurrent Neural Network (RNN)}{section*.22}{}}
 \abx@aux@cite{20}
 \abx@aux@segm{0}{0}{20}
 \abx@aux@cite{21}
@@ -119,8 +119,8 @@
 \abx@aux@segm{0}{0}{22}
 \abx@aux@cite{23}
 \abx@aux@segm{0}{0}{23}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Long-Short Term Memory (LSTM)}{17}{section*.23}}
-\newlabel{lstms}{{}{17}{Long-Short Term Memory (LSTM)}{section*.23}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Long-Short Term Memory (LSTM)}{18}{section*.23}}
+\newlabel{lstms}{{}{18}{Long-Short Term Memory (LSTM)}{section*.23}{}}
 \abx@aux@segm{0}{0}{23}
 \abx@aux@segm{0}{0}{22}
 \abx@aux@segm{0}{0}{23}
@@ -132,7 +132,7 @@
 \abx@aux@segm{0}{0}{25}
 \abx@aux@cite{26}
 \abx@aux@segm{0}{0}{26}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Keras and TensorFlow}{18}{section*.24}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Keras and TensorFlow}{19}{section*.24}}
 \abx@aux@cite{27}
 \abx@aux@segm{0}{0}{27}
 \abx@aux@cite{28}
@@ -143,60 +143,77 @@
 \abx@aux@segm{0}{0}{29}
 \abx@aux@cite{30}
 \abx@aux@segm{0}{0}{30}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Optimisers}{19}{section*.25}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Optimisers}{20}{section*.25}}
 \abx@aux@cite{31}
 \abx@aux@segm{0}{0}{31}
 \abx@aux@segm{0}{0}{30}
 \abx@aux@cite{32}
 \abx@aux@segm{0}{0}{32}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Machine Learning}{20}{section*.26}}
-\newlabel{machine}{{}{20}{Machine Learning}{section*.26}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Naive Bayes}{20}{section*.27}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Machine Learning}{21}{section*.26}}
+\newlabel{machine}{{}{21}{Machine Learning}{section*.26}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Naive Bayes}{21}{section*.27}}
 \abx@aux@segm{0}{0}{32}
 \abx@aux@cite{33}
 \abx@aux@segm{0}{0}{33}
 \abx@aux@segm{0}{0}{8}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Solution Approach}{22}{section*.28}}
-\newlabel{solution}{{}{22}{Solution Approach}{section*.28}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data gathering}{22}{section*.29}}
-\abx@aux@segm{0}{0}{23}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Spam Filtering}{23}{section*.30}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Language Detection}{23}{section*.31}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Solution Summary}{23}{section*.32}}
-\newlabel{sumary}{{}{23}{Solution Summary}{section*.32}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Requirements}{23}{section*.33}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data flow Overview}{23}{section*.34}}
-\newlabel{data-flow}{{}{23}{Data flow Overview}{section*.34}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Packages, Tools and Techniques}{23}{section*.35}}
-\newlabel{tools}{{}{23}{Packages, Tools and Techniques}{section*.35}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{System Design and Implementation}{24}{section*.36}}
-\newlabel{implementation}{{}{24}{System Design and Implementation}{section*.36}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data collection}{24}{section*.37}}
-\newlabel{collection}{{}{24}{Data collection}{section*.37}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Price Time-series Data}{24}{section*.38}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data processing}{24}{section*.39}}
-\newlabel{processing}{{}{24}{Data processing}{section*.39}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Preprocessing}{24}{section*.40}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Tweet Filtering}{24}{section*.41}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Text Cleaning}{24}{section*.42}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Ngram based Language detection filtering}{24}{section*.43}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Spam Filtering}{24}{section*.44}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Tweet Processing}{24}{section*.45}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Naive Bayes model}{24}{section*.46}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{24}{section*.47}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{VADER}{24}{section*.48}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Recurrent Neural Network - LSTM}{24}{section*.49}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Training and Testing Model}{24}{section*.50}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Scoring and Validation}{24}{section*.51}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Future Prediction Forecasting}{24}{section*.52}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Testing: Verification and Reflection}{25}{section*.53}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Discussion: Contribution and Reflection}{26}{section*.54}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Limitations}{26}{section*.55}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Conclusion and Future Improvements}{27}{section*.56}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Conclusion}{27}{section*.57}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Future Improvements}{27}{section*.58}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Appendices}{31}{section*.60}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix A - Project Initiation Document}{31}{section*.61}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Solution Approach}{23}{section*.28}}
+\newlabel{solution}{{}{23}{Solution Approach}{section*.28}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data gathering}{23}{section*.29}}
+\abx@aux@segm{0}{0}{12}
+\abx@aux@segm{0}{0}{12}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data pre-processing}{24}{section*.30}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Spam Filtering}{24}{section*.31}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Language Detection}{24}{section*.32}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{24}{section*.33}}
+\abx@aux@segm{0}{0}{12}
+\abx@aux@segm{0}{0}{11}
+\abx@aux@segm{0}{0}{22}
+\abx@aux@segm{0}{0}{5}
+\abx@aux@segm{0}{0}{25}
+\abx@aux@segm{0}{0}{22}
+\abx@aux@segm{0}{0}{25}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Network}{25}{section*.34}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Price Forecasting}{26}{section*.35}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Frontend Application}{26}{section*.36}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{With reference to Initial PID}{26}{section*.37}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Solution Summary}{27}{section*.38}}
+\newlabel{summary}{{}{27}{Solution Summary}{section*.38}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data flow Overview}{27}{section*.39}}
+\newlabel{data-flow}{{}{27}{Data flow Overview}{section*.39}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{System Design}{28}{section*.40}}
+\newlabel{Design}{{}{28}{System Design}{section*.40}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Dataflow Designs}{28}{section*.41}}
+\abx@aux@segm{0}{0}{12}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{UML Component Design}{34}{section*.42}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Interface Design}{34}{section*.43}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Implementation}{35}{section*.44}}
+\newlabel{implementation}{{}{35}{Implementation}{section*.44}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data collection}{35}{section*.45}}
+\newlabel{collection}{{}{35}{Data collection}{section*.45}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Price Time-series Data}{35}{section*.46}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data processing}{35}{section*.47}}
+\newlabel{processing}{{}{35}{Data processing}{section*.47}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Preprocessing}{35}{section*.48}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Tweet Filtering}{35}{section*.49}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Text Cleaning}{35}{section*.50}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Ngram based Language detection filtering}{35}{section*.51}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Spam Filtering}{35}{section*.52}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Tweet Processing}{35}{section*.53}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Naive Bayes model}{35}{section*.54}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{35}{section*.55}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{VADER}{35}{section*.56}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Recurrent Neural Network - LSTM}{35}{section*.57}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Training and Testing Model}{35}{section*.58}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Scoring and Validation}{35}{section*.59}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Future Prediction Forecasting}{35}{section*.60}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Testing: Verification and Reflection}{36}{section*.61}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Discussion: Contribution and Reflection}{37}{section*.62}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Limitations}{37}{section*.63}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Conclusion and Future Improvements}{38}{section*.64}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Conclusion}{38}{section*.65}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Future Improvements}{38}{section*.66}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Appendices}{42}{section*.68}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix A - Project Initiation Document}{42}{section*.69}}
 \abx@aux@refcontextdefaultsdone
 \abx@aux@defaultrefcontext{0}{1}{none/global//global/global}
 \abx@aux@defaultrefcontext{0}{2}{none/global//global/global}
@@ -231,4 +248,4 @@
 \abx@aux@defaultrefcontext{0}{31}{none/global//global/global}
 \abx@aux@defaultrefcontext{0}{32}{none/global//global/global}
 \abx@aux@defaultrefcontext{0}{33}{none/global//global/global}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix B - Log book}{44}{section*.62}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix B - Log book}{55}{section*.70}}

View File: document.bcf

@@ -1981,7 +1981,16 @@
 <bcf:citekey order="55">32</bcf:citekey>
 <bcf:citekey order="56">33</bcf:citekey>
 <bcf:citekey order="57">8</bcf:citekey>
-<bcf:citekey order="58">23</bcf:citekey>
+<bcf:citekey order="58">12</bcf:citekey>
+<bcf:citekey order="59">12</bcf:citekey>
+<bcf:citekey order="60">12</bcf:citekey>
+<bcf:citekey order="61">11</bcf:citekey>
+<bcf:citekey order="62">22</bcf:citekey>
+<bcf:citekey order="63">5</bcf:citekey>
+<bcf:citekey order="64">25</bcf:citekey>
+<bcf:citekey order="65">22</bcf:citekey>
+<bcf:citekey order="66">25</bcf:citekey>
+<bcf:citekey order="67">12</bcf:citekey>
 <bcf:citekey order="0">*</bcf:citekey>
 </bcf:section>
 <!-- SORTING TEMPLATES -->

View File: document.blg

@@ -1,20 +1,20 @@
 [0] Config.pm:302> INFO - This is Biber 2.9
 [0] Config.pm:305> INFO - Logfile is 'document.blg'
-[20] biber:313> INFO - === Tue Apr 23, 2019, 21:27:28
-[37] Biber.pm:371> INFO - Reading 'document.bcf'
-[87] Biber.pm:854> INFO - Using all citekeys in bib section 0
-[97] Biber.pm:3981> INFO - Processing section 0
-[106] Biber.pm:4154> INFO - Looking for bibtex format file 'report.bib' for section 0
-[108] bibtex.pm:1468> INFO - LaTeX decoding ...
-[139] bibtex.pm:1294> INFO - Found BibTeX data source 'report.bib'
-[203] Utils.pm:169> WARN - year field 'Mar 13, 2016' in entry '23' is not an integer - this will probably not sort properly.
-[245] Utils.pm:169> WARN - Name "Mairal, J., Ponce, J., Sapiro, G., Zisserman, A." has too many commas: skipping name
-[260] Utils.pm:169> WARN - BibTeX subsystem: warning: comma(s) at end of name (removing)
-[260] Utils.pm:169> WARN - BibTeX subsystem: author, warning: comma(s) at end of name (removing)
-[279] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
-[280] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
-[280] Biber.pm:3809> INFO - Sorting list 'none/global//global/global' of type 'entry' with template 'none' and locale 'en-US'
-[280] Biber.pm:3815> INFO - No sort tailoring available for locale 'en-US'
-[300] bbl.pm:617> INFO - Writing 'document.bbl' with encoding 'ascii'
-[312] bbl.pm:720> INFO - Output to document.bbl
-[312] Biber.pm:110> INFO - WARNINGS: 4
+[20] biber:313> INFO - === Wed Apr 24, 2019, 16:48:01
+[35] Biber.pm:371> INFO - Reading 'document.bcf'
+[84] Biber.pm:854> INFO - Using all citekeys in bib section 0
+[95] Biber.pm:3981> INFO - Processing section 0
+[103] Biber.pm:4154> INFO - Looking for bibtex format file 'report.bib' for section 0
+[105] bibtex.pm:1468> INFO - LaTeX decoding ...
+[134] bibtex.pm:1294> INFO - Found BibTeX data source 'report.bib'
+[184] Utils.pm:169> WARN - Name "Mairal, J., Ponce, J., Sapiro, G., Zisserman, A." has too many commas: skipping name
+[235] Utils.pm:169> WARN - year field 'Mar 13, 2016' in entry '23' is not an integer - this will probably not sort properly.
+[249] Utils.pm:169> WARN - BibTeX subsystem: warning: comma(s) at end of name (removing)
+[249] Utils.pm:169> WARN - BibTeX subsystem: author, warning: comma(s) at end of name (removing)
+[268] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
+[268] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
+[268] Biber.pm:3809> INFO - Sorting list 'none/global//global/global' of type 'entry' with template 'none' and locale 'en-US'
+[268] Biber.pm:3815> INFO - No sort tailoring available for locale 'en-US'
+[288] bbl.pm:617> INFO - Writing 'document.bbl' with encoding 'ascii'
+[299] bbl.pm:720> INFO - Output to document.bbl
+[300] Biber.pm:110> INFO - WARNINGS: 4
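
Note: both biber runs end with the same four warnings, and all four come from entries in report.bib rather than from anything changed in this commit: a non-integer year field, an author list joined with commas instead of "and", and a trailing comma at the end of a name. A minimal sketch of corrected entries follows (the key '23', the date, and the four surnames are taken from the log above; every other field and the second key are placeholders, not the real bibliography):

    @online{23,
      author = {Placeholder, Author},
      title  = {Placeholder Title},
      date   = {2016-03-13},
      url    = {https://example.org/}
    }

    @article{placeholder-key,
      author = {Mairal, J. and Ponce, J. and Sapiro, G. and Zisserman, A.},
      title  = {Placeholder Title}
    }

biber sorts on an integer year (or an ISO 8601 date field), so 'Mar 13, 2016' should become date = {2016-03-13}; BibTeX-format name lists treat each comma-separated token as parts of one name, so co-authors must be joined with "and" and the list must not end in a comma.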

View File: document.log

@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.10.16) 23 APR 2019 21:45
+This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.10.16) 24 APR 2019 19:04
 entering extended mode
 restricted \write18 enabled.
 %&-line parsing enabled.
@@ -945,7 +945,7 @@ LaTeX Info: Redefining \nameref on input line 63.
 \@outlinefile=\write5
 \openout5 = `document.out'.
-<images/reading_logo.png, id=225, 504.88625pt x 183.68625pt>
+<images/reading_logo.png, id=257, 504.88625pt x 183.68625pt>
 File: images/reading_logo.png Graphic file (type png)
 <use images/reading_logo.png>
 Package pdftex.def Info: images/reading_logo.png used on input line 66.
@@ -963,11 +963,11 @@ File: umsb.fd 2013/01/14 v3.01 AMS symbols B
 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/reading_logo.png>
 ] [2] [3]
-(./document.toc [4])
+(./document.toc [4] [5])
 \tf@toc=\write6
 \openout6 = `document.toc'.
-[5] [6] [7]
+[6] [7] [8]
 LaTeX Font Info: Try loading font information for OMS+cmr on input line 162.
@@ -976,7 +976,7 @@ File: omscmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
 )
 LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <10> not available
 (Font) Font shape `OMS/cmsy/m/n' tried instead on input line 162.
-[8] [9]
+[9] [10]
 Underfull \hbox (badness 10000) in paragraph at lines 178--180
 []
@@ -999,103 +999,168 @@ Underfull \hbox (badness 10000) in paragraph at lines 207--209
 []
-[10]
+[11]
 Underfull \hbox (badness 10000) in paragraph at lines 216--218
 []
-[11] [12] [13] [14]
+[12] [13] [14] [15]
 Missing character: There is no â in font cmr10!
 Missing character: There is no € in font cmr10!
 Missing character: There is no ™ in font cmr10!
-<images/perceptron.png, id=390, 706.64pt x 392.46625pt>
+[16]
+<images/perceptron.png, id=441, 706.64pt x 392.46625pt>
 File: images/perceptron.png Graphic file (type png)
 <use images/perceptron.png>
-Package pdftex.def Info: images/perceptron.png used on input line 302.
+Package pdftex.def Info: images/perceptron.png used on input line 308.
 (pdftex.def) Requested size: 284.52713pt x 170.72142pt.
-[15]
-<images/rnn_ffn.png, id=405, 844.15375pt x 342.27875pt>
+<images/rnn_ffn.png, id=447, 844.15375pt x 342.27875pt>
 File: images/rnn_ffn.png Graphic file (type png)
 <use images/rnn_ffn.png>
-Package pdftex.def Info: images/rnn_ffn.png used on input line 322.
+Package pdftex.def Info: images/rnn_ffn.png used on input line 328.
 (pdftex.def) Requested size: 426.80307pt x 170.72112pt.
-[16 <./images/perceptron.png>]
-<images/lstm.png, id=419, 1160.335pt x 1029.8475pt>
+[17 <./images/perceptron.png>]
+<images/lstm.png, id=463, 1160.335pt x 1029.8475pt>
 File: images/lstm.png Graphic file (type png)
 <use images/lstm.png>
-Package pdftex.def Info: images/lstm.png used on input line 340.
+Package pdftex.def Info: images/lstm.png used on input line 346.
 (pdftex.def) Requested size: 256.07123pt x 199.1616pt.
-[17 <./images/rnn_ffn.png>] [18 <./images/lstm.png>]
+[18 <./images/rnn_ffn.png>] [19 <./images/lstm.png>]
 Missing character: There is no â in font cmr10!
 Missing character: There is no € in font cmr10!
 Missing character: There is no ™ in font cmr10!
-[19]
+[20]
 Missing character: There is no â in font cmr10!
 Missing character: There is no € in font cmr10!
 Missing character: There is no ™ in font cmr10!
-[20] [21]
-Underfull \hbox (badness 10000) in paragraph at lines 450--452
+[21] [22]
+Underfull \hbox (badness 10000) in paragraph at lines 456--458
 []
-Underfull \hbox (badness 10000) in paragraph at lines 454--456
+Underfull \hbox (badness 10000) in paragraph at lines 460--462
 []
-Underfull \hbox (badness 10000) in paragraph at lines 457--461
+Underfull \hbox (badness 10000) in paragraph at lines 463--467
 []
-Underfull \hbox (badness 10000) in paragraph at lines 462--464
+Underfull \hbox (badness 10000) in paragraph at lines 468--470
 []
-Underfull \hbox (badness 10000) in paragraph at lines 465--469
+Underfull \hbox (badness 10000) in paragraph at lines 471--475
 []
-[22]
-<images/Generic_Flow.png, id=489, 1900.09875pt x 529.98pt>
+[23]
+LaTeX Font Info: Font shape `OMS/cmr/m/it' in size <10> not available
+(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 488.
+[24] [25] [26]
+<images/Generic_Flow.png, id=555, 1900.09875pt x 529.98pt>
 File: images/Generic_Flow.png Graphic file (type png)
 <use images/Generic_Flow.png>
-Package pdftex.def Info: images/Generic_Flow.png used on input line 489.
-(pdftex.def) Requested size: 483.69247pt x 227.62009pt.
-Overfull \hbox (42.84502pt too wide) in paragraph at lines 489--492
+Package pdftex.def Info: images/Generic_Flow.png used on input line 565.
+(pdftex.def) Requested size: 512.13474pt x 227.62009pt.
+Overfull \hbox (71.28728pt too wide) in paragraph at lines 565--566
+[][]
+[]
+[27 <./images/Generic_Flow.png (PNG copy)>]
+Underfull \hbox (badness 10000) in paragraph at lines 576--578
+[]
+<images/Dataflow.png, id=560, 3767.07375pt x 1526.70375pt>
+File: images/Dataflow.png Graphic file (type png)
+<use images/Dataflow.png>
+Package pdftex.def Info: images/Dataflow.png used on input line 581.
+(pdftex.def) Requested size: 512.09683pt x 227.62125pt.
+Overfull \hbox (71.24937pt too wide) in paragraph at lines 581--583
 []
 []
-[23 <./images/Generic_Flow.png (PNG copy)>] [24] [25] [26] [27]
-Overfull \hbox (1.46323pt too wide) in paragraph at lines 548--548
+[28 <./images/Dataflow.png (PNG copy)>]
+<images/Data_Collector.png, id=565, 1152.305pt x 647.41875pt>
+File: images/Data_Collector.png Graphic file (type png)
+<use images/Data_Collector.png>
+Package pdftex.def Info: images/Data_Collector.png used on input line 589.
+(pdftex.def) Requested size: 426.78574pt x 227.61746pt.
+[29 <./images/Data_Collector.png (PNG copy)>]
+<images/Analysis_Engine.png, id=570, 1658.195pt x 719.68875pt>
+File: images/Analysis_Engine.png Graphic file (type png)
+<use images/Analysis_Engine.png>
+Package pdftex.def Info: images/Analysis_Engine.png used on input line 604.
+(pdftex.def) Requested size: 483.67276pt x 227.62561pt.
+Overfull \hbox (42.8253pt too wide) in paragraph at lines 604--606
+[]
+[]
+[30 <./images/Analysis_Engine.png (PNG copy)>]
+<images/Neural_Network.png, id=576, 1502.61375pt x 1032.85875pt>
+File: images/Neural_Network.png Graphic file (type png)
+<use images/Neural_Network.png>
+Package pdftex.def Info: images/Neural_Network.png used on input line 620.
+(pdftex.def) Requested size: 483.6893pt x 341.42757pt.
+Overfull \hbox (42.84184pt too wide) in paragraph at lines 620--622
+[]
+[]
+[31 <./images/Neural_Network.png (PNG copy)>]
+<images/Future_Predictions.png, id=582, 1596.96625pt x 490.83376pt>
+File: images/Future_Predictions.png Graphic file (type png)
+<use images/Future_Predictions.png>
+Package pdftex.def Info: images/Future_Predictions.png used on input line 634.
+(pdftex.def) Requested size: 512.1362pt x 227.62119pt.
+Overfull \hbox (71.28874pt too wide) in paragraph at lines 634--636
+[]
+[]
+[32 <./images/Future_Predictions.png (PNG copy)>]
+<images/Frontend_Application.png, id=587, 804.00375pt x 599.23875pt>
+File: images/Frontend_Application.png Graphic file (type png)
+<use images/Frontend_Application.png>
+Package pdftex.def Info: images/Frontend_Application.png used on input line 64
+8.
+(pdftex.def) Requested size: 284.52162pt x 256.07664pt.
+[33 <./images/Frontend_Application.png (PNG copy)>] [34] [35] [36] [37] [38]
+Overfull \hbox (1.46323pt too wide) in paragraph at lines 717--717
 \OT1/cmr/m/n/10 [On-line]. Avail-able: []$\OT1/cmtt/m/n/10 https : / / www . co
 deproject . com / Articles / 1201444 / Stock -[] Predictions -[]
 []
-[28]
-Overfull \hbox (22.26572pt too wide) in paragraph at lines 548--548
+[39]
+Overfull \hbox (22.26572pt too wide) in paragraph at lines 717--717
 \OT1/cmr/m/n/10 able: []$\OT1/cmtt/m/n/10 https : / / towardsdatascience . com
 / recurrent -[] neural -[] networks -[] and -[] lstm -[] 4b601dd822a5$[]\OT1/cm
 r/m/n/10 .
 []
-Overfull \hbox (59.98181pt too wide) in paragraph at lines 548--548
+Overfull \hbox (59.98181pt too wide) in paragraph at lines 717--717
 []$\OT1/cmtt/m/n/10 http : / / deeplearning . stanford . edu / tutorial / super
 vised / OptimizationStochasticGradientDescent$[]\OT1/cmr/m/n/10 .
 []
-[29] [30]
+[40] [41]
 pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
 rsion <1.7>, but at most version <1.5> allowed
-<PID.pdf, id=587, 597.55246pt x 845.07718pt>
+<PID.pdf, id=689, 597.55246pt x 845.07718pt>
 File: PID.pdf Graphic file (type pdf)
 <use PID.pdf>
-Package pdftex.def Info: PID.pdf used on input line 553.
+Package pdftex.def Info: PID.pdf used on input line 722.
 (pdftex.def) Requested size: 597.551pt x 845.07512pt.
@@ -1103,7 +1168,7 @@ pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
 rsion <1.7>, but at most version <1.5> allowed
 File: PID.pdf Graphic file (type pdf)
 <use PID.pdf>
-Package pdftex.def Info: PID.pdf used on input line 553.
+Package pdftex.def Info: PID.pdf used on input line 722.
 (pdftex.def) Requested size: 597.551pt x 845.07512pt.
@ -1113,233 +1178,233 @@ rsion <1.7>, but at most version <1.5> allowed
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=590, page=1, 597.55246pt x 845.07718pt> <PID.pdf, id=692, page=1, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf) File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 1> <use PID.pdf, page 1>
Package pdftex.def Info: PID.pdf , page1 used on input line 553. Package pdftex.def Info: PID.pdf , page1 used on input line 722.
(pdftex.def) Requested size: 597.551pt x 845.07512pt. (pdftex.def) Requested size: 597.551pt x 845.07512pt.
File: PID.pdf Graphic file (type pdf) File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 1> <use PID.pdf, page 1>
Package pdftex.def Info: PID.pdf , page1 used on input line 553. Package pdftex.def Info: PID.pdf , page1 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt. (pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[31] [42]
File: PID.pdf Graphic file (type pdf) File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 1> <use PID.pdf, page 1>
Package pdftex.def Info: PID.pdf , page1 used on input line 553. Package pdftex.def Info: PID.pdf , page1 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt. (pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf) File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 1> <use PID.pdf, page 1>
Package pdftex.def Info: PID.pdf , page1 used on input line 553. Package pdftex.def Info: PID.pdf , page1 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt. (pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf) File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 1> <use PID.pdf, page 1>
Package pdftex.def Info: PID.pdf , page1 used on input line 553. Package pdftex.def Info: PID.pdf , page1 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt. (pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[32 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=621, page=2, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 2>
Package pdftex.def Info: PID.pdf , page2 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 2>
Package pdftex.def Info: PID.pdf , page2 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 2>
Package pdftex.def Info: PID.pdf , page2 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[33 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=627, page=3, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 3>
Package pdftex.def Info: PID.pdf , page3 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 3>
Package pdftex.def Info: PID.pdf , page3 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 3>
Package pdftex.def Info: PID.pdf , page3 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[34 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=642, page=4, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 4>
Package pdftex.def Info: PID.pdf , page4 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 4>
Package pdftex.def Info: PID.pdf , page4 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 4>
Package pdftex.def Info: PID.pdf , page4 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[35 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=648, page=5, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 5>
Package pdftex.def Info: PID.pdf , page5 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 5>
Package pdftex.def Info: PID.pdf , page5 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 5>
Package pdftex.def Info: PID.pdf , page5 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[36 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=654, page=6, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 6>
Package pdftex.def Info: PID.pdf , page6 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 6>
Package pdftex.def Info: PID.pdf , page6 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 6>
Package pdftex.def Info: PID.pdf , page6 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[37 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=661, page=7, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 7>
Package pdftex.def Info: PID.pdf , page7 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 7>
Package pdftex.def Info: PID.pdf , page7 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 7>
Package pdftex.def Info: PID.pdf , page7 used on input line 553.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[38 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=667, page=8, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 8>
Package pdftex.def Info: PID.pdf , page8 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 8>
Package pdftex.def Info: PID.pdf , page8 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 8>
Package pdftex.def Info: PID.pdf , page8 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[39 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=676, page=9, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 9>
Package pdftex.def Info: PID.pdf , page9 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 9>
Package pdftex.def Info: PID.pdf , page9 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 9>
Package pdftex.def Info: PID.pdf , page9 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[40 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=686, page=10, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 10>
Package pdftex.def Info: PID.pdf , page10 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 10>
Package pdftex.def Info: PID.pdf , page10 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 10>
Package pdftex.def Info: PID.pdf , page10 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[41 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=698, page=11, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 11>
Package pdftex.def Info: PID.pdf , page11 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 11>
Package pdftex.def Info: PID.pdf , page11 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 11>
Package pdftex.def Info: PID.pdf , page11 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[42 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=704, page=12, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 12>
Package pdftex.def Info: PID.pdf , page12 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 12>
Package pdftex.def Info: PID.pdf , page12 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 12>
Package pdftex.def Info: PID.pdf , page12 used on input line 553.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[43 <./PID.pdf>] [43 <./PID.pdf>]
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 557.
[44] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
Package atveryend Info: Empty hook `AfterLastShipout' on input line 557. rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=723, page=2, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 2>
Package pdftex.def Info: PID.pdf , page2 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 2>
Package pdftex.def Info: PID.pdf , page2 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 2>
Package pdftex.def Info: PID.pdf , page2 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[44 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=729, page=3, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 3>
Package pdftex.def Info: PID.pdf , page3 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 3>
Package pdftex.def Info: PID.pdf , page3 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 3>
Package pdftex.def Info: PID.pdf , page3 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[45 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=743, page=4, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 4>
Package pdftex.def Info: PID.pdf , page4 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 4>
Package pdftex.def Info: PID.pdf , page4 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 4>
Package pdftex.def Info: PID.pdf , page4 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[46 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=749, page=5, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 5>
Package pdftex.def Info: PID.pdf , page5 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 5>
Package pdftex.def Info: PID.pdf , page5 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 5>
Package pdftex.def Info: PID.pdf , page5 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[47 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=755, page=6, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 6>
Package pdftex.def Info: PID.pdf , page6 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 6>
Package pdftex.def Info: PID.pdf , page6 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 6>
Package pdftex.def Info: PID.pdf , page6 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[48 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=761, page=7, 597.55246pt x 845.07718pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 7>
Package pdftex.def Info: PID.pdf , page7 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 7>
Package pdftex.def Info: PID.pdf , page7 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 7>
Package pdftex.def Info: PID.pdf , page7 used on input line 722.
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
[49 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=768, page=8, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 8>
Package pdftex.def Info: PID.pdf , page8 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 8>
Package pdftex.def Info: PID.pdf , page8 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 8>
Package pdftex.def Info: PID.pdf , page8 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[50 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=778, page=9, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 9>
Package pdftex.def Info: PID.pdf , page9 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 9>
Package pdftex.def Info: PID.pdf , page9 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 9>
Package pdftex.def Info: PID.pdf , page9 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[51 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=788, page=10, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 10>
Package pdftex.def Info: PID.pdf , page10 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 10>
Package pdftex.def Info: PID.pdf , page10 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 10>
Package pdftex.def Info: PID.pdf , page10 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[52 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=800, page=11, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 11>
Package pdftex.def Info: PID.pdf , page11 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 11>
Package pdftex.def Info: PID.pdf , page11 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 11>
Package pdftex.def Info: PID.pdf , page11 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[53 <./PID.pdf>]
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
rsion <1.7>, but at most version <1.5> allowed
<PID.pdf, id=806, page=12, 845.07718pt x 597.55246pt>
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 12>
Package pdftex.def Info: PID.pdf , page12 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 12>
Package pdftex.def Info: PID.pdf , page12 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
File: PID.pdf Graphic file (type pdf)
<use PID.pdf, page 12>
Package pdftex.def Info: PID.pdf , page12 used on input line 722.
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
[54 <./PID.pdf>]
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 726.
[55]
Package atveryend Info: Empty hook `AfterLastShipout' on input line 726.
(./document.aux)
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 726.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 726.
Package rerunfilecheck Info: File `document.out' has not changed.
(rerunfilecheck)             Checksum: 2D9E87FEC6D93F9F3501E4506AD8AD84;9738.
Package logreq Info: Writing requests to 'document.run.xml'.
\openout1 = `document.run.xml'.
)
Here is how much of TeX's memory you used:
 22537 strings out of 492982
 356980 string characters out of 6134895
 847942 words of memory out of 5000000
 25613 multiletter control sequences out of 15000+600000
 12017 words of font info for 47 fonts, out of 8000000 for 9000
 1141 hyphenation exceptions out of 8191
 45i,18n,67p,2008b,1302s stack positions out of 5000i,500n,10000p,200000b,80000s
@@ -1356,10 +1421,10 @@ amsfonts/cm/cmr7.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts
sy5.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb
></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti10.pfb></usr
/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt10.pfb>
Output written on document.pdf (55 pages, 1317438 bytes).
PDF statistics:
 900 PDF objects out of 1000 (max. 8388607)
 786 compressed objects within 8 object streams
 159 named destinations out of 1000 (max. 500000)
 635 words of extra memory for PDF output out of 10000 (max. 10000000)
@@ -26,30 +26,38 @@
\BOOKMARK [3][-]{section*.27}{\376\377\000N\000a\000i\000v\000e\000\040\000B\000a\000y\000e\000s}{section*.26}% 26
\BOOKMARK [1][-]{section*.28}{\376\377\000S\000o\000l\000u\000t\000i\000o\000n\000\040\000A\000p\000p\000r\000o\000a\000c\000h}{}% 27
\BOOKMARK [2][-]{section*.29}{\376\377\000D\000a\000t\000a\000\040\000g\000a\000t\000h\000e\000r\000i\000n\000g}{section*.28}% 28
\BOOKMARK [2][-]{section*.30}{\376\377\000D\000a\000t\000a\000\040\000p\000r\000e\000-\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.28}% 29
\BOOKMARK [2][-]{section*.31}{\376\377\000S\000p\000a\000m\000\040\000F\000i\000l\000t\000e\000r\000i\000n\000g}{section*.28}% 30
\BOOKMARK [2][-]{section*.32}{\376\377\000L\000a\000n\000g\000u\000a\000g\000e\000\040\000D\000e\000t\000e\000c\000t\000i\000o\000n}{section*.28}% 31
\BOOKMARK [2][-]{section*.33}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.28}% 32
\BOOKMARK [2][-]{section*.34}{\376\377\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k}{section*.28}% 33
\BOOKMARK [2][-]{section*.35}{\376\377\000P\000r\000i\000c\000e\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section*.28}% 34
\BOOKMARK [2][-]{section*.36}{\376\377\000F\000r\000o\000n\000t\000e\000n\000d\000\040\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n}{section*.28}% 35
\BOOKMARK [2][-]{section*.37}{\376\377\000W\000i\000t\000h\000\040\000r\000e\000f\000e\000r\000e\000n\000c\000e\000\040\000t\000o\000\040\000I\000n\000i\000t\000i\000a\000l\000\040\000P\000I\000D}{section*.28}% 36
\BOOKMARK [2][-]{section*.38}{\376\377\000S\000o\000l\000u\000t\000i\000o\000n\000\040\000S\000u\000m\000m\000a\000r\000y}{section*.28}% 37
\BOOKMARK [2][-]{section*.39}{\376\377\000D\000a\000t\000a\000\040\000f\000l\000o\000w\000\040\000O\000v\000e\000r\000v\000i\000e\000w}{section*.28}% 38
\BOOKMARK [1][-]{section*.40}{\376\377\000S\000y\000s\000t\000e\000m\000\040\000D\000e\000s\000i\000g\000n}{}% 39
\BOOKMARK [2][-]{section*.41}{\376\377\000D\000a\000t\000a\000f\000l\000o\000w\000\040\000D\000e\000s\000i\000g\000n\000s}{section*.40}% 40
\BOOKMARK [2][-]{section*.42}{\376\377\000U\000M\000L\000\040\000C\000o\000m\000p\000o\000n\000e\000n\000t\000\040\000D\000e\000s\000i\000g\000n}{section*.40}% 41
\BOOKMARK [2][-]{section*.43}{\376\377\000I\000n\000t\000e\000r\000f\000a\000c\000e\000\040\000D\000e\000s\000i\000g\000n}{section*.40}% 42
\BOOKMARK [1][-]{section*.44}{\376\377\000I\000m\000p\000l\000e\000m\000e\000n\000t\000a\000t\000i\000o\000n}{}% 43
\BOOKMARK [2][-]{section*.45}{\376\377\000D\000a\000t\000a\000\040\000c\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.44}% 44
\BOOKMARK [3][-]{section*.46}{\376\377\000P\000r\000i\000c\000e\000\040\000T\000i\000m\000e\000-\000s\000e\000r\000i\000e\000s\000\040\000D\000a\000t\000a}{section*.45}% 45
\BOOKMARK [2][-]{section*.47}{\376\377\000D\000a\000t\000a\000\040\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.44}% 46
\BOOKMARK [3][-]{section*.48}{\376\377\000P\000r\000e\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.47}% 47
\BOOKMARK [3][-]{section*.52}{\376\377\000S\000p\000a\000m\000\040\000F\000i\000l\000t\000e\000r\000i\000n\000g}{section*.47}% 48
\BOOKMARK [2][-]{section*.55}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.44}% 49
\BOOKMARK [3][-]{section*.56}{\376\377\000V\000A\000D\000E\000R}{section*.55}% 50
\BOOKMARK [2][-]{section*.57}{\376\377\000R\000e\000c\000u\000r\000r\000e\000n\000t\000\040\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000\040\000-\000\040\000L\000S\000T\000M}{section*.44}% 51
\BOOKMARK [3][-]{section*.58}{\376\377\000T\000r\000a\000i\000n\000i\000n\000g\000\040\000a\000n\000d\000\040\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000o\000d\000e\000l}{section*.57}% 52
\BOOKMARK [3][-]{section*.59}{\376\377\000S\000c\000o\000r\000i\000n\000g\000\040\000a\000n\000d\000\040\000V\000a\000l\000i\000d\000a\000t\000i\000o\000n}{section*.57}% 53
\BOOKMARK [3][-]{section*.60}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000P\000r\000e\000d\000i\000c\000t\000i\000o\000n\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section*.57}% 54
\BOOKMARK [1][-]{section*.61}{\376\377\000T\000e\000s\000t\000i\000n\000g\000:\000\040\000V\000e\000r\000i\000f\000i\000c\000a\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 55
\BOOKMARK [1][-]{section*.62}{\376\377\000D\000i\000s\000c\000u\000s\000s\000i\000o\000n\000:\000\040\000C\000o\000n\000t\000r\000i\000b\000u\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 56
\BOOKMARK [2][-]{section*.63}{\376\377\000L\000i\000m\000i\000t\000a\000t\000i\000o\000n\000s}{section*.62}% 57
\BOOKMARK [1][-]{section*.64}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{}% 58
\BOOKMARK [2][-]{section*.65}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n}{section*.64}% 59
\BOOKMARK [2][-]{section*.66}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{section*.64}% 60
\BOOKMARK [1][-]{section*.68}{\376\377\000A\000p\000p\000e\000n\000d\000i\000c\000e\000s}{}% 61
\BOOKMARK [2][-]{section*.69}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000A\000\040\000-\000\040\000P\000r\000o\000j\000e\000c\000t\000\040\000I\000n\000i\000t\000i\000a\000t\000i\000o\000n\000\040\000D\000o\000c\000u\000m\000e\000n\000t}{section*.68}% 62
\BOOKMARK [2][-]{section*.70}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000B\000\040\000-\000\040\000L\000o\000g\000\040\000b\000o\000o\000k}{section*.68}% 63
@@ -122,9 +122,9 @@
\begin{center}
\section{Introduction}\label{introduction}
\end{center}
The premise of this project is to investigate whether the sentiment expressed in social media correlates with the prices of cryptocurrencies, and how any such correlation could be used to predict future changes in the price.
The cryptocurrency this project will focus on is Bitcoin (BTC), as it has the largest community and backing and has been known to lead other cryptocurrencies. Bitcoin is seen as one of, if not the first, cryptocurrency to bring a wider following to the peer-to-peer token transaction scene since 2009. Although it was not the first token to utilise blockchain technology, it allowed investors to openly trade a public cryptocurrency which provided pseudonymous means of transferring funds through the internet. It has therefore been around longer than most other cryptocurrencies and is the most popular crypto-token due to its larger community base.
Most financial commodities are subject to the whim of public confidence, which forms the core of their base value. A platform frequently used by the public to convey opinions on a commodity is Twitter, which provides arguably biased information and opinions. Whether or not those opinions have a basis in fact, they are usually taken at face value and can influence public opinion on a given topic. As Bitcoin has been around since 2009, opinions and information on the commodity are prevalent throughout the platform.
In the paper \textit{Sentiment Analysis of Twitter Data for Predicting Stock Market Movements} by \textit{Majhi et al.} \cite{1}, 2.5 million tweets on Microsoft were extracted from Twitter; sentiment analysis and logistic regression performed on the data yielded 69.01\% accuracy for a 3-day period on the increase/decrease in stock price. These results showed a "\textit{good correlation between stock market movements and the sentiments of public expressed in Twitter}".
@@ -149,9 +149,9 @@
The key problems this project attempts to address are: the lack of an open-source system, available to the public, that aids in the analysis and prediction of BTC; the accuracy of open-source tools and technologies when applied to the trading market; and whether there is a correlation between Twitter sentiment and BTC price fluctuation. While there are existing tools, only a few are available to the public and these provide only basic functionality, while others are kept in-house by major corporations who invest in this problem domain.
The other issue presented here is that assuming perfect accuracy can be achieved is naive. As this project will only be using existing tools and technologies, there are limitations to the accuracy that can be obtained. One such limitation is the suitability of the tools: there are no open-source sentiment analysers for stock market prediction, so finding a specifically trained analyser for the chosen domain is highly unlikely. Relatedly, finding the most suitable machine learning method or neural network is equally important, as this will determine the accuracy of the predictions. As this is a regression problem, machine learning techniques and neural networks that focus on regression and forecasting should be considered.
The accuracy and suitability of various machine learning methods and neural networks are a known issue in their respective domains; an investigation to determine their suitability for their needed use in this project will be detailed in the literature review.
This project will focus on the investigation of these technologies and tools to justify whether it is feasible to predict the price of BTC based on historical price and the sentiment gathered from Twitter. Limitations of the system and its accuracy in predictions should be investigated and discussed to determine whether the implemented solution is more suitable than other methods.
@@ -204,7 +204,7 @@
\item To produce a neural network which accomplishes the same as the one above, but without the use of sentiment
\item To produce metrics to justify the accuracy of the model
\item To produce data files containing the current time of predictions alongside the current hour price and sentiment. This should also include a suggested action based on a threshold for the price difference between hours.
\item To produce data files containing the true and predicted price values of every hour for trained data, and another for current recurring predictions.
\newline
\end{itemize}
@@ -222,14 +222,19 @@
\item This system, both the prediction system and the interface, should be deployed to a server due to the need to be constantly running
\end{itemize}
This project will not attempt to justify the accuracy of the chosen algorithm or tools over other algorithms. The justifications for why the chosen algorithm and tools have been used for this project over others will be discussed in the solution approach, but accuracy will not be directly compared.
This project will only be coded to predict an hour ahead, as the model will be trained on an hourly basis and the data is gathered per hour. Predictions further into the future can be modelled but will be treated as a future improvement to the system.
The detail of an interface may be subject to change through this project due to time constraints and the focus being the investigation of the impact social media has on market predictions.
\subsection{Project Constraints}\label{constraints}
The following constraints are recognised in this project:
\begin{itemize}
\item ...
\end{itemize}
\newpage
@@ -276,7 +281,7 @@
In short, sentiment analysis is the process of computationally identifying and categorising the underlying opinions and subjectivity expressed in written language. This process determines the writer's attitude towards a particular topic as either positive, neutral or negative in terms of opinion, known as polarity classification.
\subsubsection{Natural Language Processing}\label{algorithms}
Polarity classification is the focus of sentiment analysis and is a well-known problem in natural language processing that has had significant attention from researchers in recent years \cite{1}\cite{3}\cite{6}\cite{10}. Traditional approaches have usually been classified as dictionary-based approaches, which use a pre-constructed sentiment lexicon such as VADER, or machine learning approaches. The latter requires an extensive amount of natural language pre-processing to extrapolate vectors and features from given text; this is then fed into a machine learning classifier which attempts to categorise words to a level of sentiment polarity. Natural language pre-processing techniques, supported by the NLTK (Natural Language Toolkit) Python package, that would be required for this approach consist of the following (a short sketch of these steps follows the list):
\begin{itemize}
\item Tokenisation: The act of splitting a stream of text into smaller units of typographical tokens which isolate unneeded punctuation.
@@ -284,6 +289,7 @@
\item Stopword removal: Commonly used words (such as "the", "in", "a") provide no meaning to the sentiment of a given text and are removed.
\item Stemming: Used to reduce words with common suffixes and prefixes to a shared base, as "go" and "goes" fundamentally convey the same meaning. A stemmer will replace such words with their reduced counterparts.
\item Term Probability Identification and Feature Extraction: The process of identifying the most frequently used words in a given text; using a probability-type approach on a pre-defined dataset which classifies a range of texts as overall negative or positive, a machine learning algorithm is trained to classify these accordingly.
\item Ngrams: Contiguous sequences of $n$ adjacent tokens from a text, used as features so that short phrases, rather than single words, can carry sentiment.
\end{itemize}
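As an illustration of these steps, the sketch below applies them with NLTK; the sample tweet is hypothetical, and it assumes the relevant NLTK corpora (\textit{punkt}, \textit{stopwords}) have already been downloaded.
\begin{verbatim}
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.util import ngrams

text = "Bitcoin goes up again - the bulls are back!"

tokens = word_tokenize(text.lower())         # tokenisation
words = [t for t in tokens if t.isalpha()]   # drop punctuation tokens
stop = set(stopwords.words('english'))
words = [w for w in words if w not in stop]  # stopword removal
stemmer = PorterStemmer()
stems = [stemmer.stem(w) for w in words]     # stemming
bigrams = list(ngrams(stems, 2))             # ngram generation
\end{verbatim}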
The former has been seen and proven to provide higher accuracy than traditional machine learning approaches \cite{11}, and needs little pre-processing conducted on the data, as words have a pre-defined sentiment classification in a provided lexicon. Although these lexicons can be complex to create, they generally require few resources to use and add to.
@@ -447,7 +453,7 @@
\begin{center}
\section{Solution Approach}\label{solution}
\end{center}
This section will outline the solution intended to solve the problem identified in the problem statement, with justification and reference to the research conducted in the literature review. It will lay out the development process for the project, and the tools and technologies will be explained for their particular use case in this project.
\newline
\subsection{Data gathering}
@@ -459,7 +465,7 @@
\newline
Historical price data can be collected in a number of ways: one is the exchange APIs, another is a historical price tracker which creates a CSV consisting of all prior historical data. Both have their merits and reliability for granting the needed data; however, a historical tracker that has been tracking the price every hour since the start of Bitcoin is the better option. This is due to a couple of factors: the data in some historical trackers is an average, unbiased price for Bitcoin - they track the price of all or a select few exchanges and average the hourly price. If the historical data was obtained directly from a single exchange it would be biased and might not represent the true price of the currency, and would thus need averaging with hourly prices from other exchanges. By using a historical tracker, all the data is unbiased, averaged and readily available, and doesn't require any requests to an API or code to process the data.
Live price data can be collected through the same methods, a historical price tracker and an exchange API. However, this doesn't work the same way: unfortunately, a historical price tracker isn't updated as frequently as exchange APIs and so wouldn't provide accurate on-the-hour data. Therefore exchange APIs should be utilised in this case, and multiple of them, to provide an unbiased average for the hourly price. Three exchanges will provide a sufficient average, and the exchanges most likely to be used are the more popular ones such as Coinbase, Bitfinex and Gemini.
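As a sketch of how the hourly averaging across exchanges could work, the snippet below requests a spot price from each of the three exchanges' public REST APIs and averages them; the endpoint URLs and JSON field names reflect the public APIs at the time of writing and should be treated as illustrative.
\begin{verbatim}
import requests

def coinbase_spot():
    r = requests.get('https://api.coinbase.com/v2/prices/BTC-USD/spot')
    return float(r.json()['data']['amount'])

def bitfinex_last():
    r = requests.get('https://api.bitfinex.com/v1/pubticker/btcusd')
    return float(r.json()['last_price'])

def gemini_last():
    r = requests.get('https://api.gemini.com/v1/pubticker/btcusd')
    return float(r.json()['last'])

# Unbiased hourly price: the mean of the three exchange prices.
prices = [coinbase_spot(), bitfinex_last(), gemini_last()]
average_price = sum(prices) / len(prices)
\end{verbatim}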
\newline
\textbf{Tweets}
@@ -469,37 +475,197 @@
Live tweets can be collected by two methods from Twitter: from the Twitter API, or using a Twitter Python package such as Tweepy, detailed in the Literature review. Additionally, the limitations of the Twitter API are also discussed in the review, which states how the Twitter API has a tiering system: Standard, Premium and Enterprise. Each tier has a different level of access to the API and can extract a different amount of data from the platform. As concluded in that section of the Literature review, the Twitter API will not be used for the extraction and streaming of live tweets, due to this being restricted to Enterprise users. Therefore, Tweepy will be used to set up a looping, authenticated streaming solution with the Twitter API which will allow access to current recurring data.
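A minimal sketch of such a looping authenticated stream with Tweepy (3.x API) is given below; the credentials and track terms are placeholders.
\begin{verbatim}
import tweepy

class BTCListener(tweepy.StreamListener):
    def on_status(self, status):
        # Hand each incoming tweet to the pre-processing pipeline.
        print(status.text)

    def on_error(self, status_code):
        # Disconnect on rate limiting (HTTP 420) by returning False.
        return status_code != 420

auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_SECRET')
stream = tweepy.Stream(auth=auth, listener=BTCListener())
stream.filter(track=['bitcoin', 'btc'], languages=['en'])
\end{verbatim}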
\subsection{Data pre-processing}
Natural language pre-processing will be a part of most systems in this project. Techniques such as tokenisation, stemming, stopword removal and character filtering will be prevalent, as these will be used to remove unwanted data and to sanitise the data for classification.
\subsection{Spam Filtering}
This part of the system will aim to detect whether or not a streamed and/or historical tweet is spam - an unwanted tweet that serves no purpose in determining the opinion of the public. These types of tweets range from advertisements - usually labelled with \textit{\#Airdrop} and containing phrases such as \textit{"tickets here"} and \textit{"Token Sale"} - to job advertisements - usually containing words such as \textit{firm, hire, hiring, jobs and careers}. It is important to filter out and remove such data from the network, as these can be seen as outliers from the true needed data and will skew predictions with invalid sentiment.
The spam filter should use a probability-based algorithm such as Naive Bayes; other algorithms such as ... could be used, but as this is a probability-related problem, an algorithm such as Naive Bayes is more suitable. This classifier should be trained on a hand-created dataset containing both spam and ham (\textit{wanted data}) tweets, and should not be exclusive to either category.
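A minimal sketch of such a Naive Bayes spam filter, using NLTK's classifier and simple bag-of-words features, follows; the labelled examples are hypothetical stand-ins for the hand-created dataset.
\begin{verbatim}
from nltk.classify import NaiveBayesClassifier, accuracy

def features(tweet):
    # Simple bag-of-words presence features.
    return {word: True for word in tweet.lower().split()}

labelled = [("Token Sale! get your tickets here #Airdrop", 'spam'),
            ("We are hiring! blockchain careers at our firm", 'spam'),
            ("BTC looking strong this week", 'ham'),
            ("Just bought more bitcoin, feeling bullish", 'ham')]

featuresets = [(features(text), label) for text, label in labelled]
split = int(len(featuresets) * 0.75)   # 75:25 train/test split
train_set, test_set = featuresets[:split], featuresets[split:]

classifier = NaiveBayesClassifier.train(train_set)
print(accuracy(classifier, test_set))
print(classifier.classify(features("Token Sale ends soon")))
\end{verbatim}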
\subsection{Language Detection}
Prior to performing any kind of natural language pre-processing and spam filtering, non-English tweets will need to be avoided. This can be achieved through language detection filtering, using techniques such as ngrams alongside other natural language pre-processing techniques to filter out non-English characters. Fortunately, both Tweepy and the Twitter API have methods for specifying the desired language in which to receive tweets - \textit{languages=['en']} for the Tweepy streaming method and \textit{query=\{...,language='en',...\}} in the JSON parameters for the Twitter API. This does provide a simple means of filtering out non-English tweets, but it only filters based on region and user settings which indicate the user's desired language. Thus, if a user has their region set to \textit{'en'}, or their desired language set to \textit{'en'}, their tweets will be classified as English but may contain non-English characters.
This being the case, a suitable language detection system will be implemented to identify any tweets containing non-English characters that make it past the initial API filters, and a tweet will be dropped if it is mostly non-English. If, however, the majority of the text is English but contains some non-English characters, those characters will be removed from the tweet.
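One possible sketch of such a detector, scoring each NLTK-supported language by stopword overlap with the tweet, is shown below; the majority-English decision rule is an assumption.
\begin{verbatim}
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def detect_language(text):
    # Score every NLTK stopword language by overlap with the tweet.
    tokens = set(word_tokenize(text.lower()))
    scores = {lang: len(tokens & set(stopwords.words(lang)))
              for lang in stopwords.fileids()}
    return max(scores, key=scores.get)

def is_english(text):
    return detect_language(text) == 'english'
\end{verbatim}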
\subsection{Sentiment Analysis}
As mentioned in the Literature review, the VADER sentiment analyser performs exceptionally well on the social media domain when compared to individual human raters and 10 other highly regarded sentiment analysers, as stated in the results section of the paper \textit{VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text} \cite{12}. \newline Extraction of results from the paper \cite{12}:
\begin{center}
\begin{tabular}{l|c|c|c}
\textbf{Analyser} & \textbf{Overall Precision} & \textbf{Overall Recall} & \textbf{Overall F1 Score}\\
\hline
\multirow{1}{*}{Ind. Humans} & 0.95 & 0.75 & 0.84 \\
\multirow{1}{*}{VADER} & \textbf{0.99} & \textbf{0.94} & \textbf{0.96} \\
\multirow{1}{*}{Hu-Liu04} & 0.94 & 0.66 & 0.77 \\
\multirow{1}{*}{SCN} & 0.81 & 0.75 & 0.75 \\
\multirow{1}{*}{GI} & 0.84 & 0.58 & 0.69 \\
\multirow{1}{*}{SWN} & 0.75 & 0.62 & 0.67 \\
\multirow{1}{*}{LIWC} & 0.94 & 0.48 & 0.63 \\
\multirow{1}{*}{ANEW} & 0.83 & 0.48 & 0.60 \\
\multirow{1}{*}{WSD} & 0.70 & 0.49 & 0.56 \\
\end{tabular}
\textbf{Analysis of Social Media Text (4,200 Tweets)}\cite{12}
\end{center}
Its suitability for the given domain of social media, and the customisability afforded by VADER's lexicon-dictionary based approach, make this sentiment analyser the most suitable for use in this project. It will be utilised as the sentiment analyser of this project due to its feature set and the need for little data pre-processing prior to polarity classification of the provided text \cite{11}.
This will be an intermediate system between the neural network and the data collection and pre-processing system, as the latter will provide the cleaned, processed data for analysis and the former will be fed the classified polarity of each tweet alongside price data for model learning.
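Using VADER requires very little code; a sketch with the \textit{vaderSentiment} package and a hypothetical tweet is shown below.
\begin{verbatim}
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()
tweet = "BTC is breaking out, great time to buy!"  # hypothetical
scores = analyser.polarity_scores(tweet)
# Returns a dict with 'neg', 'neu', 'pos' and 'compound' keys;
# 'compound' is the normalised overall polarity in [-1, 1].
print(scores['compound'])
\end{verbatim}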
\subsection{Neural Network}
The \textit{Neural Network} section in the literature review details how recurrent neural networks work, alongside how long-short term memory networks build upon them and overcome the limitations and known issues of a standard RNN. A recurrent neural network is the focus of this project due to the following:
\begin{itemize}
\item Nature of an RNN - Allows backpropagation to find the partial derivatives of the error with respect to the weights after an output has occurred, to tweak the current weights of the LSTM cell. In short, it allows the tweaking of the weights of the network based on previously seen data by looping through the same node, thus influencing decisions made on current data using old weights and errors from previous data.
\item Nature of an LSTM over an RNN - LSTMs are extensions of RNNs \cite{22} that were designed to avoid long-term dependency problems such as exploding and vanishing gradients. Weights are not just reused but are stored in memory and propagated through the network.
\item Lack of use for the project's purpose - Other papers tend to focus on machine learning techniques and other neural networks, such as the Multi-layer Perceptron (MLP) and standard recurrent neural networks, with time-series data - especially the standard RNN, without overcoming its common issues with gradient descent. As stated in the related research section of the literature review, \cite{5} - \textit{"using the MLP classifier (a.k.a neural networks) showed better results than logistical regression and random forest trained models"}
\item Prior use for time-series data and data forecasting - Although RNN LSTM networks have been used for the prediction of Bitcoin price, there are only a few papers on this \cite{25}. Regardless, LSTMs have been notably used for time-series data forecasting due to being able to remember previous data and weights over long sequence spans \cite{22} - \textit{""}, \cite{25} - \textit{""}.
\end{itemize}
Therefore, a recurrent long-short term memory neural network will be used for this project to predict the next hour interval of Bitcoin price based on previous historical prices and hourly sentiment. This system will read in historical data, both price and sentiment - depending on whether the network predicts with or without sentiment; this data will be merged, split and used to train and test the network model for use in forecasting prices. The relative sizes of the training and test data can be decided upon system creation, but the standard split is 75:25 respectively.
TensorFlow will also be used for the backend implementation, with the Keras API used on top of it to make development more straightforward. There are other tools comparable to TensorFlow that are also supported by Keras, such as \textit{"TABLE COMPARING TOOLS"} ...
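A sketch of how such a stacked LSTM could be defined with Keras on the TensorFlow backend follows; the layer sizes, dropout rate and training parameters are illustrative assumptions rather than the final architecture.
\begin{verbatim}
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

look_back, n_features = 5, 2   # 5-hour window of (price, sentiment)

model = Sequential()
model.add(LSTM(50, return_sequences=True,
               input_shape=(look_back, n_features)))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dense(1))            # next-hour price
model.compile(loss='mean_squared_error', optimizer='adam')
# model.fit(X_train, y_train, epochs=50, batch_size=32)
\end{verbatim}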
\subsection{Price Forecasting}
This part of the system will be responsible for predicting the next time-step of Bitcoin's price for the next hour based on past data. It will use the trained model from the neural network to predict the next hour's price when given live hourly data, price and sentiment. It will also have a look-back of 5, which will allow it to see historical data to aid in the predictions. This will occur on the hour, every hour, when new data is received and processed; this data should also be merged and then split into training and testing data. The sizing can be decided upon system creation, but the standard split is 75:25, training and testing respectively.
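The look-back of 5 amounts to turning the hourly series into supervised windows; a sketch of this windowing, assuming price is the first column of the merged data, is given below.
\begin{verbatim}
import numpy as np

def make_windows(data, look_back=5):
    # data: array of shape (hours, features), price in column 0.
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i + look_back])    # 5-hour input window
        y.append(data[i + look_back][0])   # next-hour price target
    return np.array(X), np.array(y)
\end{verbatim}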
\subsection{Frontend Application}
The frontend application will display the predicted data to the stakeholders and users of the system, along with charts of true hourly prices against predicted, both with and without sentiment embedded in the predictions. The interface will display this data in both tabular and chart form to provide variety to the user. Performance metrics will also be displayed at the bottom of the application to show the accuracy of the model. As this project focuses on the backend - how the predictions are made and the accuracy of the model - the interface will be somewhat of a secondary concern. It will aim to display the information in a clear and concise manner, which will start to solve the problem of providing a system to the public to aid in investment decisions; the design will not be complex but basic and functional. Therefore a basic webpage coded in HTML, with jQuery to plot data and Ajax requests to obtain and load data, will be sufficient.
%\includegraphics[width=18cm,height=8cm]{images/interface_design.png}
\begin{center}
\textit{Figure 3: Initial Interface design}
\end{center}
\subsection{With reference to Initial PID}
Both the problem and solution have changed considerably from the initial project initiation document (PID), which outlines the initial ideas, objectives and specification for the project. The reason for this was a change in direction caused by a number of factors, one being a change in passion after initial research into machine learning techniques and neural networks: instead of creating an application that just performed sentiment analysis, the direction turned towards how this could be used to predict future prices. This change still keeps loosely in line with the initial idea of wanting to create a platform that will aid in investor decision-making, but takes it a step further by directly giving investors predictions on market price direction as a basis for these decisions, rather than just identifying the opinion direction of the market.
Another point was the simplicity of the initial idea, which consisted of focusing more work on the design of the frontend application to display opinion data and general price data on a range of cryptocurrencies, populated simply by consuming exchange APIs. Both the developer and project supervisor came to the conclusion that this initial idea was too simple and a more complex approach needed to be formed.
The initial PID did, however, give a basis from which to form ideas and initial research, and was the beginning drive of this project.
\subsection{Solution Summary}\label{summary}
The overall solution, with reference to the problem statement, is to create a system mainly consisting of: a frontend application that will display plotted predicted and true data, along with performance metrics, to the user in a clear and concise form; and a backend system behind the price forecasting, which will consist of various subsystems responsible for data collection, filtering, data pre-processing, sentiment analysis, network training, validation and testing, and future price predictions. Each stage will use the relevant tools and techniques for performing its required task.
%The tools and techniques that will be used for this project are as follows, with relation to the relevent part of the system.
%\begin{itemize}
% \item Data Collector
% \subitem Prices: Historical - None, will be extrapolated from a historical price CSV, Live - Request and Coinbase Client api package
% \subitem Tweets: Historical - Request or Curl from the Twitter API, Live - Tweepy
% \item Natural Language pre-processing - Regex, (Tokenisation, Stemming, Stopword removal), ngrams, language detection
% \item Spam filter - Natural language pre-processing techniques (above), Naive Bayes Classifier
% \item Neural Network - Tensorflow, Keras, Sc
%\end{itemize}
\subsection{Data flow Overview}\label{data-flow}
To get an understanding of how the system will be put together, a dataflow diagram is a useful method for viewing how systems are integrated and how data could possibly flow through a system.
\includegraphics[width=18cm,height=8cm]{images/Generic_Flow.png}
\begin{center}
\textit{Figure 4: Basic Dataflow diagram of systems in the project and how data could possibly flow}
\end{center}
\newpage
\begin{center}
\section{System Design}\label{Design}
\end{center}
\subsection{Dataflow Designs}
This section will describe and outline how the system will be formed and how it will work with each component; a good way of displaying this is a dataflow diagram. A dataflow diagram is a way of representing the flow of data through a process or system; as a result, it also provides information about how the inputs and outputs of each component work and how they're connected to other components. It can also give either a broad or an in-depth overview of the specific workings of each component through how the data is processed and manipulated.
\newline
\textbf{Dataflow overview of entire system:}
\begin{center}
\includegraphics[width=18cm,height=8cm]{images/Dataflow.png}
\textit{Figure 5: Overall Dataflow diagram of the entire system}
\end{center}
This dataflow diagram shows the overall concept of how the data is intended to flow through the system, how it is processed and manipulated by each component, and what the outputs of each are. Due to its size, this will be broken up and individually explained.
\newpage
\textbf{Data collector}
\begin{center}
\includegraphics[width=15cm,height=8cm]{images/Data_Collector.png}
\textit{Figure 6: Data collector Dataflow diagram}
\end{center}
This dataflow diagram shows the part of the system responsible for the collection and processing of both historical and live data. This is split into three parts: the price collector, the tweet collector, and tweet normalisation and natural language pre-processing.
\begin{itemize}
\item Price Collector - Processes two forms of data, historical and live price data.
\subitem Historical data is extrapolated from three CSVs, from a historical price tracker, that contain the historical price for every hour of the past year. At this point in the project it was identified that historical price trackers do not, as previously assumed, average price data across exchanges; therefore this data will need to be merged and averaged to create the unbiased hourly price needed.
\subitem Live data is extracted directly from the three exchanges' APIs through REST endpoint requests.
\subitem Data from both, as separate processes independent from one another, are averaged by extracting the \textit{High}, \textit{Mid} and \textit{Low} hourly prices. The averaged price per hour for each exchange is then averaged across exchanges to obtain an unbiased hourly average. This is then saved to a CSV of historical or live prices respectively. The difference in the flow of data is that for live prices the process is looped every hour to extract the new hourly prices.
\item Tweet Collector - Streams tweets from Twitter using Tweepy; historical tweets are manually collected directly from the Twitter API. Both are fed through the normalisation and data pre-processing stage.
\item Data pre-processing - This involves cleaning the initial data by removing line breaks and new lines that occur in the data, and removing special characters that are standard in tweets (\textit{'\#', '\@' and URLs}); a sketch of this cleaning step follows the list. The text is then fed into a language detection system which tokenises it and compares stopwords in the text to NLTK-supported languages. Whether the text is identified as majority English or not determines whether the tweet is dropped and not used in the network. If the majority is English, any non-English characters still present in the text are removed.
\end{itemize}
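The cleaning step described above could be sketched as follows; the exact regular expressions are assumptions about what counts as unwanted characters.
\begin{verbatim}
import re

def clean_tweet(text):
    text = text.replace('\n', ' ').replace('\r', ' ')  # line breaks
    text = re.sub(r'https?://\S+', '', text)           # URLs
    text = re.sub(r'[#@]', '', text)                   # '#' and '@'
    return re.sub(r'\s+', ' ', text).strip()           # tidy whitespace
\end{verbatim}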
\textbf{Analysis Engine}
\begin{center}
\includegraphics[width=17cm,height=8cm]{images/Analysis_Engine.png}
\textit{Figure 7: Analysis Engine Dataflow diagram}
\end{center}
This dataflow diagram shows the part of the system that is responsible for training the spam filter, creating the model that'll be used to identify whether the tweets from the data collector are unwanted - spam. This system is also responsible for assigning a polarity classification to each tweet through sentiment analysis conducted by the VADER package \cite{12}.
\begin{itemize}
\item Spam filter training - The initial step in this system is to train the Naive Bayes classifier using the pre-labelled spam dataset, which contains an unbiased amount of both spam and ham tweets with their respective labels.
\subitem This data is split into two samples, training and test sets, 75:25 respectively, and the Naive Bayes classifier is trained and validated against these datasets after pre-processing occurs to prepare the data.
\item Data pre-processing - The tweets used for training and testing the filter, as well as the live and historical tweets, are processed through this section.
\subitem This section of the system is primarily used to process the tweets for the filter to classify the data, and doesn't directly modify the live and historical tweets. The data is processed through various natural language processing techniques such as: tokenisation, ngram generation, stopword removal and stemming.
\item Classifier Modelling and Model creation - Once the data is pre-processed, the data is classified and the prediction model created, which is later used to classify the historical and live tweets.
\item Sentiment Analysis (VADER) - On a separate route from the spam filter training, using the historical and live tweets, the sentiment analyser VADER performs analysis on the tweets and assigns a polarity classification to each text (\textit{negative, neutral, positive}), and calculates the \textit{compound} score, which is the difference between the negative and positive scores.
\item Storage - The polarity classification and tweets are saved to their respective CSV files for historical and live data.
\end{itemize}
\textbf{Neural Network}
\begin{center}
\includegraphics[width=17cm,height=12cm]{images/Neural_Network.png}
\textit{Figure 8: Neural Network layout Dataflow diagram}
\end{center}
The dataflow diagram in \textit{figure 8} shows the part of the system that is responsible for training and creating the neural network model. The diagram shows how this will be trained and the layers of a possible solution for the network; it shows 4 layers, which may not be the solution that is ultimately implemented but are there to represent the number of layers that could be implemented.
\begin{itemize}
\item Merging of Datasets - Data from both historical datasets is merged to create one dataset with mapped price and sentiment for each hour (see the sketch after this list). *This is a specific process that differs in the system that does not include sentiment for predictions; the merge process doesn't occur in that system/model.
\item Training and Testing - Data is split into two samples of training and testing, 75:25 respectively. **This also doesn't occur in the system that doesn't model with the sentiment.
\item Training network - The training sets, X and Y coordinates, are used to train the network.
\item Testing network - The testing sets, X and Y coordinates comprising 25\% of the initial data, are used to test the validation and accuracy of predictions, as these contain the true data of what the predictions should be.
\item Outputs - Accuracy statistics, true price data and predicted next hour prices are outputted to respective files for use on the front-end application. The model is then later used for hourly forecasting.
\end{itemize}
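The merge and 75:25 split could be sketched with pandas as follows; the file names and column names are hypothetical.
\begin{verbatim}
import pandas as pd

prices = pd.read_csv('historical_prices.csv')        # hour, price
sentiment = pd.read_csv('historical_sentiment.csv')  # hour, compound

merged = pd.merge(prices, sentiment, on='hour')  # price+sentiment per hour
split = int(len(merged) * 0.75)                  # 75:25 split
train, test = merged.iloc[:split], merged.iloc[split:]
\end{verbatim}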
\textbf{Future Price Forecasting}
\begin{center}
\includegraphics[width=18cm,height=8cm]{images/Future_Predictions.png}
\textit{Figure 9: Price Forecasting Dataflow diagram}
\end{center}
The dataflow diagram in \textit{figure 9} shows how the forecasting system would be implemented: it reads live sentiment and price data, merges and splits it, and performs regression with the trained neural network model to predict the next hour's price (a sketch of the loop follows the list below).
\begin{itemize}
\item Data merging - (Does not occur in the system that excludes sentiment from price predictions.) Historical and live data are merged for up to the first 5 iterations, because after the initial hour there is only a single live record of price and sentiment data, which is not a sufficient amount to predict from.
\item Prediction - This data is then fed to the neural network model, and a prediction for the next hourly time step is made.
\item Hour loop - The process then loops every hour to make the hourly predictions. Historical price data ceases to be used once there are 5 or more live price records.
\item Outputs - Accuracy statistics, true price data and predicted next-hour prices are output to their respective files for charting in the front-end application.
\end{itemize}
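A minimal sketch of this loop is given below; it reuses the \texttt{model} and \texttt{scaler} from the training sketch, the file names and 24-hour window are illustrative assumptions, and the 5-record threshold follows the dataflow description.
\begin{verbatim}
# Minimal sketch of the hourly forecasting loop (illustrative only).
# Assumes `model` and `scaler` from the training sketch above.
import time
import numpy as np
import pandas as pd

def next_hour_forecast(model, scaler, window=24):
    live = pd.read_csv('live_hourly.csv')
    if len(live) < 5:
        # Fewer than 5 live records: pad with the tail of the
        # historical data (assumes at least `window` rows in total).
        hist = pd.read_csv('historical_hourly.csv')
        live = pd.concat([hist, live], ignore_index=True)
    recent = scaler.transform(live[['price', 'compound']].tail(window).values)
    pred = model.predict(recent[np.newaxis, ...])  # input (1, window, 2)
    # Undo the scaling on the price column only.
    return scaler.inverse_transform([[pred[0, 0], 0.0]])[0, 0]

while True:                                        # hourly loop (figure 9)
    print('Predicted next-hour price:', next_hour_forecast(model, scaler))
    time.sleep(3600)
\end{verbatim}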
\newpage
\textbf{Front-end Application}
\begin{center}
\includegraphics[width=10cm,height=9cm]{images/Frontend_Application.png}
\newline
\textit{Figure 10: Front-end Application Dataflow diagram}
\end{center}
The above dataflow diagram shows the data flow for the front-end application and how data is read into it from the files generated by the backend application (the neural network).
\begin{itemize}
\item Ajax requests - Asynchronous requests for the data files hosted on the server on which the system runs; these load the data files into the application.
\item CSS styling - Contains the design styling for the page and charts, loaded when the webpage loads.
\item Charting and tables - Accesses the data loaded by the Ajax requests and plots it; prediction data, with sentiment and prices, is also presented in a table. Separate charts and tables display the outputs of the backend model that did not use sentiment in its predictions, to help establish whether a correlation exists between sentiment and the hourly price (aiming to answer the problem statement).
\item Stakeholders - The four stakeholders outlined in the problem articulation section will be the primary users of this application.
\end{itemize}
\newpage
\subsection{UML Component Design}
\subsection{Interface Design}
\newpage
\begin{center}
\section{Implementation}\label{implementation}
\end{center}
\subsection{Data collection}\label{collection}
\subsubsection{Price Time-series Data}
Historical data of Bitcoin prices can be obtained through many means,
\newpage
\section{Testing: Verification and Reflection}
One candidate metric for verifying the forecasts is the mean bias error (MBE).
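For reference, its standard definition, with $\hat{y}_i$ the predicted and $y_i$ the true next-hour price over $n$ forecasts, is
\[
\mathrm{MBE} = \frac{1}{n}\sum_{i=1}^{n}\left(\hat{y}_i - y_i\right).
\]
A positive MBE indicates that the model systematically over-predicts the price, while a negative value indicates systematic under-prediction.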
\newpage