diff --git a/document.aux b/document.aux
index a7544e0..e5d8216 100644
--- a/document.aux
+++ b/document.aux
@@ -23,22 +23,23 @@
 \@writefile{toc}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
 \@writefile{lof}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
 \@writefile{lot}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Abstract}{1}{section*.1}}
-\newlabel{abstract}{{}{1}{Abstract}{section*.1}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Acknowledgements}{2}{section*.2}}
-\newlabel{acknowledgements}{{}{2}{Acknowledgements}{section*.2}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Glossary}{3}{section*.3}}
-\newlabel{glossary}{{}{3}{Glossary}{section*.3}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Abstract}{2}{section*.1}}
+\newlabel{abstract}{{}{2}{Abstract}{section*.1}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Acknowledgements}{3}{section*.2}}
+\newlabel{acknowledgements}{{}{3}{Acknowledgements}{section*.2}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Glossary}{4}{section*.3}}
+\newlabel{glossary}{{}{4}{Glossary}{section*.3}{}}
 \abx@aux@cite{1}
 \abx@aux@segm{0}{0}{1}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Introduction}{7}{section*.5}}
-\newlabel{introduction}{{}{7}{Introduction}{section*.5}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Problem Articulation}{9}{section*.6}}
-\newlabel{problem}{{}{9}{Problem Articulation}{section*.6}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Problem Statement}{9}{section*.7}}
-\newlabel{statement}{{}{9}{Problem Statement}{section*.7}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Stakeholders}{9}{section*.8}}
-\newlabel{stakeholders}{{}{9}{Stakeholders}{section*.8}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Introduction}{8}{section*.5}}
+\newlabel{introduction}{{}{8}{Introduction}{section*.5}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Problem Articulation}{10}{section*.6}}
+\newlabel{problem}{{}{10}{Problem Articulation}{section*.6}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Problem Statement}{10}{section*.7}}
+\newlabel{statement}{{}{10}{Problem Statement}{section*.7}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Stakeholders}{10}{section*.8}}
+\newlabel{stakeholders}{{}{10}{Stakeholders}{section*.8}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Motivation}{11}{section*.9}}
 \abx@aux@cite{2}
 \abx@aux@segm{0}{0}{2}
 \abx@aux@cite{3}
@@ -46,40 +47,40 @@
 \abx@aux@cite{4}
 \abx@aux@segm{0}{0}{4}
 \abx@aux@segm{0}{0}{1}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Motivation}{10}{section*.9}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Technical Specification}{11}{section*.10}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Constraints}{12}{section*.11}}
-\newlabel{constraints}{{}{12}{Project Constraints}{section*.11}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Technical Specification}{13}{section*.10}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Project Constraints}{15}{section*.11}}
+\newlabel{constraints}{{}{15}{Project Constraints}{section*.11}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Quality Goals}{15}{section*.12}}
 \abx@aux@segm{0}{0}{3}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Literature Review}{16}{section*.13}}
+\newlabel{literature}{{}{16}{Literature Review}{section*.13}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Existing Tools}{16}{section*.14}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Related research}{16}{section*.15}}
 \abx@aux@segm{0}{0}{1}
 \abx@aux@cite{5}
 \abx@aux@segm{0}{0}{5}
 \abx@aux@cite{6}
 \abx@aux@segm{0}{0}{6}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Literature Review}{13}{section*.12}}
-\newlabel{literature}{{}{13}{Literature Review}{section*.12}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Existing Tools}{13}{section*.13}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Related research}{13}{section*.14}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data Collection}{17}{section*.16}}
+\newlabel{tweet_collection}{{}{17}{Data Collection}{section*.16}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Twitter and Twitter API}{17}{section*.17}}
 \abx@aux@cite{7}
 \abx@aux@segm{0}{0}{7}
 \abx@aux@cite{8}
 \abx@aux@segm{0}{0}{8}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data Collection}{14}{section*.15}}
-\newlabel{tweet_collection}{{}{14}{Data Collection}{section*.15}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Twitter and Twitter API}{14}{section*.16}}
 \abx@aux@cite{9}
 \abx@aux@segm{0}{0}{9}
 \abx@aux@segm{0}{0}{7}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Tweepy Python Package}{18}{section*.18}}
 \abx@aux@segm{0}{0}{1}
 \abx@aux@segm{0}{0}{3}
 \abx@aux@segm{0}{0}{6}
 \abx@aux@cite{10}
 \abx@aux@segm{0}{0}{10}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Tweepy Python Package}{15}{section*.17}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{15}{section*.18}}
-\newlabel{sentiment}{{}{15}{Sentiment Analysis}{section*.18}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Natural Language Processing}{15}{section*.19}}
-\newlabel{algorithms}{{}{15}{Natural Language Processing}{section*.19}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{19}{section*.19}}
+\newlabel{sentiment}{{}{19}{Sentiment Analysis}{section*.19}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Natural Language Processing}{19}{section*.20}}
+\newlabel{algorithms}{{}{19}{Natural Language Processing}{section*.20}{}}
 \abx@aux@cite{11}
 \abx@aux@segm{0}{0}{11}
 \abx@aux@cite{12}
@@ -91,25 +92,25 @@
 \abx@aux@segm{0}{0}{12}
 \abx@aux@segm{0}{0}{12}
 \abx@aux@segm{0}{0}{12}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Valence Aware Dictionary and sEntiment Reasoning}{20}{section*.21}}
+\newlabel{Vader}{{}{20}{Valence Aware Dictionary and sEntiment Reasoning}{section*.21}{}}
 \abx@aux@cite{14}
 \abx@aux@segm{0}{0}{14}
 \abx@aux@cite{15}
 \abx@aux@segm{0}{0}{15}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Valence Aware Dictionary and sEntiment Reasoning}{16}{section*.20}}
-\newlabel{Vader}{{}{16}{Valence Aware Dictionary and sEntiment Reasoning}{section*.20}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Networks}{16}{section*.21}}
-\newlabel{networks}{{}{16}{Neural Networks}{section*.21}{}}
 \abx@aux@cite{16}
 \abx@aux@segm{0}{0}{16}
 \abx@aux@segm{0}{0}{15}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Networks}{21}{section*.22}}
+\newlabel{networks}{{}{21}{Neural Networks}{section*.22}{}}
 \abx@aux@cite{17}
 \abx@aux@segm{0}{0}{17}
 \abx@aux@cite{18}
 \abx@aux@segm{0}{0}{18}
 \abx@aux@cite{19}
 \abx@aux@segm{0}{0}{19}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Recurrent Neural Network (RNN)}{17}{section*.22}}
-\newlabel{types}{{}{17}{Recurrent Neural Network (RNN)}{section*.22}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Recurrent Neural Network (RNN)}{22}{section*.23}}
+\newlabel{types}{{}{22}{Recurrent Neural Network (RNN)}{section*.23}{}}
 \abx@aux@cite{20}
 \abx@aux@segm{0}{0}{20}
 \abx@aux@cite{21}
@@ -119,9 +120,9 @@
 \abx@aux@segm{0}{0}{22}
 \abx@aux@cite{23}
 \abx@aux@segm{0}{0}{23}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Long-Short Term Memory (LSTM)}{18}{section*.23}}
-\newlabel{lstms}{{}{18}{Long-Short Term Memory (LSTM)}{section*.23}{}}
 \abx@aux@segm{0}{0}{23}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Long-Short Term Memory (LSTM)}{23}{section*.24}}
+\newlabel{lstms}{{}{23}{Long-Short Term Memory (LSTM)}{section*.24}{}}
 \abx@aux@segm{0}{0}{22}
 \abx@aux@segm{0}{0}{23}
 \abx@aux@cite{24}
@@ -132,7 +133,7 @@
 \abx@aux@segm{0}{0}{25}
 \abx@aux@cite{26}
 \abx@aux@segm{0}{0}{26}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Keras and TensorFlow}{19}{section*.24}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Keras and TensorFlow}{24}{section*.25}}
 \abx@aux@cite{27}
 \abx@aux@segm{0}{0}{27}
 \abx@aux@cite{28}
@@ -141,81 +142,97 @@
 \abx@aux@segm{0}{0}{28}
 \abx@aux@cite{29}
 \abx@aux@segm{0}{0}{29}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Optimisers}{25}{section*.26}}
 \abx@aux@cite{30}
 \abx@aux@segm{0}{0}{30}
 \abx@aux@cite{31}
 \abx@aux@segm{0}{0}{31}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Optimisers}{20}{section*.25}}
 \abx@aux@segm{0}{0}{30}
 \abx@aux@cite{32}
 \abx@aux@segm{0}{0}{32}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Machine Learning}{21}{section*.26}}
-\newlabel{machine}{{}{21}{Machine Learning}{section*.26}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Naive Bayes}{21}{section*.27}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Machine Learning}{27}{section*.27}}
+\newlabel{machine}{{}{27}{Machine Learning}{section*.27}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Naive Bayes}{27}{section*.28}}
 \abx@aux@segm{0}{0}{32}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Random Forest}{28}{section*.29}}
 \abx@aux@cite{33}
 \abx@aux@segm{0}{0}{33}
 \abx@aux@segm{0}{0}{8}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Solution Approach}{23}{section*.28}}
-\newlabel{solution}{{}{23}{Solution Approach}{section*.28}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data gathering}{23}{section*.29}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Solution Approach}{29}{section*.30}}
+\newlabel{solution}{{}{29}{Solution Approach}{section*.30}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data gathering}{29}{section*.31}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data pre-processing}{30}{section*.32}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Spam Filtering}{30}{section*.33}}
 \abx@aux@segm{0}{0}{12}
 \abx@aux@segm{0}{0}{12}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data pre-processing}{24}{section*.30}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Spam Filtering}{24}{section*.31}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Language Detection}{24}{section*.32}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Language Detection}{31}{section*.34}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{31}{section*.35}}
 \abx@aux@segm{0}{0}{12}
 \abx@aux@segm{0}{0}{11}
 \abx@aux@segm{0}{0}{22}
 \abx@aux@segm{0}{0}{5}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Network}{32}{section*.36}}
 \abx@aux@segm{0}{0}{25}
-\abx@aux@segm{0}{0}{22}
 \abx@aux@segm{0}{0}{25}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{25}{section*.33}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Neural Network}{25}{section*.34}}
 \abx@aux@cite{34}
 \abx@aux@segm{0}{0}{34}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Price Forecasting}{26}{section*.36}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Frontend Application}{26}{section*.37}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{With reference to Initial PID}{27}{section*.38}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Solution Summary}{27}{section*.39}}
-\newlabel{summary}{{}{27}{Solution Summary}{section*.39}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data flow Overview}{27}{section*.40}}
-\newlabel{data-flow}{{}{27}{Data flow Overview}{section*.40}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{System Design}{29}{section*.41}}
-\newlabel{Design}{{}{29}{System Design}{section*.41}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Dataflow Designs}{29}{section*.42}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Price Forecasting}{34}{section*.38}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Frontend Application}{34}{section*.39}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{With reference to Initial PID}{34}{section*.40}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Solution Summary}{35}{section*.41}}
+\newlabel{summary}{{}{35}{Solution Summary}{section*.41}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data flow Overview}{36}{section*.42}}
+\newlabel{data-flow}{{}{36}{Data flow Overview}{section*.42}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{System Design}{37}{section*.43}}
+\newlabel{Design}{{}{37}{System Design}{section*.43}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Dataflow Designs}{37}{section*.44}}
 \abx@aux@segm{0}{0}{12}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{UML Component Design}{35}{section*.43}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Interface Design}{35}{section*.44}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Implementation}{36}{section*.46}}
-\newlabel{implementation}{{}{36}{Implementation}{section*.46}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data collection}{36}{section*.47}}
-\newlabel{collection}{{}{36}{Data collection}{section*.47}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Price Time-series Data}{36}{section*.48}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data processing}{36}{section*.49}}
-\newlabel{processing}{{}{36}{Data processing}{section*.49}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Preprocessing}{36}{section*.50}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Tweet Filtering}{36}{section*.51}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Text Cleaning}{36}{section*.52}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Ngram based Language detection filtering}{36}{section*.53}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Spam Filtering}{36}{section*.54}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Tweet Processing}{36}{section*.55}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Naive Bayes model}{36}{section*.56}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{36}{section*.57}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{VADER}{36}{section*.58}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Recurrent Neural Network - LSTM}{36}{section*.59}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Training and Testing Model}{36}{section*.60}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Scoring and Validation}{36}{section*.61}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Future Prediction Forecasting}{36}{section*.62}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Testing: Verification and Reflection}{37}{section*.63}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Discussion: Contribution and Reflection}{38}{section*.64}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Limitations}{38}{section*.65}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Conclusion and Future Improvements}{39}{section*.66}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Conclusion}{39}{section*.67}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Future Improvements}{39}{section*.68}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Appendices}{43}{section*.70}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix A - Project Initiation Document}{43}{section*.71}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Interface Design}{44}{section*.45}}
+\abx@aux@cite{35}
+\abx@aux@segm{0}{0}{35}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Implementation}{45}{section*.47}}
+\newlabel{implementation}{{}{45}{Implementation}{section*.47}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data collection}{45}{section*.48}}
+\newlabel{collection}{{}{45}{Data collection}{section*.48}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Price Time-Series Historical Data}{45}{section*.49}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Historical price collection and averaging per exchange}{45}{lstlisting.1}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Price Time-Series Live Data}{46}{section*.50}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Extraction of Price from exchanges}{46}{lstlisting.2}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Creation of the unbiased hourly price}{47}{lstlisting.3}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Historical Tweet Collection}{48}{section*.51}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}Sample Curl request - data saved to json and python scripted called to process data}{48}{lstlisting.4}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}Sift-text python script - used alongside Curl command in Listing 4}{48}{lstlisting.5}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Live Tweet Collection}{50}{section*.52}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}Spam filter initialisation and training functions}{50}{lstlisting.6}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}Tweepy Streamer setup}{51}{lstlisting.7}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}Tweepy Stream: 'on\_data' method}{52}{lstlisting.8}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Data processing}{55}{section*.53}}
+\newlabel{processing}{{}{55}{Data processing}{section*.53}{}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Preprocessing}{55}{section*.54}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}Basic data filtering and processing function - defined in 'tweet\_collector.py'}{55}{lstlisting.9}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Text Cleaning}{55}{section*.56}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Ngram based Language detection filtering}{55}{section*.57}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Spam Filtering}{55}{section*.58}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {10}Spam filter training Class}{55}{lstlisting.10}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {paragraph}{Naive Bayes model}{57}{section*.59}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Sentiment Analysis}{57}{section*.60}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{VADER}{57}{section*.61}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Recurrent Neural Network - LSTM}{57}{section*.62}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Training and Testing Model}{57}{section*.63}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{Scoring and Validation}{57}{section*.64}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Future Prediction Forecasting}{58}{section*.65}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Main File 'Main.py'}{58}{section*.66}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Miscellaneous}{58}{section*.67}}
+\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {11}keys class - loads API keys for access}{58}{lstlisting.11}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Testing Metrics and Accuracy}{59}{section*.68}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Project Evaluation}{60}{section*.69}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Discussion: Contribution and Reflection}{60}{section*.70}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Limitations}{60}{section*.71}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Conclusion and Future Improvements}{61}{section*.72}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Conclusion}{61}{section*.73}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Future Improvements}{61}{section*.74}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{Appendices}{66}{section*.76}}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix A - Project Initiation Document}{66}{section*.77}}
 \abx@aux@refcontextdefaultsdone
 \abx@aux@defaultrefcontext{0}{1}{none/global//global/global}
 \abx@aux@defaultrefcontext{0}{2}{none/global//global/global}
@@ -251,4 +268,5 @@
 \abx@aux@defaultrefcontext{0}{32}{none/global//global/global}
 \abx@aux@defaultrefcontext{0}{33}{none/global//global/global}
 \abx@aux@defaultrefcontext{0}{34}{none/global//global/global}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix B - Log book}{56}{section*.72}}
+\abx@aux@defaultrefcontext{0}{35}{none/global//global/global}
+\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{Appendix B - Log book}{79}{section*.78}}
diff --git a/document.bbl b/document.bbl
index fdde499..e02b972 100644
--- a/document.bbl
+++ b/document.bbl
@@ -1156,6 +1156,31 @@
 \verb https://www.analyticsindiamag.com/tensorflow-vs-theano-researchers-prefer-artificial-intelligence-framework
 \endverb
 \endentry
+ \entry{35}{inproceedings}{}
+ \name{author}{1}{}{%
+ {{hash=9fb73450a6ae06fd7652db34b9a3c981}{%
+ family={bitcoincharts},
+ familyi={b\bibinitperiod}}}%
+ }
+ \list{organization}{1}{%
+ {Bitcoin Charts}%
+ }
+ \strng{namehash}{9fb73450a6ae06fd7652db34b9a3c981}
+ \strng{fullhash}{9fb73450a6ae06fd7652db34b9a3c981}
+ \strng{bibnamehash}{9fb73450a6ae06fd7652db34b9a3c981}
+ \strng{authorbibnamehash}{9fb73450a6ae06fd7652db34b9a3c981}
+ \strng{authornamehash}{9fb73450a6ae06fd7652db34b9a3c981}
+ \strng{authorfullhash}{9fb73450a6ae06fd7652db34b9a3c981}
+ \field{sortinit}{6}
+ \field{sortinithash}{57e57fb8451e7fcfa45d1e069f6d3136}
+ \field{labelnamesource}{author}
+ \verb{urlraw}
+ \verb http://api.bitcoincharts.com/v1/csv/
+ \endverb
+ \verb{url}
+ \verb http://api.bitcoincharts.com/v1/csv/
+ \endverb
+ \endentry
 \enddatalist
 \endrefsection
 \endinput
diff --git a/document.bcf b/document.bcf
index 5df50bb..cc2a9da 100644
--- a/document.bcf
+++ b/document.bcf
@@ -1988,10 +1988,10 @@
 22
 5
 25
- 22
- 25
- 34
- 12
+ 25
+ 34
+ 12
+ 35
 *
diff --git a/document.blg b/document.blg
index 815288a..42671e0 100644
--- a/document.blg
+++ b/document.blg
@@ -1,20 +1,20 @@
 [0] Config.pm:302> INFO - This is Biber 2.9
 [0] Config.pm:305> INFO - Logfile is 'document.blg'
-[18] biber:313> INFO - === Thu Apr 25, 2019, 00:03:40
-[33] Biber.pm:371> INFO - Reading 'document.bcf'
-[79] Biber.pm:854> INFO - Using all citekeys in bib section 0
-[88] Biber.pm:3981> INFO - Processing section 0
-[96] Biber.pm:4154> INFO - Looking for bibtex format file 'report.bib' for section 0
-[97] bibtex.pm:1468> INFO - LaTeX decoding ...
-[125] bibtex.pm:1294> INFO - Found BibTeX data source 'report.bib'
-[133] Utils.pm:169> WARN - year field 'Mar 13, 2016' in entry '23' is not an integer - this will probably not sort properly.
-[206] Utils.pm:169> WARN - Name "Mairal, J., Ponce, J., Sapiro, G., Zisserman, A." has too many commas: skipping name
-[238] Utils.pm:169> WARN - BibTeX subsystem: warning: comma(s) at end of name (removing)
-[238] Utils.pm:169> WARN - BibTeX subsystem: author, warning: comma(s) at end of name (removing)
-[257] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
-[257] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
-[257] Biber.pm:3809> INFO - Sorting list 'none/global//global/global' of type 'entry' with template 'none' and locale 'en-US'
-[257] Biber.pm:3815> INFO - No sort tailoring available for locale 'en-US'
-[275] bbl.pm:617> INFO - Writing 'document.bbl' with encoding 'ascii'
-[287] bbl.pm:720> INFO - Output to document.bbl
-[288] Biber.pm:110> INFO - WARNINGS: 4
+[22] biber:313> INFO - === Thu Apr 25, 2019, 20:17:38
+[39] Biber.pm:371> INFO - Reading 'document.bcf'
+[92] Biber.pm:854> INFO - Using all citekeys in bib section 0
+[103] Biber.pm:3981> INFO - Processing section 0
+[112] Biber.pm:4154> INFO - Looking for bibtex format file 'report.bib' for section 0
+[114] bibtex.pm:1468> INFO - LaTeX decoding ...
+[147] bibtex.pm:1294> INFO - Found BibTeX data source 'report.bib'
+[196] Utils.pm:169> WARN - year field 'Mar 13, 2016' in entry '23' is not an integer - this will probably not sort properly.
+[203] Utils.pm:169> WARN - Name "Mairal, J., Ponce, J., Sapiro, G., Zisserman, A." has too many commas: skipping name
+[266] Utils.pm:169> WARN - BibTeX subsystem: warning: comma(s) at end of name (removing)
+[266] Utils.pm:169> WARN - BibTeX subsystem: author, warning: comma(s) at end of name (removing)
+[286] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
+[286] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
+[286] Biber.pm:3809> INFO - Sorting list 'none/global//global/global' of type 'entry' with template 'none' and locale 'en-US'
+[286] Biber.pm:3815> INFO - No sort tailoring available for locale 'en-US'
+[306] bbl.pm:617> INFO - Writing 'document.bbl' with encoding 'ascii'
+[318] bbl.pm:720> INFO - Output to document.bbl
+[319] Biber.pm:110> INFO - WARNINGS: 4
diff --git a/document.log b/document.log
index 4607693..5a878cc 100644
--- a/document.log
+++ b/document.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.10.16) 25 APR 2019 00:06
+This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.10.16) 25 APR 2019 20:17
 entering extended mode
 restricted \write18 enabled.
 %&-line parsing enabled.
@@ -8,8 +8,8 @@
 LaTeX2e <2017-04-15>
 Babel <3.18> and hyphenation patterns for 84 language(s) loaded.
 (/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
 Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
-(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo
-File: size10.clo 2014/09/29 v1.4h Standard LaTeX file (size option)
+(/usr/share/texlive/texmf-dist/tex/latex/base/size12.clo
+File: size12.clo 2014/09/29 v1.4h Standard LaTeX file (size option)
 )
 \c@part=\count79
 \c@section=\count80
@@ -454,6 +454,31 @@
 Package: titling 2009/09/04 v2.1d maketitle typesetting
 \thanksmargin=\skip72
 \droptitle=\skip73
 )
+(/usr/share/texlive/texmf-dist/tex/latex/listings/listings.sty
+\lst@mode=\count141
+\lst@gtempboxa=\box37
+\lst@token=\toks39
+\lst@length=\count142
+\lst@currlwidth=\dimen153
+\lst@column=\count143
+\lst@pos=\count144
+\lst@lostspace=\dimen154
+\lst@width=\dimen155
+\lst@newlines=\count145
+\lst@lineno=\count146
+\lst@maxwidth=\dimen156
+
+(/usr/share/texlive/texmf-dist/tex/latex/listings/lstmisc.sty
+File: lstmisc.sty 2015/06/04 1.6 (Carsten Heinz)
+\c@lstnumber=\count147
+\lst@skipnumbers=\count148
+\lst@framebox=\box38
+)
+(/usr/share/texlive/texmf-dist/tex/latex/listings/listings.cfg
+File: listings.cfg 2015/06/04 1.6 listings configuration
+))
+Package: listings 2015/06/04 1.6 (Carsten Heinz)
+
 (/usr/share/texlive/texmf-dist/tex/latex/biblatex/biblatex.sty
 Package: biblatex 2017/12/19 v3.10 programmable bibliographies (PK/JW/AB)
@@ -485,7 +510,7 @@
 )))
 (/usr/share/texlive/texmf-dist/tex/latex/logreq/logreq.sty
 Package: logreq 2010/08/04 v1.0 xml request logger
-\lrq@indent=\count141
+\lrq@indent=\count149
 (/usr/share/texlive/texmf-dist/tex/latex/logreq/logreq.def
 File: logreq.def 2010/08/04 v1.0 logreq spec v1.0
@@ -497,49 +522,49 @@
 Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
 (/usr/share/texlive/texmf-dist/tex/generic/xstring/xstring.sty
 (/usr/share/texlive/texmf-dist/tex/generic/xstring/xstring.tex
 \@xs@message=\write3
-\integerpart=\count142
-\decimalpart=\count143
+\integerpart=\count150
+\decimalpart=\count151
 )
 Package: xstring 2013/10/13 v1.7c String manipulations (C Tellechea)
 )
-\c@tabx@nest=\count144
-\c@listtotal=\count145
-\c@listcount=\count146
-\c@liststart=\count147
-\c@liststop=\count148
-\c@citecount=\count149
-\c@citetotal=\count150
-\c@multicitecount=\count151
-\c@multicitetotal=\count152
-\c@instcount=\count153
-\c@maxnames=\count154
-\c@minnames=\count155
-\c@maxitems=\count156
-\c@minitems=\count157
-\c@citecounter=\count158
-\c@savedcitecounter=\count159
-\c@uniquelist=\count160
-\c@uniquename=\count161
-\c@refsection=\count162
-\c@refsegment=\count163
-\c@maxextratitle=\count164
-\c@maxextratitleyear=\count165
-\c@maxextradate=\count166
-\c@maxextraalpha=\count167
-\c@abbrvpenalty=\count168
-\c@highnamepenalty=\count169
-\c@lownamepenalty=\count170
-\c@maxparens=\count171
-\c@parenlevel=\count172
-\blx@tempcnta=\count173
-\blx@tempcntb=\count174
-\blx@tempcntc=\count175
-\blx@maxsection=\count176
-\blx@maxsegment@0=\count177
-\blx@notetype=\count178
-\blx@parenlevel@text=\count179
-\blx@parenlevel@foot=\count180
-\blx@sectionciteorder@0=\count181
+\c@tabx@nest=\count152
+\c@listtotal=\count153
+\c@listcount=\count154
+\c@liststart=\count155
+\c@liststop=\count156
+\c@citecount=\count157
+\c@citetotal=\count158
+\c@multicitecount=\count159
+\c@multicitetotal=\count160
+\c@instcount=\count161
+\c@maxnames=\count162
+\c@minnames=\count163
+\c@maxitems=\count164
+\c@minitems=\count165
+\c@citecounter=\count166
+\c@savedcitecounter=\count167
+\c@uniquelist=\count168
+\c@uniquename=\count169
+\c@refsection=\count170
+\c@refsegment=\count171
+\c@maxextratitle=\count172
+\c@maxextratitleyear=\count173
+\c@maxextradate=\count174
+\c@maxextraalpha=\count175
+\c@abbrvpenalty=\count176
+\c@highnamepenalty=\count177
+\c@lownamepenalty=\count178
+\c@maxparens=\count179
+\c@parenlevel=\count180
+\blx@tempcnta=\count181
+\blx@tempcntb=\count182
+\blx@tempcntc=\count183
+\blx@maxsection=\count184
+\blx@maxsegment@0=\count185
+\blx@notetype=\count186
+\blx@parenlevel@text=\count187
+\blx@parenlevel@foot=\count188
+\blx@sectionciteorder@0=\count189
 \labelnumberwidth=\skip74
 \labelalphawidth=\skip75
 \biblabelsep=\skip76
@@ -550,9 +575,9 @@
 \bibhang=\skip81
 \blx@bcfin=\read1
 \blx@bcfout=\write4
-\c@mincomprange=\count182
-\c@maxcomprange=\count183
-\c@mincompwidth=\count184
+\c@mincomprange=\count190
+\c@maxcomprange=\count191
+\c@mincompwidth=\count192
 Package biblatex Info: Trying to load biblatex default data model...
 Package biblatex Info: ... file 'blx-dm.def' found.
@@ -561,74 +586,74 @@
 Package biblatex Info: Trying to load biblatex style data model...
 Package biblatex Info: ... file 'ieee.dbx' not found.
 Package biblatex Info: Trying to load biblatex custom data model...
 Package biblatex Info: ... file 'biblatex-dm.cfg' not found.
-\c@afterword=\count185
-\c@savedafterword=\count186
-\c@annotator=\count187
-\c@savedannotator=\count188
-\c@author=\count189
-\c@savedauthor=\count190
-\c@bookauthor=\count191
-\c@savedbookauthor=\count192
-\c@commentator=\count193
-\c@savedcommentator=\count194
-\c@editor=\count195
-\c@savededitor=\count196
-\c@editora=\count197
-\c@savededitora=\count198
-\c@editorb=\count199
-\c@savededitorb=\count266
-\c@editorc=\count267
-\c@savededitorc=\count268
-\c@foreword=\count269
-\c@savedforeword=\count270
-\c@holder=\count271
-\c@savedholder=\count272
-\c@introduction=\count273
-\c@savedintroduction=\count274
-\c@namea=\count275
-\c@savednamea=\count276
-\c@nameb=\count277
-\c@savednameb=\count278
-\c@namec=\count279
-\c@savednamec=\count280
-\c@translator=\count281
-\c@savedtranslator=\count282
-\c@shortauthor=\count283
-\c@savedshortauthor=\count284
-\c@shorteditor=\count285
-\c@savedshorteditor=\count286
-\c@labelname=\count287
-\c@savedlabelname=\count288
-\c@institution=\count289
-\c@savedinstitution=\count290
-\c@lista=\count291
-\c@savedlista=\count292
-\c@listb=\count293
-\c@savedlistb=\count294
-\c@listc=\count295
-\c@savedlistc=\count296
-\c@listd=\count297
-\c@savedlistd=\count298
-\c@liste=\count299
-\c@savedliste=\count300
-\c@listf=\count301
-\c@savedlistf=\count302
-\c@location=\count303
-\c@savedlocation=\count304
-\c@organization=\count305
-\c@savedorganization=\count306
-\c@origlocation=\count307
-\c@savedoriglocation=\count308
-\c@origpublisher=\count309
-\c@savedorigpublisher=\count310
-\c@publisher=\count311
-\c@savedpublisher=\count312
-\c@language=\count313
-\c@savedlanguage=\count314
-\c@origlanguage=\count315
-\c@savedoriglanguage=\count316
-\c@pageref=\count317
-\c@savedpageref=\count318
+\c@afterword=\count193
+\c@savedafterword=\count194
+\c@annotator=\count195
+\c@savedannotator=\count196
+\c@author=\count197
+\c@savedauthor=\count198
+\c@bookauthor=\count199
+\c@savedbookauthor=\count266
+\c@commentator=\count267
+\c@savedcommentator=\count268
+\c@editor=\count269
+\c@savededitor=\count270
+\c@editora=\count271
+\c@savededitora=\count272
+\c@editorb=\count273
+\c@savededitorb=\count274
+\c@editorc=\count275
+\c@savededitorc=\count276
+\c@foreword=\count277
+\c@savedforeword=\count278
+\c@holder=\count279
+\c@savedholder=\count280
+\c@introduction=\count281
+\c@savedintroduction=\count282
+\c@namea=\count283
+\c@savednamea=\count284
+\c@nameb=\count285
+\c@savednameb=\count286
+\c@namec=\count287
+\c@savednamec=\count288
+\c@translator=\count289
+\c@savedtranslator=\count290
+\c@shortauthor=\count291
+\c@savedshortauthor=\count292
+\c@shorteditor=\count293
+\c@savedshorteditor=\count294
+\c@labelname=\count295
+\c@savedlabelname=\count296
+\c@institution=\count297
+\c@savedinstitution=\count298
+\c@lista=\count299
+\c@savedlista=\count300
+\c@listb=\count301
+\c@savedlistb=\count302
+\c@listc=\count303
+\c@savedlistc=\count304
+\c@listd=\count305
+\c@savedlistd=\count306
+\c@liste=\count307
+\c@savedliste=\count308
+\c@listf=\count309
+\c@savedlistf=\count310
+\c@location=\count311
+\c@savedlocation=\count312
+\c@organization=\count313
+\c@savedorganization=\count314
+\c@origlocation=\count315
+\c@savedoriglocation=\count316
+\c@origpublisher=\count317
+\c@savedorigpublisher=\count318
+\c@publisher=\count319
+\c@savedpublisher=\count320
+\c@language=\count321
+\c@savedlanguage=\count322
+\c@origlanguage=\count323
+\c@savedoriglanguage=\count324
+\c@pageref=\count325
+\c@savedpageref=\count326
 \shorthandwidth=\skip82
 \shortjournalwidth=\skip83
 \shortserieswidth=\skip84
@@ -646,13 +671,13 @@
 Package biblatex Info: ... file 'biblatex.def' found.
 (/usr/share/texlive/texmf-dist/tex/latex/biblatex/biblatex.def
 File: biblatex.def 2017/12/19 v3.10 biblatex compatibility (PK/JW/AB)
-\c@textcitecount=\count319
-\c@textcitetotal=\count320
-\c@textcitemaxnames=\count321
-\c@biburlnumpenalty=\count322
-\c@biburlucpenalty=\count323
-\c@biburllcpenalty=\count324
-\c@smartand=\count325
+\c@textcitecount=\count327
+\c@textcitetotal=\count328
+\c@textcitemaxnames=\count329
+\c@biburlnumpenalty=\count330
+\c@biburlucpenalty=\count331
+\c@biburllcpenalty=\count332
+\c@smartand=\count333
 )
 Package biblatex Info: Trying to load bibliography style 'ieee'...
 Package biblatex Info: ... file 'ieee.bbx' found.
@@ -674,8 +699,8 @@
 Package biblatex Info: ... file 'standard.bbx' found.
 (/usr/share/texlive/texmf-dist/tex/latex/biblatex/bbx/standard.bbx
 File: standard.bbx 2017/12/19 v3.10 biblatex bibliography style (PK/JW/AB)
-\c@bbx:relatedcount=\count326
-\c@bbx:relatedtotal=\count327
+\c@bbx:relatedcount=\count334
+\c@bbx:relatedtotal=\count335
 ))))
 Package biblatex Info: Trying to load citation style 'ieee'...
 Package biblatex Info: ... file 'ieee.cbx' found.
@@ -687,8 +712,8 @@
 Package biblatex Info: ... file 'numeric-comp.cbx' found.
 (/usr/share/texlive/texmf-dist/tex/latex/biblatex/cbx/numeric-comp.cbx
 File: numeric-comp.cbx 2017/12/19 v3.10 biblatex citation style (PK/JW/AB)
-\c@cbx@tempcnta=\count328
-\c@cbx@tempcntb=\count329
+\c@cbx@tempcnta=\count336
+\c@cbx@tempcntb=\count337
 Package biblatex Info: Redefining '\cite'.
 Package biblatex Info: Redefining '\parencite'.
 Package biblatex Info: Redefining '\footcite'.
@@ -748,14 +773,14 @@
 Package: hycolor 2016/05/16 v1.8 Color options for hyperref/bookmark (HO)
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/auxhook.sty
 Package: auxhook 2016/05/16 v1.4 Hooks for auxiliary files (HO)
 )
-\@linkdim=\dimen153
-\Hy@linkcounter=\count330
-\Hy@pagecounter=\count331
+\@linkdim=\dimen157
+\Hy@linkcounter=\count338
+\Hy@pagecounter=\count339
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/pd1enc.def
 File: pd1enc.def 2018/02/06 v6.86b Hyperref: PDFDocEncoding definition (HO)
 )
-\Hy@SavedSpaceFactor=\count332
+\Hy@SavedSpaceFactor=\count340
 (/usr/share/texlive/texmf-dist/tex/latex/latexconfig/hyperref.cfg
 File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive
@@ -772,12 +797,12 @@
 Package hyperref Info: Plain pages OFF on input line 4524.
 Package hyperref Info: Backreferencing OFF on input line 4529.
 Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
 Package hyperref Info: Bookmarks ON on input line 4762.
-\c@Hy@tempcnt=\count333
+\c@Hy@tempcnt=\count341
 LaTeX Info: Redefining \url on input line 5115.
-\XeTeXLinkMargin=\dimen154
-\Fld@menulength=\count334
-\Field@Width=\dimen155
-\Fld@charsize=\dimen156
+\XeTeXLinkMargin=\dimen158
+\Fld@menulength=\count342
+\Field@Width=\dimen159
+\Fld@charsize=\dimen160
 Package hyperref Info: Hyper figures OFF on input line 6369.
 Package hyperref Info: Link nesting OFF on input line 6374.
 Package hyperref Info: Hyper index ON on input line 6377.
@@ -787,16 +812,16 @@
 Package hyperref Info: Link coloring with OCG OFF on input line 6394.
 Package hyperref Info: PDF/A mode OFF on input line 6399.
 LaTeX Info: Redefining \ref on input line 6439.
 LaTeX Info: Redefining \pageref on input line 6443.
-\Hy@abspage=\count335
-\c@Item=\count336
-\c@Hfootnote=\count337
+\Hy@abspage=\count343
+\c@Item=\count344
+\c@Hfootnote=\count345
 )
 Package hyperref Info: Driver (autodetected): hpdftex.
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/hpdftex.def
 File: hpdftex.def 2018/02/06 v6.86b Hyperref driver for pdfTeX
-\Fld@listcount=\count338
-\c@bookmark@seq@number=\count339
+\Fld@listcount=\count346
+\c@bookmark@seq@number=\count347
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty
 Package: rerunfilecheck 2016/05/16 v1.8 Rerun checks for auxiliary files (HO)
@@ -811,42 +836,42 @@
 Package biblatex Info: ... file 'english.lbx' found.
 (/usr/share/texlive/texmf-dist/tex/latex/biblatex/lbx/english.lbx
 File: english.lbx 2017/12/19 v3.10 biblatex localization (PK/JW/AB)
 )
-\@quotelevel=\count340
-\@quotereset=\count341
+\@quotelevel=\count348
+\@quotereset=\count349
 (./document.aux)
 \openout1 = `document.aux'.
-LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
-LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 63.
-LaTeX Font Info: ... okay on input line 63.
+LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
+LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 91.
+LaTeX Font Info: ... okay on input line 91.
 (/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
 [Loading MPS to PDF converter (version 2006.09.02).]
-\scratchcounter=\count342
-\scratchdimen=\dimen157
-\scratchbox=\box37
-\nofMPsegments=\count343
-\nofMParguments=\count344
-\everyMPshowfont=\toks39
-\MPscratchCnt=\count345
-\MPscratchDim=\dimen158
-\MPnumerator=\count346
-\makeMPintoPDFobject=\count347
-\everyMPtoPDFconversion=\toks40
+\scratchcounter=\count350
+\scratchdimen=\dimen161
+\scratchbox=\box39
+\nofMPsegments=\count351
+\nofMParguments=\count352
+\everyMPshowfont=\toks40
+\MPscratchCnt=\count353
+\MPscratchDim=\dimen162
+\MPnumerator=\count354
+\makeMPintoPDFobject=\count355
+\everyMPtoPDFconversion=\toks41
 )
 (/usr/share/texlive/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty
 Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf
@@ -867,6 +892,7 @@
 e
 Package caption Info: Begin \AtBeginDocument code.
 Package caption Info: float package is loaded.
 Package caption Info: hyperref package is loaded.
+Package caption Info: listings package is loaded.
 Package caption Info: longtable package is loaded.
 (/usr/share/texlive/texmf-dist/tex/latex/caption/ltcaption.sty
@@ -874,7 +900,7 @@
 Package: ltcaption 2013/06/09 v1.4-94 longtable captions (AR)
 )
 Package caption Info: supertabular package is loaded.
 Package caption Info: End \AtBeginDocument code.
-\AtBeginShipoutBox=\box38
+\AtBeginShipoutBox=\box40
 *geometry* driver: auto-detecting
 *geometry* detected driver: pdftex
@@ -895,12 +921,12 @@
 * \topmargin=-22.54622pt
 * \headheight=12.0pt
 * \headsep=25.0pt
-* \topskip=10.0pt
+* \topskip=12.0pt
 * \footskip=30.0pt
-* \marginparwidth=65.0pt
-* \marginparsep=11.0pt
+* \marginparwidth=44.0pt
+* \marginparsep=10.0pt
 * \columnsep=10.0pt
-* \skip\footins=9.0pt plus 4.0pt minus 2.0pt
+* \skip\footins=10.8pt plus 4.0pt minus 2.0pt
 * \hoffset=0.0pt
 * \voffset=0.0pt
 * \mag=1000
@@ -910,6 +936,7 @@
 .
 * \@reversemarginfalse
 *
 (1in=72.27pt=25.4mm, 1cm=28.453pt)
+\c@lstlisting=\count356
 Package biblatex Info: No input encoding detected.
 (biblatex) Assuming 'ascii'.
 Package biblatex Info: Automatic encoding selection.
@@ -926,248 +953,323 @@
 Package biblatex Warning: Biber reported the following issues
 A." has too many commas: skipping name.
 )
-Package biblatex Info: Reference section=0 on input line 63.
-Package biblatex Info: Reference segment=0 on input line 63.
-Package hyperref Info: Link coloring OFF on input line 63.
+Package biblatex Info: Reference section=0 on input line 91.
+Package biblatex Info: Reference segment=0 on input line 91.
+Package hyperref Info: Link coloring OFF on input line 91.
 (/usr/share/texlive/texmf-dist/tex/latex/hyperref/nameref.sty
 Package: nameref 2016/05/21 v2.44 Cross-referencing by name of section
 (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/gettitlestring.sty
 Package: gettitlestring 2016/05/16 v1.5 Cleanup title references (HO)
 )
-\c@section@level=\count348
+\c@section@level=\count357
 )
-LaTeX Info: Redefining \ref on input line 63.
-LaTeX Info: Redefining \pageref on input line 63.
-LaTeX Info: Redefining \nameref on input line 63.
+LaTeX Info: Redefining \ref on input line 91.
+LaTeX Info: Redefining \pageref on input line 91.
+LaTeX Info: Redefining \nameref on input line 91.
 (./document.out) (./document.out)
 \@outlinefile=\write5
 \openout5 = `document.out'.
-
+
 File: images/reading_logo.png Graphic file (type png)
-Package pdftex.def Info: images/reading_logo.png used on input line 66.
+Package pdftex.def Info: images/reading_logo.png used on input line 94.
 (pdftex.def) Requested size: 170.71916pt x 56.9057pt.
-LaTeX Font Info: Try loading font information for U+msa on input line 66.
+LaTeX Font Info: Try loading font information for U+msa on input line 94.
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd
 File: umsa.fd 2013/01/14 v3.01 AMS symbols A
 )
-LaTeX Font Info: Try loading font information for U+msb on input line 66.
+LaTeX Font Info: Try loading font information for U+msb on input line 94.
 (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd
 File: umsb.fd 2013/01/14 v3.01 AMS symbols B
 ) [1
 {/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} <./images/reading_logo.png>
-] [2] [3]
-(./document.toc [4] [5])
+] [2] [3] [4]
+(./document.toc [5] [6])
 \tf@toc=\write6
 \openout6 = `document.toc'.
- [6] [7] [8]
+ [7] [8] [9]
-LaTeX Font Info: Try loading font information for OMS+cmr on input line 162.
+LaTeX Font Info: Try loading font information for OMS+cmr on input line 190.
 (/usr/share/texlive/texmf-dist/tex/latex/base/omscmr.fd
 File: omscmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
 )
-LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <10> not available
-(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 162.
- [9] [10]
-Underfull \hbox (badness 10000) in paragraph at lines 178--180
+LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <12> not available
+(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 190.
+ [10] [11] [12]
+Underfull \hbox (badness 10000) in paragraph at lines 206--208
 []
-Missing character: There is no â in font cmr10!
-Missing character: There is no € in font cmr10!
-Missing character: There is no ™ in font cmr10!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
-Underfull \hbox (badness 10000) in paragraph at lines 187--189
+Underfull \hbox (badness 10000) in paragraph at lines 215--217
 []
-Underfull \hbox (badness 10000) in paragraph at lines 197--199
+Underfull \hbox (badness 10000) in paragraph at lines 225--227
+
+ []
+
+[13]
+Underfull \hbox (badness 10000) in paragraph at lines 235--237
 []
-Underfull \hbox (badness 10000) in paragraph at lines 207--209
+Underfull \hbox (badness 10000) in paragraph at lines 244--246
 []
-[11]
-Underfull \hbox (badness 10000) in paragraph at lines 216--218
- []
-[12] [13] [14] [15]
-
+[14] [15] [16] [17] [18] [19]
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ˜ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ˜ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ˜ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ˜ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ˜ in font cmr12!
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+ [20]
+
 File: images/perceptron.png Graphic file (type png)
-Package pdftex.def Info: images/perceptron.png used on input line 308.
+Package pdftex.def Info: images/perceptron.png used on input line 338.
 (pdftex.def) Requested size: 284.52713pt x 170.72142pt.
- [16]
-
+ [21 <./images/perceptron.png>]
+
 File: images/rnn_ffn.png Graphic file (type png)
-Package pdftex.def Info: images/rnn_ffn.png used on input line 328.
+Package pdftex.def Info: images/rnn_ffn.png used on input line 358.
 (pdftex.def) Requested size: 426.80307pt x 170.72112pt.
- [17 <./images/perceptron.png>]
-
+ [22 <./images/rnn_ffn.png>]
+
 File: images/lstm.png Graphic file (type png)
-Package pdftex.def Info: images/lstm.png used on input line 346.
+Package pdftex.def Info: images/lstm.png used on input line 376.
 (pdftex.def) Requested size: 256.07123pt x 199.1616pt.
- [18 <./images/rnn_ffn.png>] [19 <./images/lstm.png>]
-Missing character: There is no â in font cmr10!
-Missing character: There is no € in font cmr10!
-Missing character: There is no ™ in font cmr10!
-Missing character: There is no â in font cmr10!
-Missing character: There is no € in font cmr10!
-Missing character: There is no ™ in font cmr10!
- [20] [21] [22]
+ [23 <./images/lstm.png>] [24]
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+ [25]
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+ [26] [27] [28]
-Underfull \hbox (badness 10000) in paragraph at lines 456--458
+Underfull \hbox (badness 10000) in paragraph at lines 488--490
 []
-Underfull \hbox (badness 10000) in paragraph at lines 460--462
+Underfull \hbox (badness 10000) in paragraph at lines 492--494
 []
-Underfull \hbox (badness 10000) in paragraph at lines 463--467
+Underfull \hbox (badness 10000) in paragraph at lines 495--499
 []
-Underfull \hbox (badness 10000) in paragraph at lines 468--470
+Underfull \hbox (badness 10000) in paragraph at lines 500--502
 []
-Underfull \hbox (badness 10000) in paragraph at lines 471--475
+Underfull \hbox (badness 10000) in paragraph at lines 503--507
 []
-[23]
-LaTeX Font Info: Font shape `OMS/cmr/m/it' in size <10> not available
-(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 488.
- [24] [25]
-Missing character: There is no â in font cmr10!
-Missing character: There is no € in font cmr10!
-Missing character: There is no ™ in font cmr10!
- [26]
-
+[29] [30]
+LaTeX Font Info: Font shape `OMS/cmr/m/it' in size <12> not available
+(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 520.
+ [31] [32]
+Missing character: There is no â in font cmr12!
+Missing character: There is no € in font cmr12!
+Missing character: There is no ™ in font cmr12!
+ [33] [34] [35]
+
 File: images/Generic_Flow.png Graphic file (type png)
-Package pdftex.def Info: images/Generic_Flow.png used on input line 574.
+Package pdftex.def Info: images/Generic_Flow.png used on input line 609.
 (pdftex.def) Requested size: 512.13474pt x 227.62009pt.
-Overfull \hbox (71.28728pt too wide) in paragraph at lines 574--575
+Overfull \hbox (71.28728pt too wide) in paragraph at lines 609--610
 [][]
 []
-[27] [28 <./images/Generic_Flow.png (PNG copy)>]
-Underfull \hbox (badness 10000) in paragraph at lines 585--587
+[36 <./images/Generic_Flow.png (PNG copy)>]
+Underfull \hbox (badness 10000) in paragraph at lines 620--622
 []
-
+
 File: images/Dataflow.png Graphic file (type png)
-Package pdftex.def Info: images/Dataflow.png used on input line 590.
+Package pdftex.def Info: images/Dataflow.png used on input line 625.
 (pdftex.def) Requested size: 512.09683pt x 227.62125pt.
-Overfull \hbox (71.24937pt too wide) in paragraph at lines 590--592
+Overfull \hbox (71.24937pt too wide) in paragraph at lines 625--627
 []
 []
-[29 <./images/Dataflow.png (PNG copy)>]
+[37 <./images/Dataflow.png (PNG copy)>]
-
+
 File: images/Data_Collector.png Graphic file (type png)
-Package pdftex.def Info: images/Data_Collector.png used on input line 598.
+Package pdftex.def Info: images/Data_Collector.png used on input line 633.
 (pdftex.def) Requested size: 426.78574pt x 227.61746pt.
- [30 <./images/Data_Collector.png (PNG copy)>]
+ [38 <./images/Data_Collector.png (PNG copy)>]
-
+
 File: images/Analysis_Engine.png Graphic file (type png)
-Package pdftex.def Info: images/Analysis_Engine.png used on input line 613.
+Package pdftex.def Info: images/Analysis_Engine.png used on input line 648.
 (pdftex.def) Requested size: 483.67276pt x 227.62561pt.
-Overfull \hbox (42.8253pt too wide) in paragraph at lines 613--615
+Overfull \hbox (42.8253pt too wide) in paragraph at lines 648--650
 []
 []
-[31 <./images/Analysis_Engine.png (PNG copy)>]
+[39 <./images/Analysis_Engine.png (PNG copy)>]
-
+
 File: images/Neural_Network.png Graphic file (type png)
-Package pdftex.def Info: images/Neural_Network.png used on input line 629.
+Package pdftex.def Info: images/Neural_Network.png used on input line 664.
 (pdftex.def) Requested size: 483.6893pt x 341.42757pt.
-Overfull \hbox (42.84184pt too wide) in paragraph at lines 629--631
+Overfull \hbox (42.84184pt too wide) in paragraph at lines 664--666
 []
 []
-[32 <./images/Neural_Network.png (PNG copy)>]
+[40] [41 <./images/Neural_Network.png (PNG copy)>]
-
+
 File: images/Future_Predictions.png Graphic file (type png)
-Package pdftex.def Info: images/Future_Predictions.png used on input line 643.
+Package pdftex.def Info: images/Future_Predictions.png used on input line 678.
 (pdftex.def) Requested size: 512.1362pt x 227.62119pt.
-Overfull \hbox (71.28874pt too wide) in paragraph at lines 643--645
+Overfull \hbox (71.28874pt too wide) in paragraph at lines 678--680
 []
 []
-[33 <./images/Future_Predictions.png (PNG copy)>]
+[42 <./images/Future_Predictions.png (PNG copy)>]
-
+
 File: images/Frontend_Application.png Graphic file (type png)
-Package pdftex.def Info: images/Frontend_Application.png used on input line 65
-7.
+Package pdftex.def Info: images/Frontend_Application.png used on input line 69
+1.
 (pdftex.def) Requested size: 284.52162pt x 256.07664pt.
- [34 <./images/Frontend_Application.png (PNG copy)>]
+ [43 <./images/Frontend_Application.png (PNG copy)>]
-
+
 File: images/interface_design.png Graphic file (type png)
-Package pdftex.def Info: images/interface_design.png used on input line 676.
+Package pdftex.def Info: images/interface_design.png used on input line 710.
 (pdftex.def) Requested size: 227.61479pt x 369.88063pt.
- [35 <./images/interface_design.png>] [36] [37] [38]
-[39]
-Overfull \hbox (1.46323pt too wide) in paragraph at lines 736--736
-\OT1/cmr/m/n/10 [On-line]. Avail-able: []$\OT1/cmtt/m/n/10 https : / / www . co
-deproject . com / Articles / 1201444 / Stock -[] Predictions
-[]
-[40]
-Overfull \hbox (22.26572pt too wide) in paragraph at lines 736--736
-\OT1/cmr/m/n/10 able: []$\OT1/cmtt/m/n/10 https : / / towardsdatascience . com
-/ recurrent -[] neural -[] networks -[] and -[] lstm -[] 4b601dd822a5$[]\OT1/cm
-r/m/n/10 .
-[]
-Overfull \hbox (59.98181pt too wide) in paragraph at lines 736--736
-[]$\OT1/cmtt/m/n/10 http : / / deeplearning . stanford . edu / tutorial / super
-vised / OptimizationStochasticGradientDescent$[]\OT1/cmr/m/n/10 .
-[]
-[41] [42]
+ [44 <./images/interface_design.png>]
+Underfull \hbox (badness 10000) in paragraph at lines 721--723
+ []
+(/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
+File: lstlang1.sty 2015/06/04 1.6 listings language file
+) [45] [46]
+[47]
+LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <10> not available
+(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 847.
+ [48] [49]
+Underfull \hbox (badness 10000) in paragraph at lines 934--936
+ []
+[50]
+Underfull \hbox (badness 10000) in paragraph at lines 971--973
+ []
+[51]
+LaTeX Font Info: Try loading font information for OML+cmr on input line 985.
+ (/usr/share/texlive/texmf-dist/tex/latex/base/omlcmr.fd
+File: omlcmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
+)
+LaTeX Font Info: Font shape `OML/cmr/m/n' in size <10> not available
+(Font) Font shape `OML/cmm/m/it' tried instead on input line 985.
+ [52] [53]
+[54]
+Missing character: There is no  in font cmr10!
+Missing character: There is no £ in font cmr10!
+ [55] [56] [57] [58] [59] [60] [61]
+Overfull \hbox (5.27716pt too wide) in paragraph at lines 1234--1234
+\OT1/cmr/m/it/12 ence on sig-nal pro-cess-ing, com-mu-ni-ca-tion, power and em-
+bed-ded sys-tem (SCOPES)\OT1/cmr/m/n/12 ,
+ []
+Overfull \hbox (42.7786pt too wide) in paragraph at lines 1234--1234
+\OT1/cmr/m/n/12 works,'' To-wards Data Sci-ence, 2018. [On-line]. Avail-able: [
+]$\OT1/cmtt/m/n/12 https : / / towardsdatascience .
+ []
+[62]
+Overfull \hbox (86.07425pt too wide) in paragraph at lines 1234--1234
+\OT1/cmr/m/n/12 works,'' Ma-chine Larn-ing Mas-tery, 2017. [On-line]. Avail-abl
+e: []$\OT1/cmtt/m/n/12 https : / / machinelearningmastery .
+ []
+Overfull \hbox (30.84552pt too wide) in paragraph at lines 1234--1234
+\OT1/cmr/m/n/12 lem,'' Su-per Data Sci-ence, 2018. [On-line]. Avail-able: []$\O
+T1/cmtt/m/n/12 https : / / www . superdatascience .
+ []
+[63]
+Overfull \hbox (9.16136pt too wide) in paragraph at lines 1234--1234
+\OT1/cmr/m/n/12 2019. [On-line]. Avail-able: []$\OT1/cmtt/m/n/12 https : / / me
+dium . com / datadriveninvestor / overview -[]
+ []
+[64] [65]
 pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
 rsion <1.7>, but at most version <1.5> allowed
-
+
 File: PID.pdf Graphic file (type pdf)
-Package pdftex.def Info: PID.pdf used on input line 741.
+Package pdftex.def Info: PID.pdf used on input line 1239.
 (pdftex.def) Requested size: 597.551pt x 845.07512pt.
@@ -1175,7 +1277,7 @@
 pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
 rsion <1.7>, but at most version <1.5> allowed
 File: PID.pdf Graphic file (type pdf)
-Package pdftex.def Info: PID.pdf used on input line 741.
+Package pdftex.def Info: PID.pdf used on input line 1239.
(pdftex.def) Requested size: 597.551pt x 845.07512pt. @@ -1185,253 +1287,256 @@ rsion <1.7>, but at most version <1.5> allowed pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page1 used on input line 741. +Package pdftex.def Info: PID.pdf , page1 used on input line 1239. (pdftex.def) Requested size: 597.551pt x 845.07512pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page1 used on input line 741. +Package pdftex.def Info: PID.pdf , page1 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[43] +[66] File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page1 used on input line 741. +Package pdftex.def Info: PID.pdf , page1 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page1 used on input line 741. +Package pdftex.def Info: PID.pdf , page1 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page1 used on input line 741. +Package pdftex.def Info: PID.pdf , page1 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. - [44 <./PID.pdf>] + [67 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page2 used on input line 741. +Package pdftex.def Info: PID.pdf , page2 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page2 used on input line 741. +Package pdftex.def Info: PID.pdf , page2 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page2 used on input line 741. +Package pdftex.def Info: PID.pdf , page2 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[45 <./PID.pdf>] +[68 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page3 used on input line 741. +Package pdftex.def Info: PID.pdf , page3 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page3 used on input line 741. +Package pdftex.def Info: PID.pdf , page3 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page3 used on input line 741. +Package pdftex.def Info: PID.pdf , page3 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[46 <./PID.pdf>] +[69 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page4 used on input line 741. +Package pdftex.def Info: PID.pdf , page4 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. 
File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page4 used on input line 741. +Package pdftex.def Info: PID.pdf , page4 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page4 used on input line 741. +Package pdftex.def Info: PID.pdf , page4 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[47 <./PID.pdf>] +[70 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page5 used on input line 741. +Package pdftex.def Info: PID.pdf , page5 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page5 used on input line 741. +Package pdftex.def Info: PID.pdf , page5 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page5 used on input line 741. +Package pdftex.def Info: PID.pdf , page5 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[48 <./PID.pdf>] +[71 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page6 used on input line 741. +Package pdftex.def Info: PID.pdf , page6 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page6 used on input line 741. +Package pdftex.def Info: PID.pdf , page6 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page6 used on input line 741. +Package pdftex.def Info: PID.pdf , page6 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[49 <./PID.pdf>] +[72 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page7 used on input line 741. +Package pdftex.def Info: PID.pdf , page7 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page7 used on input line 741. +Package pdftex.def Info: PID.pdf , page7 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page7 used on input line 741. +Package pdftex.def Info: PID.pdf , page7 used on input line 1239. (pdftex.def) Requested size: 562.1644pt x 795.0303pt. -[50 <./PID.pdf>] +[73 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page8 used on input line 741. +Package pdftex.def Info: PID.pdf , page8 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page8 used on input line 741. +Package pdftex.def Info: PID.pdf , page8 used on input line 1239. 
(pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page8 used on input line 741. +Package pdftex.def Info: PID.pdf , page8 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. -[51 <./PID.pdf>] +[74 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page9 used on input line 741. +Package pdftex.def Info: PID.pdf , page9 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page9 used on input line 741. +Package pdftex.def Info: PID.pdf , page9 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page9 used on input line 741. +Package pdftex.def Info: PID.pdf , page9 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. -[52 <./PID.pdf>] +[75 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page10 used on input line 741. +Package pdftex.def Info: PID.pdf , page10 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page10 used on input line 741. +Package pdftex.def Info: PID.pdf , page10 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page10 used on input line 741. +Package pdftex.def Info: PID.pdf , page10 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. -[53 <./PID.pdf>] +[76 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page11 used on input line 741. +Package pdftex.def Info: PID.pdf , page11 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page11 used on input line 741. +Package pdftex.def Info: PID.pdf , page11 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page11 used on input line 741. +Package pdftex.def Info: PID.pdf , page11 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. -[54 <./PID.pdf>] +[77 <./PID.pdf>] pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve rsion <1.7>, but at most version <1.5> allowed - + File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page12 used on input line 741. +Package pdftex.def Info: PID.pdf , page12 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page12 used on input line 741. +Package pdftex.def Info: PID.pdf , page12 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. File: PID.pdf Graphic file (type pdf) -Package pdftex.def Info: PID.pdf , page12 used on input line 741. 
+Package pdftex.def Info: PID.pdf , page12 used on input line 1239. (pdftex.def) Requested size: 795.0303pt x 562.1644pt. -[55 <./PID.pdf>] -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 745. - [56] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 745. +[78 <./PID.pdf>] +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1243. + [79] +Package atveryend Info: Empty hook `AfterLastShipout' on input line 1243. (./document.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 745. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 745. +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1243. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1243. + Package rerunfilecheck Info: File `document.out' has not changed. -(rerunfilecheck) Checksum: EE0B06B4CF3C74748D09C1D69BD8A1A8;9738. +(rerunfilecheck) Checksum: 285E84D8F0115130141DAF2B9AB7C433;10818. Package logreq Info: Writing requests to 'document.run.xml'. \openout1 = `document.run.xml'. ) Here is how much of TeX's memory you used: - 22553 strings out of 492982 - 357315 string characters out of 6134895 - 846942 words of memory out of 5000000 - 25624 multiletter control sequences out of 15000+600000 - 12017 words of font info for 47 fonts, out of 8000000 for 9000 + 24568 strings out of 492982 + 385699 string characters out of 6134895 + 1048130 words of memory out of 5000000 + 27195 multiletter control sequences out of 15000+600000 + 13923 words of font info for 54 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 45i,18n,67p,2008b,1302s stack positions out of 5000i,500n,10000p,200000b,80000s - -Output written on document.pdf (56 pages, 1411693 bytes). + 45i,18n,78p,2008b,1819s stack positions out of 5000i,500n,10000p,200000b,80000s + +Output written on document.pdf (79 pages, 1486919 bytes). PDF statistics: - 913 PDF objects out of 1000 (max. 8388607) - 796 compressed objects within 8 object streams - 163 named destinations out of 1000 (max. 500000) - 640 words of extra memory for PDF output out of 10000 (max. 10000000) + 1554 PDF objects out of 1728 (max. 8388607) + 1405 compressed objects within 15 object streams + 588 named destinations out of 1000 (max. 500000) + 696 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/document.out b/document.out index 6ffa8ec..3f86170 100644 --- a/document.out +++ b/document.out @@ -8,56 +8,63 @@ \BOOKMARK [2][-]{section*.9}{\376\377\000P\000r\000o\000j\000e\000c\000t\000\040\000M\000o\000t\000i\000v\000a\000t\000i\000o\000n}{section*.6}% 8 \BOOKMARK [2][-]{section*.10}{\376\377\000T\000e\000c\000h\000n\000i\000c\000a\000l\000\040\000S\000p\000e\000c\000i\000f\000i\000c\000a\000t\000i\000o\000n}{section*.6}% 9 \BOOKMARK [2][-]{section*.11}{\376\377\000P\000r\000o\000j\000e\000c\000t\000\040\000C\000o\000n\000s\000t\000r\000a\000i\000n\000t\000s}{section*.6}% 10 -\BOOKMARK [1][-]{section*.12}{\376\377\000L\000i\000t\000e\000r\000a\000t\000u\000r\000e\000\040\000R\000e\000v\000i\000e\000w}{}% 11 -\BOOKMARK [2][-]{section*.13}{\376\377\000E\000x\000i\000s\000t\000i\000n\000g\000\040\000T\000o\000o\000l\000s}{section*.12}% 12 -\BOOKMARK [2][-]{section*.14}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000r\000e\000s\000e\000a\000r\000c\000h}{section*.12}% 13 -\BOOKMARK [2][-]{section*.15}{\376\377\000D\000a\000t\000a\000\040\000C\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.12}% 14 -\BOOKMARK [3][-]{section*.16}{\376\377\000T\000w\000i\000t\000t\000e\000r\000\040\000a\000n\000d\000\040\000T\000w\000i\000t\000t\000e\000r\000\040\000A\000P\000I}{section*.15}% 15 -\BOOKMARK [3][-]{section*.17}{\376\377\000T\000w\000e\000e\000p\000y\000\040\000P\000y\000t\000h\000o\000n\000\040\000P\000a\000c\000k\000a\000g\000e}{section*.15}% 16 -\BOOKMARK [2][-]{section*.18}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.12}% 17 -\BOOKMARK [3][-]{section*.19}{\376\377\000N\000a\000t\000u\000r\000a\000l\000\040\000L\000a\000n\000g\000u\000a\000g\000e\000\040\000P\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.18}% 18 -\BOOKMARK [3][-]{section*.20}{\376\377\000V\000a\000l\000e\000n\000c\000e\000\040\000A\000w\000a\000r\000e\000\040\000D\000i\000c\000t\000i\000o\000n\000a\000r\000y\000\040\000a\000n\000d\000\040\000s\000E\000n\000t\000i\000m\000e\000n\000t\000\040\000R\000e\000a\000s\000o\000n\000i\000n\000g}{section*.18}% 19 -\BOOKMARK [2][-]{section*.21}{\376\377\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000s}{section*.12}% 20 -\BOOKMARK [3][-]{section*.22}{\376\377\000R\000e\000c\000u\000r\000r\000e\000n\000t\000\040\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000\040\000\050\000R\000N\000N\000\051}{section*.21}% 21 -\BOOKMARK [3][-]{section*.23}{\376\377\000L\000o\000n\000g\000-\000S\000h\000o\000r\000t\000\040\000T\000e\000r\000m\000\040\000M\000e\000m\000o\000r\000y\000\040\000\050\000L\000S\000T\000M\000\051}{section*.21}% 22 -\BOOKMARK [3][-]{section*.24}{\376\377\000K\000e\000r\000a\000s\000\040\000a\000n\000d\000\040\000T\000e\000n\000s\000o\000r\000F\000l\000o\000w}{section*.21}% 23 -\BOOKMARK [3][-]{section*.25}{\376\377\000O\000p\000t\000i\000m\000i\000s\000e\000r\000s}{section*.21}% 24 -\BOOKMARK [2][-]{section*.26}{\376\377\000M\000a\000c\000h\000i\000n\000e\000\040\000L\000e\000a\000r\000n\000i\000n\000g}{section*.12}% 25 -\BOOKMARK [3][-]{section*.27}{\376\377\000N\000a\000i\000v\000e\000\040\000B\000a\000y\000e\000s}{section*.26}% 26 -\BOOKMARK [1][-]{section*.28}{\376\377\000S\000o\000l\000u\000t\000i\000o\000n\000\040\000A\000p\000p\000r\000o\000a\000c\000h}{}% 27 -\BOOKMARK [2][-]{section*.29}{\376\377\000D\000a\000t\000a\000\040\000g\000a\000t\000h\000e\000r\000i\000n\000g}{section*.28}% 28 -\BOOKMARK 
[2][-]{section*.30}{\376\377\000D\000a\000t\000a\000\040\000p\000r\000e\000-\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.28}% 29 -\BOOKMARK [2][-]{section*.31}{\376\377\000S\000p\000a\000m\000\040\000F\000i\000l\000t\000e\000r\000i\000n\000g}{section*.28}% 30 -\BOOKMARK [2][-]{section*.32}{\376\377\000L\000a\000n\000g\000u\000a\000g\000e\000\040\000D\000e\000t\000e\000c\000t\000i\000o\000n}{section*.28}% 31 -\BOOKMARK [2][-]{section*.33}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.28}% 32 -\BOOKMARK [2][-]{section*.34}{\376\377\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k}{section*.28}% 33 -\BOOKMARK [2][-]{section*.36}{\376\377\000P\000r\000i\000c\000e\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section*.28}% 34 -\BOOKMARK [2][-]{section*.37}{\376\377\000F\000r\000o\000n\000t\000e\000n\000d\000\040\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n}{section*.28}% 35 -\BOOKMARK [2][-]{section*.38}{\376\377\000W\000i\000t\000h\000\040\000r\000e\000f\000e\000r\000e\000n\000c\000e\000\040\000t\000o\000\040\000I\000n\000i\000t\000i\000a\000l\000\040\000P\000I\000D}{section*.28}% 36 -\BOOKMARK [2][-]{section*.39}{\376\377\000S\000o\000l\000u\000t\000i\000o\000n\000\040\000S\000u\000m\000m\000a\000r\000y}{section*.28}% 37 -\BOOKMARK [2][-]{section*.40}{\376\377\000D\000a\000t\000a\000\040\000f\000l\000o\000w\000\040\000O\000v\000e\000r\000v\000i\000e\000w}{section*.28}% 38 -\BOOKMARK [1][-]{section*.41}{\376\377\000S\000y\000s\000t\000e\000m\000\040\000D\000e\000s\000i\000g\000n}{}% 39 -\BOOKMARK [2][-]{section*.42}{\376\377\000D\000a\000t\000a\000f\000l\000o\000w\000\040\000D\000e\000s\000i\000g\000n\000s}{section*.41}% 40 -\BOOKMARK [2][-]{section*.43}{\376\377\000U\000M\000L\000\040\000C\000o\000m\000p\000o\000n\000e\000n\000t\000\040\000D\000e\000s\000i\000g\000n}{section*.41}% 41 -\BOOKMARK [2][-]{section*.44}{\376\377\000I\000n\000t\000e\000r\000f\000a\000c\000e\000\040\000D\000e\000s\000i\000g\000n}{section*.41}% 42 -\BOOKMARK [1][-]{section*.46}{\376\377\000I\000m\000p\000l\000e\000m\000e\000n\000t\000a\000t\000i\000o\000n}{}% 43 -\BOOKMARK [2][-]{section*.47}{\376\377\000D\000a\000t\000a\000\040\000c\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.46}% 44 -\BOOKMARK [3][-]{section*.48}{\376\377\000P\000r\000i\000c\000e\000\040\000T\000i\000m\000e\000-\000s\000e\000r\000i\000e\000s\000\040\000D\000a\000t\000a}{section*.47}% 45 -\BOOKMARK [2][-]{section*.49}{\376\377\000D\000a\000t\000a\000\040\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.46}% 46 -\BOOKMARK [3][-]{section*.50}{\376\377\000P\000r\000e\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.49}% 47 -\BOOKMARK [3][-]{section*.54}{\376\377\000S\000p\000a\000m\000\040\000F\000i\000l\000t\000e\000r\000i\000n\000g}{section*.49}% 48 -\BOOKMARK [2][-]{section*.57}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.46}% 49 -\BOOKMARK [3][-]{section*.58}{\376\377\000V\000A\000D\000E\000R}{section*.57}% 50 -\BOOKMARK [2][-]{section*.59}{\376\377\000R\000e\000c\000u\000r\000r\000e\000n\000t\000\040\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000\040\000-\000\040\000L\000S\000T\000M}{section*.46}% 51 -\BOOKMARK 
[3][-]{section*.60}{\376\377\000T\000r\000a\000i\000n\000i\000n\000g\000\040\000a\000n\000d\000\040\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000o\000d\000e\000l}{section*.59}% 52 -\BOOKMARK [3][-]{section*.61}{\376\377\000S\000c\000o\000r\000i\000n\000g\000\040\000a\000n\000d\000\040\000V\000a\000l\000i\000d\000a\000t\000i\000o\000n}{section*.59}% 53 -\BOOKMARK [3][-]{section*.62}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000P\000r\000e\000d\000i\000c\000t\000i\000o\000n\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section*.59}% 54 -\BOOKMARK [1][-]{section*.63}{\376\377\000T\000e\000s\000t\000i\000n\000g\000:\000\040\000V\000e\000r\000i\000f\000i\000c\000a\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 55 -\BOOKMARK [1][-]{section*.64}{\376\377\000D\000i\000s\000c\000u\000s\000s\000i\000o\000n\000:\000\040\000C\000o\000n\000t\000r\000i\000b\000u\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 56 -\BOOKMARK [2][-]{section*.65}{\376\377\000L\000i\000m\000i\000t\000a\000t\000i\000o\000n\000s}{section*.64}% 57 -\BOOKMARK [1][-]{section*.66}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{}% 58 -\BOOKMARK [2][-]{section*.67}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n}{section*.66}% 59 -\BOOKMARK [2][-]{section*.68}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{section*.66}% 60 -\BOOKMARK [1][-]{section*.70}{\376\377\000A\000p\000p\000e\000n\000d\000i\000c\000e\000s}{}% 61 -\BOOKMARK [2][-]{section*.71}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000A\000\040\000-\000\040\000P\000r\000o\000j\000e\000c\000t\000\040\000I\000n\000i\000t\000i\000a\000t\000i\000o\000n\000\040\000D\000o\000c\000u\000m\000e\000n\000t}{section*.70}% 62 -\BOOKMARK [2][-]{section*.72}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000B\000\040\000-\000\040\000L\000o\000g\000\040\000b\000o\000o\000k}{section*.70}% 63 +\BOOKMARK [1][-]{section*.12}{\376\377\000Q\000u\000a\000l\000i\000t\000y\000\040\000G\000o\000a\000l\000s}{}% 11 +\BOOKMARK [1][-]{section*.13}{\376\377\000L\000i\000t\000e\000r\000a\000t\000u\000r\000e\000\040\000R\000e\000v\000i\000e\000w}{}% 12 +\BOOKMARK [2][-]{section*.14}{\376\377\000E\000x\000i\000s\000t\000i\000n\000g\000\040\000T\000o\000o\000l\000s}{section*.13}% 13 +\BOOKMARK [2][-]{section*.15}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000r\000e\000s\000e\000a\000r\000c\000h}{section*.13}% 14 +\BOOKMARK [2][-]{section*.16}{\376\377\000D\000a\000t\000a\000\040\000C\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.13}% 15 +\BOOKMARK [3][-]{section*.17}{\376\377\000T\000w\000i\000t\000t\000e\000r\000\040\000a\000n\000d\000\040\000T\000w\000i\000t\000t\000e\000r\000\040\000A\000P\000I}{section*.16}% 16 +\BOOKMARK [3][-]{section*.18}{\376\377\000T\000w\000e\000e\000p\000y\000\040\000P\000y\000t\000h\000o\000n\000\040\000P\000a\000c\000k\000a\000g\000e}{section*.16}% 17 +\BOOKMARK [2][-]{section*.19}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.13}% 18 +\BOOKMARK 
[3][-]{section*.20}{\376\377\000N\000a\000t\000u\000r\000a\000l\000\040\000L\000a\000n\000g\000u\000a\000g\000e\000\040\000P\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.19}% 19 +\BOOKMARK [3][-]{section*.21}{\376\377\000V\000a\000l\000e\000n\000c\000e\000\040\000A\000w\000a\000r\000e\000\040\000D\000i\000c\000t\000i\000o\000n\000a\000r\000y\000\040\000a\000n\000d\000\040\000s\000E\000n\000t\000i\000m\000e\000n\000t\000\040\000R\000e\000a\000s\000o\000n\000i\000n\000g}{section*.19}% 20 +\BOOKMARK [2][-]{section*.22}{\376\377\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000s}{section*.13}% 21 +\BOOKMARK [3][-]{section*.23}{\376\377\000R\000e\000c\000u\000r\000r\000e\000n\000t\000\040\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000\040\000\050\000R\000N\000N\000\051}{section*.22}% 22 +\BOOKMARK [3][-]{section*.24}{\376\377\000L\000o\000n\000g\000-\000S\000h\000o\000r\000t\000\040\000T\000e\000r\000m\000\040\000M\000e\000m\000o\000r\000y\000\040\000\050\000L\000S\000T\000M\000\051}{section*.22}% 23 +\BOOKMARK [3][-]{section*.25}{\376\377\000K\000e\000r\000a\000s\000\040\000a\000n\000d\000\040\000T\000e\000n\000s\000o\000r\000F\000l\000o\000w}{section*.22}% 24 +\BOOKMARK [3][-]{section*.26}{\376\377\000O\000p\000t\000i\000m\000i\000s\000e\000r\000s}{section*.22}% 25 +\BOOKMARK [2][-]{section*.27}{\376\377\000M\000a\000c\000h\000i\000n\000e\000\040\000L\000e\000a\000r\000n\000i\000n\000g}{section*.13}% 26 +\BOOKMARK [3][-]{section*.28}{\376\377\000N\000a\000i\000v\000e\000\040\000B\000a\000y\000e\000s}{section*.27}% 27 +\BOOKMARK [2][-]{section*.29}{\376\377\000R\000a\000n\000d\000o\000m\000\040\000F\000o\000r\000e\000s\000t}{section*.13}% 28 +\BOOKMARK [1][-]{section*.30}{\376\377\000S\000o\000l\000u\000t\000i\000o\000n\000\040\000A\000p\000p\000r\000o\000a\000c\000h}{}% 29 +\BOOKMARK [2][-]{section*.31}{\376\377\000D\000a\000t\000a\000\040\000g\000a\000t\000h\000e\000r\000i\000n\000g}{section*.30}% 30 +\BOOKMARK [2][-]{section*.32}{\376\377\000D\000a\000t\000a\000\040\000p\000r\000e\000-\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.30}% 31 +\BOOKMARK [2][-]{section*.33}{\376\377\000S\000p\000a\000m\000\040\000F\000i\000l\000t\000e\000r\000i\000n\000g}{section*.30}% 32 +\BOOKMARK [2][-]{section*.34}{\376\377\000L\000a\000n\000g\000u\000a\000g\000e\000\040\000D\000e\000t\000e\000c\000t\000i\000o\000n}{section*.30}% 33 +\BOOKMARK [2][-]{section*.35}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.30}% 34 +\BOOKMARK [2][-]{section*.36}{\376\377\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k}{section*.30}% 35 +\BOOKMARK [2][-]{section*.38}{\376\377\000P\000r\000i\000c\000e\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section*.30}% 36 +\BOOKMARK [2][-]{section*.39}{\376\377\000F\000r\000o\000n\000t\000e\000n\000d\000\040\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n}{section*.30}% 37 +\BOOKMARK [2][-]{section*.40}{\376\377\000W\000i\000t\000h\000\040\000r\000e\000f\000e\000r\000e\000n\000c\000e\000\040\000t\000o\000\040\000I\000n\000i\000t\000i\000a\000l\000\040\000P\000I\000D}{section*.30}% 38 +\BOOKMARK [2][-]{section*.41}{\376\377\000S\000o\000l\000u\000t\000i\000o\000n\000\040\000S\000u\000m\000m\000a\000r\000y}{section*.30}% 39 +\BOOKMARK [2][-]{section*.42}{\376\377\000D\000a\000t\000a\000\040\000f\000l\000o\000w\000\040\000O\000v\000e\000r\000v\000i\000e\000w}{section*.30}% 40 +\BOOKMARK 
[1][-]{section*.43}{\376\377\000S\000y\000s\000t\000e\000m\000\040\000D\000e\000s\000i\000g\000n}{}% 41 +\BOOKMARK [2][-]{section*.44}{\376\377\000D\000a\000t\000a\000f\000l\000o\000w\000\040\000D\000e\000s\000i\000g\000n\000s}{section*.43}% 42 +\BOOKMARK [2][-]{section*.45}{\376\377\000I\000n\000t\000e\000r\000f\000a\000c\000e\000\040\000D\000e\000s\000i\000g\000n}{section*.43}% 43 +\BOOKMARK [1][-]{section*.47}{\376\377\000I\000m\000p\000l\000e\000m\000e\000n\000t\000a\000t\000i\000o\000n}{}% 44 +\BOOKMARK [2][-]{section*.48}{\376\377\000D\000a\000t\000a\000\040\000c\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.47}% 45 +\BOOKMARK [3][-]{section*.49}{\376\377\000P\000r\000i\000c\000e\000\040\000T\000i\000m\000e\000-\000S\000e\000r\000i\000e\000s\000\040\000H\000i\000s\000t\000o\000r\000i\000c\000a\000l\000\040\000D\000a\000t\000a}{section*.48}% 46 +\BOOKMARK [3][-]{section*.50}{\376\377\000P\000r\000i\000c\000e\000\040\000T\000i\000m\000e\000-\000S\000e\000r\000i\000e\000s\000\040\000L\000i\000v\000e\000\040\000D\000a\000t\000a}{section*.48}% 47 +\BOOKMARK [3][-]{section*.51}{\376\377\000H\000i\000s\000t\000o\000r\000i\000c\000a\000l\000\040\000T\000w\000e\000e\000t\000\040\000C\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.48}% 48 +\BOOKMARK [3][-]{section*.52}{\376\377\000L\000i\000v\000e\000\040\000T\000w\000e\000e\000t\000\040\000C\000o\000l\000l\000e\000c\000t\000i\000o\000n}{section*.48}% 49 +\BOOKMARK [2][-]{section*.53}{\376\377\000D\000a\000t\000a\000\040\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.47}% 50 +\BOOKMARK [3][-]{section*.54}{\376\377\000P\000r\000e\000p\000r\000o\000c\000e\000s\000s\000i\000n\000g}{section*.53}% 51 +\BOOKMARK [3][-]{section*.58}{\376\377\000S\000p\000a\000m\000\040\000F\000i\000l\000t\000e\000r\000i\000n\000g}{section*.53}% 52 +\BOOKMARK [2][-]{section*.60}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section*.47}% 53 +\BOOKMARK [3][-]{section*.61}{\376\377\000V\000A\000D\000E\000R}{section*.60}% 54 +\BOOKMARK [2][-]{section*.62}{\376\377\000R\000e\000c\000u\000r\000r\000e\000n\000t\000\040\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000\040\000-\000\040\000L\000S\000T\000M}{section*.47}% 55 +\BOOKMARK [3][-]{section*.63}{\376\377\000T\000r\000a\000i\000n\000i\000n\000g\000\040\000a\000n\000d\000\040\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000o\000d\000e\000l}{section*.62}% 56 +\BOOKMARK [3][-]{section*.64}{\376\377\000S\000c\000o\000r\000i\000n\000g\000\040\000a\000n\000d\000\040\000V\000a\000l\000i\000d\000a\000t\000i\000o\000n}{section*.62}% 57 +\BOOKMARK [2][-]{section*.65}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000P\000r\000e\000d\000i\000c\000t\000i\000o\000n\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section*.47}% 58 +\BOOKMARK [2][-]{section*.66}{\376\377\000M\000a\000i\000n\000\040\000F\000i\000l\000e\000\040\000'\000M\000a\000i\000n\000.\000p\000y\000'}{section*.47}% 59 +\BOOKMARK [2][-]{section*.67}{\376\377\000M\000i\000s\000c\000e\000l\000l\000a\000n\000e\000o\000u\000s}{section*.47}% 60 +\BOOKMARK [1][-]{section*.68}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000e\000t\000r\000i\000c\000s\000\040\000a\000n\000d\000\040\000A\000c\000c\000u\000r\000a\000c\000y}{}% 61 +\BOOKMARK [1][-]{section*.69}{\376\377\000P\000r\000o\000j\000e\000c\000t\000\040\000E\000v\000a\000l\000u\000a\000t\000i\000o\000n}{}% 62 +\BOOKMARK 
[1][-]{section*.70}{\376\377\000D\000i\000s\000c\000u\000s\000s\000i\000o\000n\000:\000\040\000C\000o\000n\000t\000r\000i\000b\000u\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 63
+\BOOKMARK [2][-]{section*.71}{\376\377\000L\000i\000m\000i\000t\000a\000t\000i\000o\000n\000s}{section*.70}% 64
+\BOOKMARK [1][-]{section*.72}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{}% 65
+\BOOKMARK [2][-]{section*.73}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n}{section*.72}% 66
+\BOOKMARK [2][-]{section*.74}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{section*.72}% 67
+\BOOKMARK [1][-]{section*.76}{\376\377\000A\000p\000p\000e\000n\000d\000i\000c\000e\000s}{}% 68
+\BOOKMARK [2][-]{section*.77}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000A\000\040\000-\000\040\000P\000r\000o\000j\000e\000c\000t\000\040\000I\000n\000i\000t\000i\000a\000t\000i\000o\000n\000\040\000D\000o\000c\000u\000m\000e\000n\000t}{section*.76}% 69
+\BOOKMARK [2][-]{section*.78}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000B\000\040\000-\000\040\000L\000o\000g\000\040\000b\000o\000o\000k}{section*.76}% 70
diff --git a/document.pdf b/document.pdf
index 98fc215..5f5abf6 100644
Binary files a/document.pdf and b/document.pdf differ
diff --git a/document.synctex.gz b/document.synctex.gz
index c7077dd..9360597 100644
Binary files a/document.synctex.gz and b/document.synctex.gz differ
diff --git a/document.tex b/document.tex
index 8aa739e..f28ca93 100644
--- a/document.tex
+++ b/document.tex
@@ -1,4 +1,4 @@
-\documentclass[oneside, 10pt]{article}
+\documentclass[oneside, 12pt]{article}

 \usepackage{amsmath}
 \usepackage{amsfonts}
@@ -23,6 +23,34 @@
 \usepackage[margin=1.2in]{geometry}
 \usepackage{titling}

+\usepackage{listings}
+\usepackage{color}
+
+\definecolor{codegreen}{rgb}{0,0.6,0}
+\definecolor{codegray}{rgb}{0.5,0.5,0.5}
+\definecolor{codepurple}{rgb}{0.58,0,0.82}
+\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
+\lstdefinestyle{mystyle}{
+    backgroundcolor=\color{backcolour},
+    commentstyle=\color{codegreen},
+    keywordstyle=\color{magenta},
+    numberstyle=\tiny\color{codegray},
+    stringstyle=\color{codepurple},
+    basicstyle=\footnotesize,
+    breakatwhitespace=false,
+    breaklines=true,
+    captionpos=b,
+    keepspaces=true,
+    numbers=left,
+    numbersep=5pt,
+    showspaces=false,
+    showstringspaces=false,
+    showtabs=false,
+    tabsize=2
+}
+
+\lstset{style=mystyle}
+
 \usepackage[style=ieee,backend=biber]{biblatex}
 \addbibresource{report.bib}
@@ -234,6 +262,8 @@
 \begin{itemize}
 \item ...
 \end{itemize}
+
+ \section{Quality Goals}
 \newpage
@@ -289,13 +319,13 @@
 \item Stopword removal: Removes commonly used words (such as "the", "in", "a") that add no meaning to the sentiment of a given text
 \item Stemming: Replaces words carrying common suffixes and prefixes with their reduced counterparts, since, for example, "go" and "goes" fundamentally convey the same meaning
 \item Term Probability Identification and Feature Extraction: Identifies the most frequently used words in a given text using a probability-based approach; given a pre-defined dataset that labels a range of texts as overall negative or positive, a machine learning algorithm is trained to classify new texts accordingly.
-    \item Ngrams: ...
+    \item Ngrams: A contiguous sequence of n items from a given sample of text. The use of n-grams in natural language processing can improve the accuracy of classification. For example, ‘Good’ and ‘Not Good’ have opposite meanings: using only single tokens (1-grams), ‘not good’ (‘not’ and ‘good’) can be incorrectly classified. As the English language contains a significant number of 2-gram word chains, using 2-grams can improve the accuracy of classification.
 \end{itemize}

 The former has been proven to provide higher accuracy than traditional machine learning approaches \cite{11}, and needs little pre-processing of the data, as words have a pre-defined sentiment classification in a provided lexicon. Although these lexicons can be complex to create, they generally require few resources to use and alter.

 \subsubsection{Valence Aware Dictionary and sEntiment Reasoning}\label{Vader}
-    VADER is a combined lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media and works well on texts from other domains. It is capable of detecting the polarity of a given text - positivity, neutrality, and negativity \cite{12}. VADER uses a human-centric approach to sentiment analysis, combining qualitative analysis and empirical validation by using human raters to rate the level of sentiment for words in its lexicon. Vader also has emoticon support which maps these colloquialisms have pre-defined intensities in its lexicon, which makes VADER specifically suitable for the social media domain were the use of emoticons, utf-8 emojis and slang such as "Lol" and "Yolo" are prevalent within the text. Additionally, VADER is provided as a lexicon and a python library under the MIT license, this means that it is open-source software. This means that the lexicon can be altered and added to abling it to be tailored to specific topic domains.
+    VADER is a combined lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media and works well on texts from other domains. It is capable of detecting the polarity of a given text - positivity, neutrality, and negativity \cite{12} - and can also calculate a compound score, computed by summing the valence scores of each word in the lexicon. VADER uses a human-centric approach to sentiment analysis, combining qualitative analysis and empirical validation by using human raters to rate the level of sentiment for words in its lexicon. VADER also has emoticon support, mapping these colloquialisms to pre-defined intensities in its lexicon, which makes it specifically suitable for the social media domain, where emoticons, UTF-8 emojis and slang such as "Lol" and "Yolo" are prevalent within the text. Additionally, VADER is provided as a lexicon and a Python library under the MIT license, meaning it is open-source software; the lexicon can therefore be altered and extended, enabling it to be tailored to specific topic domains.

 VADER was constructed by examining and extracting features from three pre-existing, well-established and human-validated sentiment lexicons \cite{12} - Linguistic Inquiry and Word Count (LIWC), Affective Norms for English Words (ANEW), and the General Inquirer (GI).

 This is supplemented with additional lexicon features \textit{"commonly used to express sentiment in social media text (emoticons, acronyms and slang)"} \cite{12} and uses a "wisdom-of-the-crowd" approach \cite{13} to establish a point estimate of sentiment valence for each lexical feature candidate. These candidates were evaluated for the impact of grammatical and syntactical rules, and the 7,500+ lexical features with mean valence \textit{"<> zero, and SD <= 2.5"} were retained as a human-validated "gold-standard" sentiment lexicon \cite{12}, \textit{Section 3.1}.
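+    To make the scoring described above concrete, the following is a minimal sketch of querying VADER through the \textit{vaderSentiment} Python package; the example sentence is illustrative only, and this snippet is not taken from the project's code:
+    \begin{lstlisting}[language=Python, caption=Illustrative VADER polarity query (sketch only)]
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+analyser = SentimentIntensityAnalyzer()
+
+# polarity_scores returns the 'neg', 'neu' and 'pos' proportions of the
+# text, plus the normalised 'compound' score derived from the summed
+# per-word valences
+scores = analyser.polarity_scores("Bitcoin is going to the moon! Lol")
+print(scores['compound'])
+    \end{lstlisting}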
@@ -447,6 +477,8 @@
 \[ P(Outcome \mid Evidence) = \frac{P(Evidence \mid Outcome) \times P(Outcome)}{P(Evidence)} \]

 The naive Bayes approach has many applications, including, for the topic of this project, classifying the probable occurrence of the next price. Although it is a robust algorithm, it has drawbacks which make it less suitable than a neural network for the needs of this project; the naive Bayes trap is an issue that may occur due to the size of the dataset that will be used. There are, however, other scenarios in which this algorithm could be used, such as the classification of spam data \cite{32}.
+
+ \subsection{Random Forest}
 \newpage
@@ -510,7 +542,7 @@
 \textbf{Analysis of Social Media Text (4,200 Tweets)}\cite{12}
 \end{center}

-    Due to the suitability for the given domain of social media and with the customisability, due to VADER's lexicon-dictionary based approach, makes this sentiment analyser most suitable for use in this project. This analyser will be utilised as the sentiment analyser of this project due to its feature set and need for little data pre-processing before polarity classification of the provided text. \cite{11} \textit{"extract ..."}.
+    Due to its suitability for the given domain of social media, and the customisability afforded by its lexicon-dictionary based approach, VADER is the sentiment analyser most suitable for use in this project. It will be utilised as the project's sentiment analyser due to its feature set and the little data pre-processing needed before polarity classification of the provided text; \cite{11} notes that it \textit{"is a widely used approach to sentiment analysis in the marketing research community, as it does not require any pre-processing or training of the classifier."}

 This will be an intermediate system between the neural network and the data collection pre-processing system: the latter will provide the cleaned, processed data for analysis, and the former will be fed the classified polarity of each tweet alongside price data for model learning.
@@ -521,7 +553,7 @@
 \item Nature of an RNN - Allows for backpropagation to find partial derivatives of the error with respect to the weights after an output has occurred, to tweak the current weights of the LSTM cell. In short, this allows the tweaking of the network's weights based on previously seen data by looping through the same node, thus influencing decisions made on current data based on old weights and errors from previous steps.
 \item Nature of an LSTM over RNN - LSTMs are extensions of RNNs \cite{22} that were designed to avoid long-term dependency problems such as exploding and vanishing gradients. Weights are not just reused but are stored in memory and propagated through the network.
 \item Lack of use for the project's purpose - Other papers tend to focus on machine learning techniques and other neural networks, such as the Multi-layer Perceptron (MLP) and standard Recurrent Neural Networks, with time-series data - especially standard RNNs, which do not overcome their common gradient descent issues. As stated in the related research section of the literature review, \cite{5} - \textit{"using the MLP classifier (a.k.a neural networks) showed better results than logistical regression and random forest trained models"}
-    \item Prior use for time-series data and data forecasting - Although RNN LSTM networks have been used for the prediction of Bitcoin price there are a few papers on this \cite{25}. Regardless, LSTMs have been notably used with use for time-series data forecasting due to being able to remember previous data and weights over long sequence spans \cite{22} - \textit{""}, \cite{25} - \textit{""}.
+    \item Prior use for time-series data and data forecasting - Although RNN LSTM networks have been used for the prediction of Bitcoin's price, only a few papers exist on this \cite{25}. Regardless, LSTMs have been notably used for time-series data forecasting due to being able to remember previous data and weights over long sequence spans; \cite{25} notes that this \textit{"adds a great benefit in time series forecasting, where classical linear methods can be difficult to adapt to multivariate or multiple input forecasting problems"}.
 \end{itemize}

 Therefore, a recurrent long-short term memory neural network will be used in this project to predict the next hourly interval of Bitcoin's price based on previous historical prices and hourly sentiment. This system will read in historical data, both price and sentiment - depending on whether the network predicts with or without sentiment - and this data will be merged, split and used to train and test the network model for forecasting prices. The relative sizes of the training and test data can be decided upon system creation, but the standard sizing for training neural networks is 75:25, training and testing respectively.
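+ To make the intended handling of this data concrete, the following is a minimal sketch of windowing the merged hourly series (using the look-back of 5 described later under Price Forecasting), applying the 75:25 split, and training a small Keras LSTM. The file name, layer size and epoch count are illustrative assumptions, not the project's actual values:
+ \begin{lstlisting}[language=Python, caption=Sketch of look-back windowing and LSTM training (illustrative only)]
+import numpy as np
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
+
+LOOK_BACK = 5  # hours of history per sample
+
+def make_windows(series, look_back=LOOK_BACK):
+    # series: array of shape (hours, features); price assumed in column 0
+    X, y = [], []
+    for i in range(len(series) - look_back):
+        X.append(series[i:i + look_back])   # the past `look_back` hours
+        y.append(series[i + look_back][0])  # the next hour's price
+    return np.array(X), np.array(y)
+
+data = np.loadtxt('merged_hourly.csv', delimiter=',')  # hypothetical merged price/sentiment file
+X, y = make_windows(data)
+
+split = int(len(X) * 0.75)  # 75:25 train/test split
+X_train, X_test = X[:split], X[split:]
+y_train, y_test = y[:split], y[split:]
+
+model = Sequential([
+    LSTM(32, input_shape=(LOOK_BACK, data.shape[1])),  # illustrative layer size
+    Dense(1),  # the predicted next-hour price
+])
+model.compile(optimizer='adam', loss='mse')
+model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
+ \end{lstlisting}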
@@ -534,13 +566,15 @@
 \hline
 \multirow{6}{*}{TensorFlow} & Supports reinforcement learning and other algorithms & Doesn’t support matrix operations \\ & Offers computational graph abstraction & Doesn't have pretrained models \\ & Faster compile time than Theano & Drops to Python to load each new training batch \\ & Data and model parallelism & Doesn't support dynamic typing on large scale projects \\ & Can be deployed over multiple CPUs and GPUs & \\
 \hline
-    \multirow{4}{*}{Theano} & Computational Graph Abstraction & Is low-level \\ & Has multiple high-level wrappers similar to Keras & Can only be deployed to a single GPU \\ & & Much slower compile times on large models than competition \\ & & Unhelpful and vague error messages \\
+    \multirow{5}{*}{Theano} & Computational Graph Abstraction & Is low-level \\ & Has multiple high-level wrappers similar to Keras & Can only be deployed to a single GPU \\ & & Much slower compile times on large models than competition \\ & & Unhelpful and vague error messages \\ & & Development ceased in 2017 \\
 \hline
 \multirow{3}{*}{Pytorch} & Graph definition is more imperative and dynamic than other frameworks & Not as widely adopted as TensorFlow \\ & Graph computation defined at runtime, allowing standard popular IDEs to support it & Visualisation is not as robust as TensorBoard \\ & Natively supports common Python deployment frameworks such as Flask & Not as deployable as TensorFlow, doesn't support gRPC \\ & & \\
 \end{tabular}}

 \textbf{Comparison between TensorFlow, Theano and Pytorch}\cite{34}
 \end{table}
+
+ Due to the continued support and development of TensorFlow, its broad community, and the availability of a high-level wrapper - Keras - this library will be used for this project. Although Pytorch is a good alternative, it is not as easy to use or implement when compared to TensorFlow with Keras.

 \subsection{Price Forecasting}
 This part of the system will be responsible for predicting the next time-step of Bitcoin's price for the next hour based on past data. It will use the trained model from the neural network to predict the next hour's price when given live hourly data, price and sentiment. The system will also have a look-back of 5, which will allow it to see historical data to aid its predictions. This will occur on the hour, every hour, when new data is received and processed; this data will also be merged and then split into training and testing data. The sizing can be decided upon system creation, but the standard sizing for training is 75:25, training and testing respectively.
@@ -567,6 +601,7 @@
 % \item Neural Network - Tensorflow, Keras, Sc
 %\end{itemize}

+ \newpage
 \subsection{Data flow Overview}\label{data-flow}
 To get an understanding of how the system will be put together, a dataflow diagram is a useful method for viewing how systems are integrated and how data could flow through a system.
@@ -651,7 +686,6 @@
 \item Outputs - Accuracy statistics, true price data and predicted next-hour prices are output to their respective files for use on the front-end application for charting.
 \end{itemize}
-    \newpage
 \textbf{Front-end Application}
 \begin{center}
 \includegraphics[width=10cm,height=9cm]{images/Frontend_Application.png}
 \end{center}
@@ -665,8 +699,8 @@
 \item Charting and Tables - Accesses the loaded data from the Ajax requests and plots the data. Prediction data - only with sentiment - and prices are plotted into a table.
 There will be separate charts and tables displaying the backend data that has not used sentiment in its predictions, to aid in establishing a correlation between sentiment and price and whether sentiment affects the hourly price (aiming to solve the problem statement)
 \item Stakeholders - The four stakeholders outlined in the problem articulation section would be the primary users of this application.
 \end{itemize}
-    \newpage
-    \subsection{UML Component Design}
+
+    %\subsection{UML Component Design}

 \subsection{Interface Design}
@@ -678,25 +712,470 @@
 \begin{center}
 \textit{Figure 10: Interface design}
 \end{center}
-    Figure 10 above shows the basic idea of the interface design that will be presented to the stakeholders and aims to be the interface that these stakeholders will use to aid in their market decisions of Bitcoin. The interface, although simplistic, provides all the necassary information that any of these stakeholders would need, it also provides information to allow visual comparision on how sentiment affects the hourly price of Bitcoin, represented as the two charts.
+    \textit{Figure 10} above shows the basic idea of the interface design that will be presented to the stakeholders and aims to be the interface that these stakeholders will use to aid in their market decisions of Bitcoin. The interface, although simplistic, provides all the necessary information that any of these stakeholders would need; it also provides information allowing a visual comparison of how sentiment affects the hourly price of Bitcoin, represented as the two charts.

 The comparison will aid in solving the problem statement later in the conclusion of the project.
 \newpage
 \begin{center}
 \section{Implementation}\label{implementation}
 \end{center}
-
-    \subsection{Data collection}\label{collection}
-    \subsubsection{Price Time-series Data}
-    Historical data of Bitcoin prices can be obtained through may means,
+    This section will outline the method and process of development of this system to satisfy the chosen solution, the technical specification and the problem statement. Each section of the system will be outlined and discussed with relevant code snippets of essential methods to highlight the processing of data throughout.
+    \newline
+    \subsection{Data collection}\label{collection}
+    \subsubsection{Price Time-Series Historical Data}
+    Historical price data were extracted from a CSV historical price tracker, \textit{Bitcoin Charts} \cite{35}. This tracker provided historical data from the three exchanges used for live price collection - Coinbase, Bitfinex and Gemini - dating back to when each exchange first supported the cryptocurrency. The data used spans from \textit{2018-01-06} to \textit{2019-01-06}.
+
+    \begin{lstlisting}[language=Python, caption=Historical price collection and averaging per exchange]
+import pandas as pd
+...
+coinbase = pd.read_csv('coinbase_btcusd.csv')
+bitfinex = pd.read_csv('bitfinex_btcusd.csv')
+gemini = pd.read_csv('gemini_btcusd.csv')
+
+coinbase.drop(columns=["Currency", "24h Open (USD)", "24h High (USD)", "24h Low (USD)"], inplace=True)
+
+coinbase.columns = ["timestamp", "price"]
+
+coinbase['timestamp'] = pd.to_datetime(coinbase['timestamp'])
+
+coinbase = coinbase.set_index('timestamp').resample('1D').mean().resample('1H').mean()
+...
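+# NOTE: the two-step resample above first averages the raw rows into
+# daily means ('1D'), then re-indexes those onto an hourly grid ('1H'),
+# leaving NaN rows between the daily points; the fillna backfill at the
+# end of this listing fills those gaps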
+# similar code for the other 2 exchanges
+
+# align the frame on the shared hourly index, then average the three
+# exchanges' prices elementwise into a single hourly price
+data = data.set_index(coinbase.index)
+data['price'] = (coinbase['price'] + gemini['price'] + bitfinex['price'])/3
+
+data = data.fillna(method='backfill')
+data = data.round(3)
+    \end{lstlisting}
+
+    Because the hourly prices in each exchange's CSV were already averaged from the \textit{'high'}, \textit{'mid'} and \textit{'low'} prices, the data from the exchanges only needed to be averaged together. This averaged data is then saved to a CSV containing historical prices of Bitcoin for the past year.
+
+    \subsubsection{Price Time-Series Live Data}
+    Live price data, as described in the solution approach, were extracted every hour from three exchanges. Coinbase, Bitfinex and Gemini were chosen to provide this data because they are among the most popular exchange platforms that offer an API for retrieving live price data.
+
+    Key packages used:
+    \begin{lstlisting}[language=Python, caption=Key packages used for live price collection]
+import requests
+
+from coinbase.wallet.client import Client
+
+from dotenv import load_dotenv
+from pathlib import Path
+env_path = Path('.')/'data_collector/prices/config/coinbase.env'
+load_dotenv(dotenv_path=env_path)
+    \end{lstlisting}
+
+    \textbf{\textit{Requests}} was used to make the API endpoint calls to obtain the responses containing the three prices needed for each hour.
+
+    The \textbf{\textit{Coinbase}} package was mandatory for establishing a connection with the Coinbase API; this exchange was used regardless, as it is regarded as the most popular exchange among the general public, with one of the highest flows of traffic for purchasing cryptocurrencies.
+
+    Both the \textbf{\textit{dotenv}} and \textbf{\textit{pathlib}} packages were used to extract the API keys - the access and secret keys - from the relevant \textit{'.env'} file, used alongside the Coinbase package to connect to the Coinbase API.
+
+    The \textit{'high'}, \textit{'mid'} and \textit{'low'} prices were extracted from the endpoint response and averaged to provide an overall hourly price per exchange.
+
+    \begin{lstlisting}[language=Python, caption=Extraction of Price from exchanges]
+import json, sys  # json parses the Bitfinex/Gemini responses; sys flushes log output
+
+def coinbase():
+
+    api_key = keys().api_key      # keys() (defined elsewhere in the script) loads the stored credentials
+    api_secret = keys().api_secret
+
+    try:
+        client = Client(api_key, api_secret)
+        response = client.get_spot_price(currency_pair = 'BTC-USD')
+        price = (float(response['amount']))
+        price = round(price, 3)
+        return price
+    except KeyError as e:
+        print("Error: %s" % str(e))
+        sys.stdout.flush()
+        price = 0
+        return price
+
+def bitfinex():
+
+    try:
+        response = requests.request("GET", "https://api.bitfinex.com/v1/pubticker/btcusd")
+        response = json.loads(response.text)
+
+        price = (float(response['low']) + float(response['mid']) + float(response['high']))/3
+        price = round(price, 3)
+        return price
+    except KeyError as e:
+        print("Error: %s" % str(e))
+        sys.stdout.flush()
+        price = 0
+        return price
+
+def gemini():
+    ...  # same structure as bitfinex(), using the Gemini public ticker endpoint
+    \end{lstlisting}
+
+    The above code shows how the price extraction from the APIs was implemented.
+
+    These functions are called every hour by a master function, which averages the prices from the exchanges to create a fair, unbiased hourly price; this is then saved to a CSV containing the live unbiased price for the hour, along with the time of creation.
+ The following code shows how the master collector function produces the unbiased hourly price:
+
+ \begin{lstlisting}[language=Python, caption=Creation of the unbiased hourly price]
+def collector(priceCSV, fieldnames):
+
+    now = datetime.now()
+
+    coinbase_P = coinbase()
+    bitfinex_P = bitfinex()
+    gemini_P = gemini()
+
+    # a price of 0 signals that an exchange's API call failed
+    if coinbase_P == 0 or bitfinex_P == 0 or gemini_P == 0:
+        if coinbase_P == 0 and bitfinex_P == 0:
+            averagePrice = gemini_P
+        elif coinbase_P == 0 and gemini_P == 0:
+            averagePrice = bitfinex_P
+        elif bitfinex_P == 0 and gemini_P == 0:
+            averagePrice = coinbase_P
+        else:
+            # exactly one exchange failed, so average the remaining two
+            averagePrice = (coinbase_P + bitfinex_P + gemini_P)/2
+    else:
+        averagePrice = (coinbase_P + bitfinex_P + gemini_P)/3
+
+    averagePrice = round(averagePrice, 3)
+ \end{lstlisting}
+
+ \subsubsection{Historical Tweet Collection}
+ Historical tweets were obtained directly from the Twitter API through a simple Curl command for the given date range of the past year. Multiple accounts were created to obtain the amount of data needed, as detailed in the data gathering section under the solution approach. Averaging 5 tweets per hour for the past year would require roughly 43,800 tweets (around 438 requests at 100 results per request), which was unfeasible with the API access available for this project. Instead, 1 tweet per hour (24 per day, roughly 1 request per 4 days of data) was obtained rather than the average, so only ~92 requests were needed to gather the required data.
+
+ \begin{lstlisting}[language=bash, caption=Sample Curl request - data saved to JSON and a Python script called to process it]
+curl --request POST \
+  --url https://api.twitter.com/1.1/tweets/search/fullarchive/boop.json \
+  --header 'authorization: Bearer TOKEN' --header 'content-type: application/json' \
+  --data '{"query": "bitcoin", "maxResults":100, "fromDate":"201904050000", "toDate":"201904050200"}' -o data_collector/twitter/temp_hist_tweets.json \
+  && python3 data_collector/twitter/sift_text.py
+ \end{lstlisting}
+
+ These tweets are passed through the spam filter to detect unwanted text, cleaned, and assigned a polarity classification for their hour. How the spam classification, data pre-processing and polarity classification work is detailed in the relevant sections of the system below.
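+ The Curl command writes the raw response to \textit{'temp\_hist\_tweets.json'} and then invokes \textit{'sift\_text.py'}. The driver loop that feeds each tweet into the \textit{processTweet} function shown below is not reproduced in the report; a minimal sketch, assuming the full-archive response stores tweets under its \textit{'results'} key, could look like the following.
+
+ \begin{lstlisting}[language=Python, caption=Sketch of the sift-text driver loop (assumed structure)]
+import json
+
+with open('data_collector/twitter/temp_hist_tweets.json') as f:
+    response = json.load(f)
+
+for status in response.get('results', []):
+    # prefer the full text when the tweet exceeds 140 characters
+    text = status.get('extended_tweet', {}).get('full_text', status.get('text', ''))
+    processTweet(text, tweetFilter)
+ \end{lstlisting}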
+
+ \begin{lstlisting}[language=Python, caption=Sift-text python script - used alongside the Curl command in Listing 4]
+import csv
+import sys
+import datetime
+
+import tweet_collector ## pre-processing functions
+import spam_filter ## spam filter classification
+import analysis_engine.sentiment_analysis as sentiment_analysis
+## Sentiment analysis and polarity classification (symbolic link to file)
+## 'analyser' is the VADER wrapper instantiated in __main__
+
+def processTweet(tweet, tweetFilter):
+
+    now = datetime.datetime.now()
+
+    # Data pre-processing
+    removedLines = tweet_collector.utilityFuncs().fixLines(tweet)
+    removedSpecialChars = tweet_collector.utilityFuncs().cleanTweet(removedLines)
+    removedSpacing = tweet_collector.utilityFuncs().removeSpacing(removedSpecialChars[0])
+    tweetLength = tweet_collector.utilityFuncs().checkLength(removedSpacing)
+
+    if tweetLength == True:
+        ## proceed only if the tweet is long enough; shorter tweets are dropped
+
+        ## check if the tweet is predominantly English
+        checkIfEnglish = tweet_collector.utilityFuncs().detectLaguage(removedSpecialChars[0])
+
+        if checkIfEnglish == True:
+            ## remove non-English characters
+            tweetText = tweet_collector.utilityFuncs().remove_non_ascii(removedSpacing)
+            print("Cleaned Tweet: ", tweetText)
+            sys.stdout.flush()
+
+            ## re-append the extracted emojis to the cleaned text
+            cleanedTweet = tweetText+' '+removedSpecialChars[1]
+
+            ## check with spam filter - drop if classified as spam
+            classification = tweetFilter.testTweet(cleanedTweet)
+
+            if classification == False:
+                ## perform sentiment analysis
+                ovSentiment, compound = analyser.get_vader_sentiment(cleanedTweet)
+
+                try:
+                    ## save to historical tweets file
+                    with open('data_collector/historical_tweets.csv', mode='a') as csv_file:
+                        writer = csv.DictWriter(csv_file, fieldnames=['created_at', 'tweet', 'sentiment', 'compound'])
+                        writer.writerow({'created_at': now.strftime("%Y-%m-%d %H:%M"), 'tweet': cleanedTweet, 'sentiment': ovSentiment, 'compound': compound})
+                    return True
+                except BaseException as exception:
+                    print("Error: %s" % str(exception))
+                    sys.stdout.flush()
+                    return False
+            else:
+                ... # closing else branches with explanatory print statements
+ \end{lstlisting}
+
+ As detailed in the comments, this function applies several methods to the data, all of which are defined in other files and imported at the top of the script to avoid code duplication throughout the system. Because spam filtering inevitably removes some tweets, a few hours were left with missing data; this was resolved by making another request for the specific hour and averaging the sentiment for that hour to fill the gap.
+
+ \subsubsection{Live Tweet Collection}
+ Live tweets were obtained through the Tweepy package, which streams current tweets from the Twitter API each hour. Spam detection, data pre-processing and language detection are also applied to this data; these functions are defined within the script \textit{'tweet\_collector.py'} and are described in the relevant parts of the Data processing section.
+
+ When \textit{'tweet\_collector.py'} is run, it first initialises the CSV files for storing tweets and the tweets that have been assigned polarities by VADER. More importantly, it initialises the spam filter and trains it on the pre-labelled spam dataset.
+ \begin{lstlisting}[language=Python, caption=Spam filter initialisation and training functions]
+## In __main__ when the script is first run
+...
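+    # training_set is the path to the pre-labelled spam/ham CSV consumed by
+    # filterSpam.dataset() below; its columns are 'tweet' and 'classes'
+    # (values 'spam'/'ham')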
+
+    tweetFilter = filterSpam(training_set)
+    tweetFilter.trainFilter()
+    ## initialise with the loaded training_set and train
+
+    prediction = tweetFilter.testData_Prediction()
+    # test the classification model with the held-out test tweets
+
+    tweetFilter.filterStatistics(prediction)
+    # print accuracy metrics for the test data
+
+    tweetFilter.testPrediction()
+    # test the classifier with hard-coded tweets, to check it classifies correctly
+ \end{lstlisting}
+
+ These calls correspond to methods defined in the \textit{filterSpam} class, which create the training and test datasets; the class is described in the Spam Filtering section below.
+
+ The streaming of tweets is handled by the Tweepy package and is initialised when the Python script starts. The streaming method works by establishing a listener authenticated with the Twitter API; it then listens on that connection for data. The streamer can also filter by language and by a specified hashtag, which is loaded from a \textit{'.env'} file that also contains the API keys for authentication.
+ \newline
+
+
+ \begin{lstlisting}[language=Python, caption=Tweepy Streamer setup]
+... # in __main__, run first when the script starts
+    twitter_streamer = Streamer()
+    twitter_streamer.stream_tweets(tweets_file, temp_tweets, hashtag, tweetFilter, analyser)
+
+#========================================
+class Streamer():
+
+    def __init__(self):
+        pass
+        # initialise the stream object
+
+    def stream_tweets(self, tweets_file, temp_tweets, hashtag, tweetFilter, analyser):
+        listener = Listener(tweets_file, temp_tweets, tweetFilter, analyser)
+        auth = OAuthHandler(keys().api_key, keys().api_secret)
+        # load API keys from the env file and set up authentication
+
+        print("Console: ", "Authorising with twitter API")
+        sys.stdout.flush()
+
+        auth.set_access_token(keys().access_token, keys().access_secret)
+        # set access keys
+
+        print("Console: ", "Streaming Tweets")
+        sys.stdout.flush()
+
+        stream = Stream(auth, listener, tweet_mode='extended')
+        stream.filter(languages=["en"], track=hashtag)
+        ## run the streamer, filtering for English-region tweets and the
+        ## specified hashtags ('#Bitcoin', '#bitcoin')
+ \end{lstlisting}
+
+ Once the listener and streamer are declared and Tweepy begins listening, all incoming data is processed through the \textit{on\_data} method. In this function the tweet is extracted from the response and passed through data pre-processing, language detection, spam classification and sentiment analysis. Additionally, a time-limit check ensures that the script runs for just under an hour and restarts every hour; this allows the gathered tweets' sentiment to be averaged for that hour and then used for the network's price predictions.
+
+ The tweet text can be nested in multiple attributes of the response, depending on what the tweet is and how it was posted on Twitter. If a user retweeted the tweet, its text is nested under \textit{'retweeted\_status'} in the JSON response. There is also a check for tweets above the original 140-character limit - a legacy of the Twitter API: if an \textit{'extended\_tweet'} attribute exists, the tweet exceeds 140 characters but is under Twitter's hard limit of 280, and the full text must be read from that attribute. The same check applies whether or not the tweet is a retweet.
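+ The \textit{Listener} instantiated above subclasses Tweepy's \textit{StreamListener}; its constructor is not reproduced in the report, but from the fields used in \textit{on\_data} below it plausibly resembles the following sketch (the 3,540-second limit is an assumed value for "just under an hour").
+
+ \begin{lstlisting}[language=Python, caption=Sketch of the Listener constructor (fields inferred from on\_data)]
+import time
+from tweepy.streaming import StreamListener
+
+class Listener(StreamListener):
+
+    def __init__(self, tweets_file, temp_tweets, tweetFilter, analyser):
+        super().__init__()
+        self.tweets_file = tweets_file   # CSV of all collected tweets
+        self.temp_tweets = temp_tweets   # per-hour CSV, averaged on restart
+        self.tweetFilter = tweetFilter   # trained spam filter
+        self.analyser = analyser         # VADER wrapper
+        self.start_time = time.time()
+        self.limit = 3540                # assumed: run just under an hour
+ \end{lstlisting}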
+ \newline
+
+ \begin{lstlisting}[language=Python, caption=Tweepy Stream: 'on\_data' method]
+import csv
+import json
+import sys
+import time
+from datetime import datetime, timedelta
+
+import spam_filter
+import analysis_engine.sentiment_analysis as sentiment_analysis
+from tweepy import OAuthHandler
+from tweepy import Stream
+from tweepy.streaming import StreamListener
+...
+
+def on_data(self, data):
+    ## check the time limit of just under an hour - if reached, the script exits
+    if (time.time() - self.start_time) < self.limit:
+
+        now = datetime.now() + timedelta(hours=1)
+        ## current time plus 1 hour, since the script finishes just before
+        ## the hour it is collecting for completes
+
+        data = json.loads(data)
+
+        # Tweet extraction from the response
+        try:
+            # check if the tweet is a retweet
+            if 'retweeted_status' in data:
+                if 'extended_tweet' in data['retweeted_status']:
+                    # the tweet is over the 140 character limit
+                    text = data['retweeted_status']['extended_tweet']['full_text']
+                    print("Uncleaned Tweet:", text)
+                    sys.stdout.flush()
+                else:
+                    text = data['retweeted_status']['text']
+                    print("Uncleaned Tweet:", text)
+                    sys.stdout.flush()
+            else:
+                # else a normal tweet
+                if 'extended_tweet' in data:
+                    # the tweet is over the 140 character limit
+                    text = data['extended_tweet']['full_text']
+                    print("Uncleaned Tweet:", text)
+                    sys.stdout.flush()
+                else:
+                    # otherwise the text is in the top-level attribute
+                    text = data['text']
+                    print("Uncleaned Tweet: ", text)
+                    sys.stdout.flush()
+
+            # data cleaning and pre-processing prior to polarity classification
+            removedLines = utilityFuncs().fixLines(text)
+            removedSpecialChars = utilityFuncs().cleanTweet(removedLines)
+            removedSpacing = utilityFuncs().removeSpacing(removedSpecialChars[0])
+
+            tweetLength = utilityFuncs().checkLength(removedSpacing)
+
+            # check the tweet is long enough for polarity classification
+            # (> 5 words, checked through tokenisation)
+            if tweetLength == True:
+                checkIfEnglish = utilityFuncs().detectLaguage(removedSpecialChars[0])
+                # check the text is predominantly English; if not, drop it
+                if checkIfEnglish == True:
+                    tweetText = utilityFuncs().remove_non_ascii(removedSpacing)
+                    print("Cleaned Tweet: ", tweetText)
+                    sys.stdout.flush()
+
+                    # re-combine emojis onto the end of the tweet
+                    # (VADER supports emoticon sentiment assignment)
+                    cleanedTweet = tweetText+' '+removedSpecialChars[1]
+
+                    ## check if spam; drop if classified as such
+                    classification = self.tweetFilter.testTweet(cleanedTweet)
+
+                    if classification == False:
+                        ## perform sentiment analysis using VADER
+                        ovSentiment, compound = self.analyser.get_vader_sentiment(cleanedTweet)
+
+                        # save date/hour, tweet text, the higher of the positive or
+                        # negative sentiment scores, and the compound score
+                        try:
+                            # temp file used at the end of the hour's streaming to
+                            # average the sentiment for that hour
+                            with open(temp_tweets, mode='a') as csv_file:
+                                writer = csv.DictWriter(csv_file, fieldnames=temp_fieldnames)
+                                writer.writerow({'created_at': now.strftime("%Y-%m-%d %H:%M:%S"), 'tweet': cleanedTweet, 'sentiment': ovSentiment, 'compound': compound})
+                        except BaseException as exception:
+                            print("1 Error: %s" % str(exception))
+                            sys.stdout.flush()
+
+                        try:
+                            # tweet file storing all collected tweets from every hour
+                            with open(tweets_file, mode='a') as csv_file:
+                                writer = csv.DictWriter(csv_file, fieldnames=fieldnames_tweet)
+                                writer.writerow({'created_at': now.strftime("%Y-%m-%d %H:%M:%S"), 'tweet': cleanedTweet, 'sentiment': ovSentiment, 'compound': compound})
+                        except BaseException as exception:
+                            print("2 Error: %s" % str(exception))
+                            sys.stdout.flush()
+                    else:
+                        print("Console: ", "Tweet is spam. Not storing tweet in dataset")
+                        sys.stdout.flush()
+        ...
+        ... # closing else statements with print statements for when the tweet doesn't meet the criteria
+        ...
+ \end{lstlisting}
+
+ Two key facts about this function: the tweet's (tokenised) length must be above 5, because tweets with fewer words do not contain enough information to be given a proper polarity classification and almost always return as 100\% neutral, which is of no use and has no effect on the hour's average sentiment. The whole body of the function is wrapped in a try/except that checks whether data was received: non-responses and missing data are simply ignored, and the script only exits if the connection between the streamer and the API is broken.
+
 \newpage
 \subsection{Data processing}\label{processing}
 \subsubsection{Preprocessing}
+ Various techniques and tools are used throughout the system to process the data so that it can be parsed by VADER, the spam filter and the neural network. This section covers the crucial functions that provide these capabilities and that are called throughout the system, as seen in some of the code snippets above.

 \paragraph{Tweet Filtering}
+
+ \begin{lstlisting}[language=Python, caption=Basic data filtering and processing functions - defined in 'tweet\_collector.py']
+import re
+import emoji as ji
+...
+
+class utilityFuncs():
+
+    def cleanTweet(self, text):
+        # remove links, mentions and special characters; also return the
+        # emojis found in the text so they can be re-appended later
+        return re.sub(r'([^0-9A-Za-z \-\%\£\$ \t])|(@[A-Za-z0-9]+)|(http\S+)', '', text), ' '.join(c for c in text if c in ji.UNICODE_EMOJI)
+
+    def removeSpacing(self, text):
+        # collapse runs of spaces to a single space
+        return re.sub(r'( +)', ' ', text)
+
+    def fixLines(self, text):
+        # replace carriage returns and line feeds with spaces
+        return re.sub(r"([\r\n])", " ", text)
+
+    def remove_non_ascii(self, text):
+        # strip any character outside the ASCII range
+        return ''.join(i for i in text if ord(i)<128)
+ \end{lstlisting}

 \paragraph{Text Cleaning}

 \paragraph{Ngram based Language detection filtering}

 \subsubsection{Spam Filtering}
- \paragraph{Tweet Processing}
+ \begin{lstlisting}[language=Python, caption=Spam filter training class]
+import sys
+import numpy as np
+import pandas as pd
+import spam_filter
+
+class filterSpam(object):
+
+    def __init__(self, training_set):
+        self.training_set = training_set
+        ## store the training set path for use in every method where needed
+
+    def trainFilter(self):
+        self.dataset() ## split the dataset 75:25
+        self.train()   ## train on the training portion
+
+    def dataset(self):
+        self.data = pd.read_csv(self.training_set)
+
+        self.data['class'] = self.data['classes'].map({'ham': 0, 'spam': 1})
+        # remap the 'ham'/'spam' labels to 0/1 respectively
+
+        self.data.drop(['classes'], axis=1, inplace=True)
+        # drop the old labels
+
+        self.trainIndex, self.testIndex = list(), list()
+        for i in range(self.data.shape[0]):
+            if np.random.uniform(0, 1) < 0.75:
+                self.trainIndex += [i]  # ~75% of rows go to the training index
+            else:
+                self.testIndex += [i]   # the remaining ~25% to the testing index
+        self.trainData = self.data.loc[self.trainIndex]
+        self.testData = self.data.loc[self.testIndex]
+        # build the datasets from the randomly assigned indexes
+
+        self.trainData.reset_index(inplace=True)
+        self.testData.reset_index(inplace=True)
+        # reset the indexes
+
+        self.trainData.drop(['index'], axis=1, inplace=True)
+        self.testData.drop(['index'], axis=1, inplace=True)
+        # drop the old index column
+
+    def train(self):
+        self.spamFilter = spam_filter.classifier(self.trainData)
+        # initialise the spam filter with the ~75% training split
+
+        self.spamFilter.train()
+        # train
+
+    def testData_Prediction(self):
+        prediction = self.spamFilter.predict(self.testData['tweet'])
+
+        return prediction
+
+    def testPrediction(self):
+
+        # hard-coded spam/ham tweets - should classify as True and False respectively
+        spam = spam_filter.processTweet("Earn more than 0015 btc free No deposit No investment Free Bitcoins - Earn $65 free btc in 5 minutes bitcoin freebtc getbtc")
+
+        ham = spam_filter.processTweet("Bitcoin closed with some gains in month of February")
+
+        hamTweet = self.spamFilter.classify(ham)
+        spamTweet = self.spamFilter.classify(spam)
+
+        print("Console: ", "Spam Tweet -- ", spamTweet)
+        sys.stdout.flush()
+        print("Console: ", "Ham Tweet -- ", hamTweet)
+        sys.stdout.flush()
+
+    def filterStatistics(self, prediction):
+        spam_filter.metrics(self.testData['class'], prediction)
+
+    def testTweet(self, tweet):
+
+        processed = spam_filter.processTweet(tweet)
+        classified = self.spamFilter.classify(processed)
+
+        return classified
+ \end{lstlisting}
+
 \paragraph{Naive Bayes model}

 \subsection{Sentiment Analysis}
@@ -707,15 +1186,34 @@
 Dropouts?

 \subsubsection{Scoring and Validation}
 Loss?
- \subsubsection{Future Prediction Forecasting}
-
+ \subsection{Future Prediction Forecasting}
+
+
+ \subsection{Main File 'Main.py'}
+
+ \subsection{Miscellaneous}
+
+ The \textit{keys} class loads the API keys used by Tweepy to access the Twitter API, along with the defined hashtag filter. This parameter allows streaming of the specified hashtags; for this project it is set to the \textit{'\#Bitcoin'} and \textit{'\#bitcoin'} hashtags.
+ \begin{lstlisting}[language=Python, caption=Loading the hashtag filter via the keys class]
+if __name__ == '__main__':
+
+    ...
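+    # currency_hashtags is a comma-separated string loaded from the .env
+    # file (e.g. "#Bitcoin, #bitcoin"); split(', ') turns it into the list
+    # that Tweepy's stream.filter(track=...) expects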
+    hashtag = keys().currency_hashtags
+    hashtag = hashtag.split(', ')
+ \end{lstlisting}

 \newpage

- \section{Testing: Verification and Reflection}
+ \section{Testing Metrics and Accuracy}

 Mean Bias Error

 \newpage

+ \section{Project Evaluation}
+ Reflection
+
+ Quality
+
+
 \section{Discussion: Contribution and Reflection}

 \subsection{Limitations}

diff --git a/document.toc b/document.toc
index 34d6ac4..adea065 100644
--- a/document.toc
+++ b/document.toc
@@ -1,137 +1,147 @@
\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax
\defcounter {refsection}{0}\relax
-\contentsline {section}{Abstract}{1}{section*.1}
+\contentsline {section}{Abstract}{2}{section*.1}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Acknowledgements}{2}{section*.2}
+\contentsline {section}{Acknowledgements}{3}{section*.2}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Glossary}{3}{section*.3}
+\contentsline {section}{Glossary}{4}{section*.3}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Introduction}{7}{section*.5}
+\contentsline {section}{Introduction}{8}{section*.5}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Problem Articulation}{9}{section*.6}
+\contentsline {section}{Problem Articulation}{10}{section*.6}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Problem Statement}{9}{section*.7}
+\contentsline {subsection}{Problem Statement}{10}{section*.7}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Stakeholders}{9}{section*.8}
+\contentsline {subsection}{Stakeholders}{10}{section*.8}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Project Motivation}{10}{section*.9}
+\contentsline {subsection}{Project Motivation}{11}{section*.9}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Technical Specification}{11}{section*.10}
+\contentsline {subsection}{Technical Specification}{13}{section*.10}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Project Constraints}{12}{section*.11}
+\contentsline {subsection}{Project Constraints}{15}{section*.11}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Literature Review}{13}{section*.12}
+\contentsline {section}{Quality Goals}{15}{section*.12}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Existing Tools}{13}{section*.13}
+\contentsline {section}{Literature Review}{16}{section*.13}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Related research}{13}{section*.14}
+\contentsline {subsection}{Existing Tools}{16}{section*.14}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Data Collection}{14}{section*.15}
+\contentsline {subsection}{Related research}{16}{section*.15}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Twitter and Twitter API}{14}{section*.16}
+\contentsline {subsection}{Data Collection}{17}{section*.16}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Tweepy Python Package}{15}{section*.17}
+\contentsline {subsubsection}{Twitter and Twitter API}{17}{section*.17}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Sentiment Analysis}{15}{section*.18}
+\contentsline {subsubsection}{Tweepy Python Package}{18}{section*.18}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Natural Language Processing}{15}{section*.19}
+\contentsline {subsection}{Sentiment Analysis}{19}{section*.19}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Valence Aware Dictionary and sEntiment Reasoning}{16}{section*.20}
+\contentsline {subsubsection}{Natural Language Processing}{19}{section*.20}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Neural Networks}{16}{section*.21}
+\contentsline {subsubsection}{Valence Aware Dictionary and sEntiment Reasoning}{20}{section*.21}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Recurrent Neural Network (RNN)}{17}{section*.22}
+\contentsline {subsection}{Neural Networks}{21}{section*.22}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Long-Short Term Memory (LSTM)}{18}{section*.23}
+\contentsline {subsubsection}{Recurrent Neural Network (RNN)}{22}{section*.23}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Keras and TensorFlow}{19}{section*.24}
+\contentsline {subsubsection}{Long-Short Term Memory (LSTM)}{23}{section*.24}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Optimisers}{20}{section*.25}
+\contentsline {subsubsection}{Keras and TensorFlow}{24}{section*.25}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Machine Learning}{21}{section*.26}
+\contentsline {subsubsection}{Optimisers}{25}{section*.26}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Naive Bayes}{21}{section*.27}
+\contentsline {subsection}{Machine Learning}{27}{section*.27}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Solution Approach}{23}{section*.28}
+\contentsline {subsubsection}{Naive Bayes}{27}{section*.28}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Data gathering}{23}{section*.29}
+\contentsline {subsection}{Random Forest}{28}{section*.29}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Data pre-processing}{24}{section*.30}
+\contentsline {section}{Solution Approach}{29}{section*.30}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Spam Filtering}{24}{section*.31}
+\contentsline {subsection}{Data gathering}{29}{section*.31}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Language Detection}{24}{section*.32}
+\contentsline {subsection}{Data pre-processing}{30}{section*.32}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Sentiment Analysis}{25}{section*.33}
+\contentsline {subsection}{Spam Filtering}{30}{section*.33}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Neural Network}{25}{section*.34}
+\contentsline {subsection}{Language Detection}{31}{section*.34}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Price Forecasting}{26}{section*.36}
+\contentsline {subsection}{Sentiment Analysis}{31}{section*.35}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Frontend Application}{26}{section*.37}
+\contentsline {subsection}{Neural Network}{32}{section*.36}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{With reference to Initial PID}{27}{section*.38}
+\contentsline {subsection}{Price Forecasting}{34}{section*.38}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Solution Summary}{27}{section*.39}
+\contentsline {subsection}{Frontend Application}{34}{section*.39}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Data flow Overview}{27}{section*.40}
+\contentsline {subsection}{With reference to Initial PID}{34}{section*.40}
\defcounter {refsection}{0}\relax
-\contentsline {section}{System Design}{29}{section*.41}
+\contentsline {subsection}{Solution Summary}{35}{section*.41}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Dataflow Designs}{29}{section*.42}
+\contentsline
{subsection}{Data flow Overview}{36}{section*.42}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{UML Component Design}{35}{section*.43}
+\contentsline {section}{System Design}{37}{section*.43}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Interface Design}{35}{section*.44}
+\contentsline {subsection}{Dataflow Designs}{37}{section*.44}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Implementation}{36}{section*.46}
+\contentsline {subsection}{Interface Design}{44}{section*.45}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Data collection}{36}{section*.47}
+\contentsline {section}{Implementation}{45}{section*.47}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Price Time-series Data}{36}{section*.48}
+\contentsline {subsection}{Data collection}{45}{section*.48}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Data processing}{36}{section*.49}
+\contentsline {subsubsection}{Price Time-Series Historical Data}{45}{section*.49}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Preprocessing}{36}{section*.50}
+\contentsline {subsubsection}{Price Time-Series Live Data}{46}{section*.50}
\defcounter {refsection}{0}\relax
-\contentsline {paragraph}{Tweet Filtering}{36}{section*.51}
+\contentsline {subsubsection}{Historical Tweet Collection}{48}{section*.51}
\defcounter {refsection}{0}\relax
-\contentsline {paragraph}{Text Cleaning}{36}{section*.52}
+\contentsline {subsubsection}{Live Tweet Collection}{50}{section*.52}
\defcounter {refsection}{0}\relax
-\contentsline {paragraph}{Ngram based Language detection filtering}{36}{section*.53}
+\contentsline {subsection}{Data processing}{55}{section*.53}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Spam Filtering}{36}{section*.54}
+\contentsline {subsubsection}{Preprocessing}{55}{section*.54}
\defcounter {refsection}{0}\relax
-\contentsline {paragraph}{Tweet Processing}{36}{section*.55}
+\contentsline {paragraph}{Text Cleaning}{55}{section*.56}
\defcounter {refsection}{0}\relax
-\contentsline {paragraph}{Naive Bayes model}{36}{section*.56}
+\contentsline {paragraph}{Ngram based Language detection filtering}{55}{section*.57}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Sentiment Analysis}{36}{section*.57}
+\contentsline {subsubsection}{Spam Filtering}{55}{section*.58}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{VADER}{36}{section*.58}
+\contentsline {paragraph}{Naive Bayes model}{57}{section*.59}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Recurrent Neural Network - LSTM}{36}{section*.59}
+\contentsline {subsection}{Sentiment Analysis}{57}{section*.60}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Training and Testing Model}{36}{section*.60}
+\contentsline {subsubsection}{VADER}{57}{section*.61}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Scoring and Validation}{36}{section*.61}
+\contentsline {subsection}{Recurrent Neural Network - LSTM}{57}{section*.62}
\defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Future Prediction Forecasting}{36}{section*.62}
+\contentsline {subsubsection}{Training and Testing Model}{57}{section*.63}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Testing: Verification and Reflection}{37}{section*.63}
+\contentsline {subsubsection}{Scoring and Validation}{57}{section*.64}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Discussion: Contribution and Reflection}{38}{section*.64}
+\contentsline {subsection}{Future Prediction Forecasting}{58}{section*.65}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Limitations}{38}{section*.65}
+\contentsline {subsection}{Main File 'Main.py'}{58}{section*.66}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Conclusion and Future Improvements}{39}{section*.66}
+\contentsline {subsection}{Miscellaneous}{58}{section*.67}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Conclusion}{39}{section*.67}
+\contentsline {section}{Testing Metrics and Accuracy}{59}{section*.68}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Future Improvements}{39}{section*.68}
+\contentsline {section}{Project Evaluation}{60}{section*.69}
\defcounter {refsection}{0}\relax
-\contentsline {section}{Appendices}{43}{section*.70}
+\contentsline {section}{Discussion: Contribution and Reflection}{60}{section*.70}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Appendix A - Project Initiation Document}{43}{section*.71}
+\contentsline {subsection}{Limitations}{60}{section*.71}
\defcounter {refsection}{0}\relax
-\contentsline {subsection}{Appendix B - Log book}{56}{section*.72}
+\contentsline {section}{Conclusion and Future Improvements}{61}{section*.72}
+\defcounter {refsection}{0}\relax
+\contentsline {subsection}{Conclusion}{61}{section*.73}
+\defcounter {refsection}{0}\relax
+\contentsline {subsection}{Future Improvements}{61}{section*.74}
+\defcounter {refsection}{0}\relax
+\contentsline {section}{Appendices}{66}{section*.76}
+\defcounter {refsection}{0}\relax
+\contentsline {subsection}{Appendix A - Project Initiation Document}{66}{section*.77}
+\defcounter {refsection}{0}\relax
+\contentsline {subsection}{Appendix B - Log book}{79}{section*.78}
diff --git a/report.bib b/report.bib
index 2a943d6..6b7ef7b 100644
--- a/report.bib
+++ b/report.bib
@@ -338,4 +338,14 @@
 year={2017},
 organization={Analytics India},
 url={https://www.analyticsindiamag.com/tensorflow-vs-theano-researchers-prefer-artificial-intelligence-framework}
+}
+
+@inproceedings{35,
+ title={Bitcoin Charts API: historical CSV market data},
+ author={bitcoincharts},
+ organization={Bitcoin Charts},
+ url={http://api.bitcoincharts.com/v1/csv/}
+}