178 lines
11 KiB
TeX
178 lines
11 KiB
TeX
\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {1}Abstract}{2}{section.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {2}Acknowledgements}{3}{section.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {3}Glossary}{4}{section.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {4}Introduction}{10}{section.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {5}Problem Articulation}{12}{section.5}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {5.1}Problem Statement}{12}{subsection.5.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {5.2}Stakeholders}{12}{subsection.5.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {5.3}Project Motivation}{13}{subsection.5.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {5.4}Technical Specification}{15}{subsection.5.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {6}Quality Goals}{17}{section.6}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {6.1}Process Description}{17}{subsection.6.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {6.2}Quality Objectives}{17}{subsection.6.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {6.3}Tools to Ensure Quality}{18}{subsection.6.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {7}Literature Review}{19}{section.7}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.1}Existing Tools}{19}{subsection.7.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.2}Related Research}{19}{subsection.7.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.3}Data Collection}{20}{subsection.7.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.3.1}Twitter and Twitter API}{20}{subsubsection.7.3.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.3.2}Tweepy Python Package}{21}{subsubsection.7.3.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.4}Sentiment Analysis}{22}{subsection.7.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.4.1}Natural Language Processing}{22}{subsubsection.7.4.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.4.2}Valence Aware Dictionary and sEntiment Reasoning}{23}{subsubsection.7.4.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.5}Neural Networks}{24}{subsection.7.5}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.5.1}Recurrent Neural Network (RNN)}{25}{subsubsection.7.5.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.5.2}Long Short-Term Memory (LSTM)}{26}{subsubsection.7.5.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.5.3}Keras and TensorFlow}{27}{subsubsection.7.5.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.5.4}Optimisers}{28}{subsubsection.7.5.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.5.5}Regularisation}{30}{subsubsection.7.5.5}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.5.6}Dropout}{30}{subsubsection.7.5.6}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.6}Machine Learning}{30}{subsection.7.6}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {7.6.1}Naive Bayes}{30}{subsubsection.7.6.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.7}Bag Of Words}{31}{subsection.7.7}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.8}TF-IDF}{32}{subsection.7.8}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.9}Additive Smoothing}{32}{subsection.7.9}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {7.10}Regression Performance Metrics}{33}{subsection.7.10}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {8}Solution Approach}{34}{section.8}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.1}Data gathering}{34}{subsection.8.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.2}Data pre-processing}{35}{subsection.8.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.3}Spam Filtering}{35}{subsection.8.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.4}Language Detection}{36}{subsection.8.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.5}Sentiment Analysis}{36}{subsection.8.5}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.6}Neural Network}{37}{subsection.8.6}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.7}Price Forecasting}{39}{subsection.8.7}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.8}Frontend Application}{39}{subsection.8.8}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.9}With reference to Initial PID}{39}{subsection.8.9}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.10}Solution Summary}{40}{subsection.8.10}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {8.11}Initial Data flow Overview}{41}{subsection.8.11}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {9}System Design}{42}{section.9}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {9.1}Dataflow Designs}{42}{subsection.9.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {9.2}Interface Design}{49}{subsection.9.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {10}Implementation}{51}{section.10}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.1}Data collection}{51}{subsection.10.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.1.1}Price Time-Series Historical Data}{51}{subsubsection.10.1.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.1.2}Price Time-Series Live Data}{52}{subsubsection.10.1.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.1.3}Historical Tweet Collection}{53}{subsubsection.10.1.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.1.4}Live Tweet Collection}{55}{subsubsection.10.1.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.2}Data pre-processing}{57}{subsection.10.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.2.1}Tweet Filtering}{57}{subsubsection.10.2.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.2.2}Language detection filtering}{58}{subsubsection.10.2.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.2.3}Spam filter - Tokenisation, Ngrams, Stopword removal and Stemming}{60}{subsubsection.10.2.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.3}Spam Filtering}{61}{subsection.10.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.3.1}Naive Bayes model}{64}{subsubsection.10.3.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.3.2}Classification}{65}{subsubsection.10.3.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.3.3}Predict}{66}{subsubsection.10.3.3}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.3.4}Metrics}{66}{subsubsection.10.3.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.4}Sentiment Analysis}{67}{subsection.10.4}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.5}Recurrent Neural Network - LSTM}{68}{subsection.10.5}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.5.1}Dataset Creation}{68}{subsubsection.10.5.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.5.2}Training and Testing Model}{70}{subsubsection.10.5.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.6}Future Prediction Forecasting}{72}{subsection.10.6}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {10.7}User Interface}{74}{subsection.10.7}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.7.1}Key Functions}{74}{subsubsection.10.7.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {10.7.2}Final Interface}{77}{subsubsection.10.7.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {11}Testing Metrics and Accuracy}{79}{section.11}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {11.1}Integration Testing}{79}{subsection.11.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {11.2}Accuracy of Model \& Results}{80}{subsection.11.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {11.2.1}Results Discussion}{80}{subsubsection.11.2.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsubsection}{\numberline {11.2.2}Execution Speeds}{83}{subsubsection.11.2.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {12}Discussion: Contribution and Reflection}{84}{section.12}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {12.1}Limitations}{85}{subsection.12.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {12.2}Reflection}{86}{subsection.12.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {13}Social, Legal and Ethical Issues}{88}{section.13}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {14}Conclusion and Future Improvements}{89}{section.14}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {14.1}Conclusion}{89}{subsection.14.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {14.2}Future Improvements}{90}{subsection.14.2}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {section}{\numberline {15}Appendices}{98}{section.15}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {15.1}Appendix A - Project Initiation Document}{98}{subsection.15.1}
|
|
\defcounter {refsection}{0}\relax
|
|
\contentsline {subsection}{\numberline {15.2}Appendix B - Log book}{111}{subsection.15.2}
|