27/04
This commit is contained in:
parent 2474b24d52
commit 09158b99f6

document.aux (90 changed lines)
@@ -208,65 +208,61 @@
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Historical price collection and averaging per exchange}{46}{lstlisting.1}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.1.2}Price Time-Series Live Data}{47}{subsubsection.10.1.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Extraction of Price from exchanges}{47}{lstlisting.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Creation of the unbiased hourly price}{48}{lstlisting.3}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.1.3}Historical Tweet Collection}{49}{subsubsection.10.1.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}Sample Curl request - data saved to json and python scripted called to process data}{49}{lstlisting.4}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}Sift-text python script - used alongside Curl command in Listing 4}{49}{lstlisting.5}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.1.4}Live Tweet Collection}{51}{subsubsection.10.1.4}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}Spam filter initialisation and training functions}{51}{lstlisting.6}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}Tweepy Streamer setup}{52}{lstlisting.7}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}Tweepy Stream: 'on\_data' method}{53}{lstlisting.8}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.2}Data pre-processing}{56}{subsection.10.2}}
|
||||
\newlabel{processing}{{10.2}{56}{Data pre-processing}{subsection.10.2}{}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.2.1}Tweet Filtering}{56}{subsubsection.10.2.1}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}Basic data filtering and processing function - defined in 'tweet\_collector.py'}{56}{lstlisting.9}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.2.2}Language detection filtering}{57}{subsubsection.10.2.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {10}Language detection and filter function \cite {38}}{57}{lstlisting.10}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.1.3}Historical Tweet Collection}{48}{subsubsection.10.1.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {3}Sample Curl request - data saved to json and python scripted called to process data}{48}{lstlisting.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {4}Sift-text python script - used alongside Curl command in Listing 4}{48}{lstlisting.4}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.1.4}Live Tweet Collection}{50}{subsubsection.10.1.4}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {5}Tweepy Streamer setup}{50}{lstlisting.5}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.2}Data pre-processing}{52}{subsection.10.2}}
|
||||
\newlabel{processing}{{10.2}{52}{Data pre-processing}{subsection.10.2}{}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.2.1}Tweet Filtering}{52}{subsubsection.10.2.1}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {6}Basic data filtering and processing function - defined in 'tweet\_collector.py'}{52}{lstlisting.6}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.2.2}Language detection filtering}{53}{subsubsection.10.2.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {7}Language detection and filter function \cite {38}}{53}{lstlisting.7}}
|
||||
\abx@aux@cite{38}
|
||||
\abx@aux@segm{0}{0}{38}
|
||||
\abx@aux@segm{0}{0}{38}
|
||||
\abx@aux@cite{39}
|
||||
\abx@aux@segm{0}{0}{39}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.2.3}Spam filter - Tokenisation, Ngrams, Stopword removal and Stemming}{58}{subsubsection.10.2.3}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.2.3}Spam filter - Tokenisation, Ngrams, Stopword removal and Stemming}{55}{subsubsection.10.2.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {8}pre-processing of data prior to being used by the spam filter}{55}{lstlisting.8}}
|
||||
\abx@aux@cite{40}
|
||||
\abx@aux@segm{0}{0}{40}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {11}pre-processing of data prior to being used by the spam filter}{59}{lstlisting.11}}
|
||||
\abx@aux@segm{0}{0}{40}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.3}Spam Filtering}{61}{subsection.10.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {12}Spam filter training Class - \textit {tweet\_collector.py}}{61}{lstlisting.12}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.3}Spam Filtering}{56}{subsection.10.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {9}Spam filter training Class - \textit {tweet\_collector.py}}{56}{lstlisting.9}}
|
||||
\abx@aux@cite{41}
|
||||
\abx@aux@segm{0}{0}{41}
|
||||
\abx@aux@segm{0}{0}{34}
|
||||
\abx@aux@cite{42}
|
||||
\abx@aux@segm{0}{0}{42}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.1}Naive Bayes model}{63}{subsubsection.10.3.1}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {13}classifer class of spam\_filter.py}{64}{lstlisting.13}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.2}Classification}{65}{subsubsection.10.3.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {14}Classify Function of Parent classifier class of spam\_filter.py}{66}{lstlisting.14}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.3}Predict}{66}{subsubsection.10.3.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {15}Predict function of parent classifier class of spam\_filter.py}{66}{lstlisting.15}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.4}Metrics}{66}{subsubsection.10.3.4}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {16}Metrics function for calculating the performance and accuracy of the model}{67}{lstlisting.16}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.4}Sentiment Analysis}{68}{subsection.10.4}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.5}Recurrent Neural Network - LSTM}{69}{subsection.10.5}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {17}LSTM packages}{69}{lstlisting.17}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.5.1}Script Execution}{70}{subsubsection.10.5.1}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {18}Start of execution of the LSTM script}{70}{lstlisting.18}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.5.2}Dataset Creation}{70}{subsubsection.10.5.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {19}Dataset creation and preprocessing}{70}{lstlisting.19}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.5.3}Training and Testing Model}{72}{subsubsection.10.5.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {20}LSTM model creation\tmspace +\thinmuskip {.1667em} layering\tmspace +\thinmuskip {.1667em} compiling and fitting}{72}{lstlisting.20}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.6}Future Prediction Forecasting}{73}{subsection.10.6}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.7}User Interface}{75}{subsection.10.7}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {11}Testing Metrics and Accuracy}{76}{section.11}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {12}Project Evaluation}{77}{section.12}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {13}Discussion: Contribution and Reflection}{77}{section.13}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {13.1}Limitations}{77}{subsection.13.1}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {14}Conclusion and Future Improvements}{78}{section.14}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {14.1}Conclusion}{78}{subsection.14.1}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {14.2}Future Improvements}{78}{subsection.14.2}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {15}Appendices}{83}{section.15}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {15.1}Appendix A - Project Initiation Document}{83}{subsection.15.1}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.1}Naive Bayes model}{59}{subsubsection.10.3.1}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {10}classifer class of spam\_filter.py}{59}{lstlisting.10}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.2}Classification}{60}{subsubsection.10.3.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {11}Classify Function of Parent classifier class of spam\_filter.py}{60}{lstlisting.11}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.3}Predict}{61}{subsubsection.10.3.3}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {12}Predict function of parent classifier class of spam\_filter.py}{61}{lstlisting.12}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.3.4}Metrics}{61}{subsubsection.10.3.4}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {13}Metrics function for calculating the performance and accuracy of the model}{61}{lstlisting.13}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.4}Sentiment Analysis}{62}{subsection.10.4}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {14}VADER polarity classification}{62}{lstlisting.14}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.5}Recurrent Neural Network - LSTM}{63}{subsection.10.5}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.5.1}Dataset Creation}{63}{subsubsection.10.5.1}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {15}Dataset creation and preprocessing}{64}{lstlisting.15}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {10.5.2}Training and Testing Model}{65}{subsubsection.10.5.2}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {16}LSTM model creation\tmspace +\thinmuskip {.1667em} layering\tmspace +\thinmuskip {.1667em} compiling and fitting}{65}{lstlisting.16}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.6}Future Prediction Forecasting}{67}{subsection.10.6}}
|
||||
\@writefile{lol}{\defcounter {refsection}{0}\relax }\@writefile{lol}{\contentsline {lstlisting}{\numberline {17}Forecasting future price of next hour for Bitcoin}{67}{lstlisting.17}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {10.7}User Interface}{69}{subsection.10.7}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {11}Testing Metrics and Accuracy}{70}{section.11}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {12}Project Evaluation}{71}{section.12}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {13}Discussion: Contribution and Reflection}{71}{section.13}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {13.1}Limitations}{71}{subsection.13.1}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {14}Conclusion and Future Improvements}{72}{section.14}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {14.1}Conclusion}{72}{subsection.14.1}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {14.2}Future Improvements}{72}{subsection.14.2}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {15}Appendices}{77}{section.15}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {15.1}Appendix A - Project Initiation Document}{77}{subsection.15.1}}
|
||||
\abx@aux@refcontextdefaultsdone
|
||||
\abx@aux@defaultrefcontext{0}{1}{none/global//global/global}
|
||||
\abx@aux@defaultrefcontext{0}{2}{none/global//global/global}
|
||||
@@ -310,4 +306,4 @@
|
||||
\abx@aux@defaultrefcontext{0}{40}{none/global//global/global}
|
||||
\abx@aux@defaultrefcontext{0}{41}{none/global//global/global}
|
||||
\abx@aux@defaultrefcontext{0}{42}{none/global//global/global}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {15.2}Appendix B - Log book}{96}{subsection.15.2}}
|
||||
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {15.2}Appendix B - Log book}{90}{subsection.15.2}}

document.blg (42 changed lines)
@@ -1,23 +1,23 @@
|
||||
[0] Config.pm:302> INFO - This is Biber 2.9
|
||||
[0] Config.pm:305> INFO - Logfile is 'document.blg'
|
||||
[18] biber:313> INFO - === Fri Apr 26, 2019, 19:25:29
|
||||
[33] Biber.pm:371> INFO - Reading 'document.bcf'
|
||||
[80] Biber.pm:854> INFO - Using all citekeys in bib section 0
|
||||
[89] Biber.pm:3981> INFO - Processing section 0
|
||||
[96] Biber.pm:4154> INFO - Looking for bibtex format file 'report.bib' for section 0
|
||||
[98] bibtex.pm:1468> INFO - LaTeX decoding ...
|
||||
[132] bibtex.pm:1294> INFO - Found BibTeX data source 'report.bib'
|
||||
[144] Utils.pm:169> WARN - year field 'Aug 2, 2017' in entry '42' is not an integer - this will probably not sort properly.
|
||||
[157] Utils.pm:169> WARN - year field 'Mar 13, 2016' in entry '23' is not an integer - this will probably not sort properly.
|
||||
[179] Utils.pm:169> WARN - Name "Mairal, J., Ponce, J., Sapiro, G., Zisserman, A." has too many commas: skipping name
|
||||
[195] Utils.pm:169> WARN - year field 'Aug 2, 2017' in entry '34' is not an integer - this will probably not sort properly.
|
||||
[236] Utils.pm:169> WARN - year field 'Feb 26, 2018' in entry '40' is not an integer - this will probably not sort properly.
|
||||
[259] Utils.pm:169> WARN - BibTeX subsystem: warning: comma(s) at end of name (removing)
|
||||
[259] Utils.pm:169> WARN - BibTeX subsystem: author, warning: comma(s) at end of name (removing)
|
||||
[280] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
|
||||
[280] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
|
||||
[280] Biber.pm:3809> INFO - Sorting list 'none/global//global/global' of type 'entry' with template 'none' and locale 'en-US'
|
||||
[280] Biber.pm:3815> INFO - No sort tailoring available for locale 'en-US'
|
||||
[301] bbl.pm:617> INFO - Writing 'document.bbl' with encoding 'ascii'
|
||||
[315] bbl.pm:720> INFO - Output to document.bbl
|
||||
[315] Biber.pm:110> INFO - WARNINGS: 7
|
||||
[21] biber:313> INFO - === Fri Apr 26, 2019, 21:24:32
|
||||
[37] Biber.pm:371> INFO - Reading 'document.bcf'
|
||||
[89] Biber.pm:854> INFO - Using all citekeys in bib section 0
|
||||
[100] Biber.pm:3981> INFO - Processing section 0
|
||||
[108] Biber.pm:4154> INFO - Looking for bibtex format file 'report.bib' for section 0
|
||||
[109] bibtex.pm:1468> INFO - LaTeX decoding ...
|
||||
[148] bibtex.pm:1294> INFO - Found BibTeX data source 'report.bib'
|
||||
[181] Utils.pm:169> WARN - year field 'Aug 2, 2017' in entry '34' is not an integer - this will probably not sort properly.
|
||||
[219] Utils.pm:169> WARN - Name "Mairal, J., Ponce, J., Sapiro, G., Zisserman, A." has too many commas: skipping name
|
||||
[226] Utils.pm:169> WARN - year field 'Feb 26, 2018' in entry '40' is not an integer - this will probably not sort properly.
|
||||
[234] Utils.pm:169> WARN - year field 'Aug 2, 2017' in entry '42' is not an integer - this will probably not sort properly.
|
||||
[236] Utils.pm:169> WARN - year field 'Mar 13, 2016' in entry '23' is not an integer - this will probably not sort properly.
|
||||
[291] Utils.pm:169> WARN - BibTeX subsystem: warning: comma(s) at end of name (removing)
|
||||
[291] Utils.pm:169> WARN - BibTeX subsystem: author, warning: comma(s) at end of name (removing)
|
||||
[315] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'variable = shifted' with 'variable = non-ignorable'
|
||||
[315] UCollate.pm:68> INFO - Overriding locale 'en-US' defaults 'normalization = NFD' with 'normalization = prenormalized'
|
||||
[315] Biber.pm:3809> INFO - Sorting list 'none/global//global/global' of type 'entry' with template 'none' and locale 'en-US'
|
||||
[315] Biber.pm:3815> INFO - No sort tailoring available for locale 'en-US'
|
||||
[338] bbl.pm:617> INFO - Writing 'document.bbl' with encoding 'ascii'
|
||||
[353] bbl.pm:720> INFO - Output to document.bbl
|
||||
[353] Biber.pm:110> INFO - WARNINGS: 7

document.log (503 changed lines)
@@ -1,4 +1,4 @@
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.10.16) 26 APR 2019 20:55
|
||||
This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=pdflatex 2018.10.16) 27 APR 2019 14:55
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
%&-line parsing enabled.
|
||||
@@ -972,7 +972,7 @@ LaTeX Info: Redefining \nameref on input line 91.
|
||||
\@outlinefile=\write5
|
||||
\openout5 = `document.out'.
|
||||
|
||||
<images/reading_logo.png, id=321, 504.88625pt x 183.68625pt>
|
||||
<images/reading_logo.png, id=309, 504.88625pt x 183.68625pt>
|
||||
File: images/reading_logo.png Graphic file (type png)
|
||||
<use images/reading_logo.png>
|
||||
Package pdftex.def Info: images/reading_logo.png used on input line 94.
|
||||
@@ -1063,19 +1063,19 @@ Missing character: There is no
|
||||
Missing character: There is no € in font cmr12!
|
||||
Missing character: There is no ™ in font cmr12!
|
||||
[20]
|
||||
<images/perceptron.png, id=539, 706.64pt x 392.46625pt>
|
||||
<images/perceptron.png, id=524, 706.64pt x 392.46625pt>
|
||||
File: images/perceptron.png Graphic file (type png)
|
||||
<use images/perceptron.png>
|
||||
Package pdftex.def Info: images/perceptron.png used on input line 338.
|
||||
(pdftex.def) Requested size: 284.52713pt x 170.72142pt.
|
||||
[21 <./images/perceptron.png>]
|
||||
<images/rnn_ffn.png, id=553, 844.15375pt x 342.27875pt>
|
||||
<images/rnn_ffn.png, id=538, 844.15375pt x 342.27875pt>
|
||||
File: images/rnn_ffn.png Graphic file (type png)
|
||||
<use images/rnn_ffn.png>
|
||||
Package pdftex.def Info: images/rnn_ffn.png used on input line 358.
|
||||
(pdftex.def) Requested size: 426.80307pt x 170.72112pt.
|
||||
[22 <./images/rnn_ffn.png>]
|
||||
<images/lstm.png, id=566, 1160.335pt x 1029.8475pt>
|
||||
<images/lstm.png, id=551, 1160.335pt x 1029.8475pt>
|
||||
File: images/lstm.png Graphic file (type png)
|
||||
<use images/lstm.png>
|
||||
Package pdftex.def Info: images/lstm.png used on input line 376.
|
||||
@@ -1137,167 +1137,156 @@ Missing character: There is no
|
||||
Missing character: There is no € in font cmr12!
|
||||
Missing character: There is no ™ in font cmr12!
|
||||
[34] [35] [36]
|
||||
<images/Generic_Flow.png, id=687, 1900.09875pt x 529.98pt>
|
||||
<images/Generic_Flow.png, id=672, 1900.09875pt x 529.98pt>
|
||||
File: images/Generic_Flow.png Graphic file (type png)
|
||||
<use images/Generic_Flow.png>
|
||||
Package pdftex.def Info: images/Generic_Flow.png used on input line 637.
|
||||
Package pdftex.def Info: images/Generic_Flow.png used on input line 639.
|
||||
(pdftex.def) Requested size: 512.13474pt x 227.62009pt.
|
||||
|
||||
Overfull \hbox (71.28728pt too wide) in paragraph at lines 637--638
|
||||
Overfull \hbox (71.28728pt too wide) in paragraph at lines 639--640
|
||||
[][]
|
||||
[]
|
||||
|
||||
[37 <./images/Generic_Flow.png (PNG copy)>]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 648--650
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 650--652
|
||||
|
||||
[]
|
||||
|
||||
<images/Dataflow.png, id=693, 3767.07375pt x 1526.70375pt>
|
||||
<images/Dataflow.png, id=678, 3767.07375pt x 1526.70375pt>
|
||||
File: images/Dataflow.png Graphic file (type png)
|
||||
<use images/Dataflow.png>
|
||||
Package pdftex.def Info: images/Dataflow.png used on input line 653.
|
||||
Package pdftex.def Info: images/Dataflow.png used on input line 655.
|
||||
(pdftex.def) Requested size: 512.09683pt x 227.62125pt.
|
||||
|
||||
Overfull \hbox (71.24937pt too wide) in paragraph at lines 653--655
|
||||
Overfull \hbox (71.24937pt too wide) in paragraph at lines 655--657
|
||||
[]
|
||||
[]
|
||||
|
||||
[38 <./images/Dataflow.png (PNG copy)>]
|
||||
<images/Data_Collector.png, id=698, 1152.305pt x 647.41875pt>
|
||||
<images/Data_Collector.png, id=683, 1152.305pt x 647.41875pt>
|
||||
File: images/Data_Collector.png Graphic file (type png)
|
||||
<use images/Data_Collector.png>
|
||||
Package pdftex.def Info: images/Data_Collector.png used on input line 661.
|
||||
Package pdftex.def Info: images/Data_Collector.png used on input line 663.
|
||||
(pdftex.def) Requested size: 426.78574pt x 227.61746pt.
|
||||
[39 <./images/Data_Collector.png (PNG copy)>]
|
||||
<images/Analysis_Engine.png, id=703, 1658.195pt x 719.68875pt>
|
||||
<images/Analysis_Engine.png, id=688, 1658.195pt x 719.68875pt>
|
||||
File: images/Analysis_Engine.png Graphic file (type png)
|
||||
<use images/Analysis_Engine.png>
|
||||
Package pdftex.def Info: images/Analysis_Engine.png used on input line 676.
|
||||
Package pdftex.def Info: images/Analysis_Engine.png used on input line 678.
|
||||
(pdftex.def) Requested size: 483.67276pt x 227.62561pt.
|
||||
|
||||
Overfull \hbox (42.8253pt too wide) in paragraph at lines 676--678
|
||||
Overfull \hbox (42.8253pt too wide) in paragraph at lines 678--680
|
||||
[]
|
||||
[]
|
||||
|
||||
[40 <./images/Analysis_Engine.png (PNG copy)>] [41]
|
||||
<images/Neural_Network.png, id=713, 1502.61375pt x 1032.85875pt>
|
||||
<images/Neural_Network.png, id=698, 1502.61375pt x 1032.85875pt>
|
||||
File: images/Neural_Network.png Graphic file (type png)
|
||||
<use images/Neural_Network.png>
|
||||
Package pdftex.def Info: images/Neural_Network.png used on input line 694.
|
||||
Package pdftex.def Info: images/Neural_Network.png used on input line 696.
|
||||
(pdftex.def) Requested size: 483.6893pt x 341.42757pt.
|
||||
|
||||
Overfull \hbox (42.84184pt too wide) in paragraph at lines 694--696
|
||||
Overfull \hbox (42.84184pt too wide) in paragraph at lines 696--698
|
||||
[]
|
||||
[]
|
||||
|
||||
[42 <./images/Neural_Network.png (PNG copy)>]
|
||||
<images/Future_Predictions.png, id=718, 1596.96625pt x 490.83376pt>
|
||||
<images/Future_Predictions.png, id=703, 1596.96625pt x 490.83376pt>
|
||||
File: images/Future_Predictions.png Graphic file (type png)
|
||||
<use images/Future_Predictions.png>
|
||||
Package pdftex.def Info: images/Future_Predictions.png used on input line 708.
|
||||
Package pdftex.def Info: images/Future_Predictions.png used on input line 710.
|
||||
|
||||
(pdftex.def) Requested size: 512.1362pt x 227.62119pt.
|
||||
|
||||
Overfull \hbox (71.28874pt too wide) in paragraph at lines 708--710
|
||||
Overfull \hbox (71.28874pt too wide) in paragraph at lines 710--712
|
||||
[]
|
||||
[]
|
||||
|
||||
[43 <./images/Future_Predictions.png (PNG copy)>]
|
||||
<images/Frontend_Application.png, id=724, 804.00375pt x 599.23875pt>
|
||||
<images/Frontend_Application.png, id=709, 804.00375pt x 599.23875pt>
|
||||
File: images/Frontend_Application.png Graphic file (type png)
|
||||
<use images/Frontend_Application.png>
|
||||
Package pdftex.def Info: images/Frontend_Application.png used on input line 721.
Package pdftex.def Info: images/Frontend_Application.png used on input line 723.
|
||||
(pdftex.def) Requested size: 284.52162pt x 256.07664pt.
|
||||
[44 <./images/Frontend_Application.png (PNG copy)>]
|
||||
<images/interface_design.png, id=729, 1086.0575pt x 1536.74126pt>
|
||||
<images/interface_design.png, id=714, 1086.0575pt x 1536.74126pt>
|
||||
File: images/interface_design.png Graphic file (type png)
|
||||
<use images/interface_design.png>
|
||||
Package pdftex.def Info: images/interface_design.png used on input line 740.
|
||||
Package pdftex.def Info: images/interface_design.png used on input line 742.
|
||||
(pdftex.def) Requested size: 227.61479pt x 369.88063pt.
|
||||
[45 <./images/interface_design.png>]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 751--753
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 753--755
|
||||
|
||||
[]
|
||||
|
||||
(/usr/share/texlive/texmf-dist/tex/latex/listings/lstlang1.sty
|
||||
File: lstlang1.sty 2015/06/04 1.6 listings language file
|
||||
) [46] [47]
|
||||
[48]
|
||||
LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <10> not available
|
||||
(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 877.
|
||||
[49] [50]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 964--966
|
||||
(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 833.
|
||||
|
||||
[48] [49]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 903--905
|
||||
|
||||
[]
|
||||
|
||||
[51]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 1001--1003
|
||||
[50] [51]
|
||||
Missing character: There is no  in font cmr10!
|
||||
Missing character: There is no £ in font cmr10!
|
||||
LaTeX Font Info: Try loading font information for OML+cmr on input line 958.
|
||||
|
||||
[]
|
||||
|
||||
[52]
|
||||
LaTeX Font Info: Try loading font information for OML+cmr on input line 1014.
|
||||
(/usr/share/texlive/texmf-dist/tex/latex/base/omlcmr.fd
|
||||
File: omlcmr.fd 2014/09/29 v2.5h Standard LaTeX font definitions
|
||||
)
|
||||
LaTeX Font Info: Font shape `OML/cmr/m/n' in size <10> not available
|
||||
(Font) Font shape `OML/cmm/m/it' tried instead on input line 1014.
|
||||
|
||||
[53] [54]
|
||||
[55]
|
||||
Missing character: There is no  in font cmr10!
|
||||
Missing character: There is no £ in font cmr10!
|
||||
[56] [57] [58] [59] [60] [61] [62] [63] [64] [65] [66] [67] [68] [69]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 1583--1585
|
||||
|
||||
[]
|
||||
|
||||
[70] [71] [72] [73] [74] [75] [76] [77] [78]
|
||||
Overfull \hbox (5.27716pt too wide) in paragraph at lines 1805--1805
|
||||
(Font) Font shape `OML/cmm/m/it' tried instead on input line 958.
|
||||
[52]
|
||||
[53] [54] [55] [56] [57] [58] [59] [60] [61] [62] [63] [64] [65] [66] [67]
|
||||
[68] [69] [70] [71] [72]
|
||||
Overfull \hbox (5.27716pt too wide) in paragraph at lines 1509--1509
|
||||
\OT1/cmr/m/it/12 ence on sig-nal pro-cess-ing, com-mu-ni-ca-tion, power and em-
|
||||
bed-ded sys-tem (SCOPES)\OT1/cmr/m/n/12 ,
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (42.7786pt too wide) in paragraph at lines 1805--1805
|
||||
Overfull \hbox (42.7786pt too wide) in paragraph at lines 1509--1509
|
||||
\OT1/cmr/m/n/12 works,'' To-wards Data Sci-ence, 2018. [On-line]. Avail-able: [
|
||||
]$\OT1/cmtt/m/n/12 https : / / towardsdatascience .
|
||||
[]
|
||||
|
||||
[79]
|
||||
Overfull \hbox (86.07425pt too wide) in paragraph at lines 1805--1805
|
||||
[73]
|
||||
Overfull \hbox (86.07425pt too wide) in paragraph at lines 1509--1509
|
||||
\OT1/cmr/m/n/12 works,'' Ma-chine Larn-ing Mas-tery, 2017. [On-line]. Avail-abl
|
||||
e: []$\OT1/cmtt/m/n/12 https : / / machinelearningmastery .
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (30.84552pt too wide) in paragraph at lines 1805--1805
|
||||
Overfull \hbox (30.84552pt too wide) in paragraph at lines 1509--1509
|
||||
\OT1/cmr/m/n/12 lem,'' Su-per Data Sci-ence, 2018. [On-line]. Avail-able: []$\O
|
||||
T1/cmtt/m/n/12 https : / / www . superdatascience .
|
||||
[]
|
||||
|
||||
[80]
|
||||
Overfull \hbox (9.16136pt too wide) in paragraph at lines 1805--1805
|
||||
[74]
|
||||
Overfull \hbox (9.16136pt too wide) in paragraph at lines 1509--1509
|
||||
\OT1/cmr/m/n/12 2019. [On-line]. Avail-able: []$\OT1/cmtt/m/n/12 https : / / me
|
||||
dium . com / datadriveninvestor / overview -[]
|
||||
[]
|
||||
|
||||
[81]
|
||||
Overfull \hbox (2.93918pt too wide) in paragraph at lines 1805--1805
|
||||
[75]
|
||||
Overfull \hbox (2.93918pt too wide) in paragraph at lines 1509--1509
|
||||
[]\OT1/cmr/m/n/12 P. Cryp-tog-ra-phy, ``A tu-to-rial on au-to-matic lan-guage i
|
||||
den-ti-fi-ca-tion - ngram based,''
|
||||
[]
|
||||
|
||||
[82]
|
||||
[76]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1849, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1513, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf>
|
||||
Package pdftex.def Info: PID.pdf used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf used on input line 1514.
|
||||
(pdftex.def) Requested size: 597.551pt x 845.07512pt.
|
||||
|
||||
|
||||
@@ -1305,7 +1294,7 @@ pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf>
|
||||
Package pdftex.def Info: PID.pdf used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf used on input line 1514.
|
||||
(pdftex.def) Requested size: 597.551pt x 845.07512pt.
|
||||
|
||||
|
||||
@@ -1315,269 +1304,255 @@ rsion <1.7>, but at most version <1.5> allowed
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1852, page=1, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1516, page=1, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 1>
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1514.
|
||||
(pdftex.def) Requested size: 597.551pt x 845.07512pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 1>
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[83]
|
||||
[77]
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 1>
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 1>
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 1>
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page1 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[84 <./PID.pdf>]
|
||||
[78 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1882, page=2, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1547, page=2, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 2>
|
||||
Package pdftex.def Info: PID.pdf , page2 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page2 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 2>
|
||||
Package pdftex.def Info: PID.pdf , page2 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page2 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 2>
|
||||
Package pdftex.def Info: PID.pdf , page2 used on input line 1810.
|
||||
Package pdftex.def Info: PID.pdf , page2 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[79 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1554, page=3, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 3>
|
||||
Package pdftex.def Info: PID.pdf , page3 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 3>
|
||||
Package pdftex.def Info: PID.pdf , page3 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 3>
|
||||
Package pdftex.def Info: PID.pdf , page3 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[80 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1568, page=4, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 4>
|
||||
Package pdftex.def Info: PID.pdf , page4 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 4>
|
||||
Package pdftex.def Info: PID.pdf , page4 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 4>
|
||||
Package pdftex.def Info: PID.pdf , page4 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[81 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1574, page=5, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 5>
|
||||
Package pdftex.def Info: PID.pdf , page5 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 5>
|
||||
Package pdftex.def Info: PID.pdf , page5 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 5>
|
||||
Package pdftex.def Info: PID.pdf , page5 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[82 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1580, page=6, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 6>
|
||||
Package pdftex.def Info: PID.pdf , page6 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 6>
|
||||
Package pdftex.def Info: PID.pdf , page6 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 6>
|
||||
Package pdftex.def Info: PID.pdf , page6 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[83 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1586, page=7, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 7>
|
||||
Package pdftex.def Info: PID.pdf , page7 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 7>
|
||||
Package pdftex.def Info: PID.pdf , page7 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 7>
|
||||
Package pdftex.def Info: PID.pdf , page7 used on input line 1514.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[84 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1592, page=8, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 8>
|
||||
Package pdftex.def Info: PID.pdf , page8 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 8>
|
||||
Package pdftex.def Info: PID.pdf , page8 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 8>
|
||||
Package pdftex.def Info: PID.pdf , page8 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[85 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1889, page=3, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1602, page=9, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 3>
|
||||
Package pdftex.def Info: PID.pdf , page3 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 9>
|
||||
Package pdftex.def Info: PID.pdf , page9 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 3>
|
||||
Package pdftex.def Info: PID.pdf , page3 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 9>
|
||||
Package pdftex.def Info: PID.pdf , page9 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 3>
|
||||
Package pdftex.def Info: PID.pdf , page3 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 9>
|
||||
Package pdftex.def Info: PID.pdf , page9 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[86 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1903, page=4, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1612, page=10, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 4>
|
||||
Package pdftex.def Info: PID.pdf , page4 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 10>
|
||||
Package pdftex.def Info: PID.pdf , page10 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 4>
|
||||
Package pdftex.def Info: PID.pdf , page4 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 10>
|
||||
Package pdftex.def Info: PID.pdf , page10 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 4>
|
||||
Package pdftex.def Info: PID.pdf , page4 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 10>
|
||||
Package pdftex.def Info: PID.pdf , page10 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[87 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1909, page=5, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1624, page=11, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 5>
|
||||
Package pdftex.def Info: PID.pdf , page5 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 11>
|
||||
Package pdftex.def Info: PID.pdf , page11 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 5>
|
||||
Package pdftex.def Info: PID.pdf , page5 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 11>
|
||||
Package pdftex.def Info: PID.pdf , page11 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 5>
|
||||
Package pdftex.def Info: PID.pdf , page5 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 11>
|
||||
Package pdftex.def Info: PID.pdf , page11 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[88 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1915, page=6, 597.55246pt x 845.07718pt>
|
||||
<PID.pdf, id=1630, page=12, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 6>
|
||||
Package pdftex.def Info: PID.pdf , page6 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 12>
|
||||
Package pdftex.def Info: PID.pdf , page12 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 6>
|
||||
Package pdftex.def Info: PID.pdf , page6 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 12>
|
||||
Package pdftex.def Info: PID.pdf , page12 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 6>
|
||||
Package pdftex.def Info: PID.pdf , page6 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
<use PID.pdf, page 12>
|
||||
Package pdftex.def Info: PID.pdf , page12 used on input line 1514.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[89 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1921, page=7, 597.55246pt x 845.07718pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 7>
|
||||
Package pdftex.def Info: PID.pdf , page7 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 7>
|
||||
Package pdftex.def Info: PID.pdf , page7 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 7>
|
||||
Package pdftex.def Info: PID.pdf , page7 used on input line 1810.
|
||||
(pdftex.def) Requested size: 562.1644pt x 795.0303pt.
|
||||
[90 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1927, page=8, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 8>
|
||||
Package pdftex.def Info: PID.pdf , page8 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 8>
|
||||
Package pdftex.def Info: PID.pdf , page8 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 8>
|
||||
Package pdftex.def Info: PID.pdf , page8 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[91 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1937, page=9, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 9>
|
||||
Package pdftex.def Info: PID.pdf , page9 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 9>
|
||||
Package pdftex.def Info: PID.pdf , page9 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 9>
|
||||
Package pdftex.def Info: PID.pdf , page9 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[92 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1947, page=10, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 10>
|
||||
Package pdftex.def Info: PID.pdf , page10 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 10>
|
||||
Package pdftex.def Info: PID.pdf , page10 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 10>
|
||||
Package pdftex.def Info: PID.pdf , page10 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[93 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1959, page=11, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 11>
|
||||
Package pdftex.def Info: PID.pdf , page11 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 11>
|
||||
Package pdftex.def Info: PID.pdf , page11 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 11>
|
||||
Package pdftex.def Info: PID.pdf , page11 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[94 <./PID.pdf>]
|
||||
|
||||
pdfTeX warning: /usr/bin/pdflatex (file ./PID.pdf): PDF inclusion: found PDF ve
|
||||
rsion <1.7>, but at most version <1.5> allowed
|
||||
<PID.pdf, id=1966, page=12, 845.07718pt x 597.55246pt>
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 12>
|
||||
Package pdftex.def Info: PID.pdf , page12 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 12>
|
||||
Package pdftex.def Info: PID.pdf , page12 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
File: PID.pdf Graphic file (type pdf)
|
||||
<use PID.pdf, page 12>
|
||||
Package pdftex.def Info: PID.pdf , page12 used on input line 1810.
|
||||
(pdftex.def) Requested size: 795.0303pt x 562.1644pt.
|
||||
[95 <./PID.pdf>]
|
||||
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1814.
|
||||
[96]
|
||||
Package atveryend Info: Empty hook `AfterLastShipout' on input line 1814.
|
||||
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1519.
|
||||
[90]
|
||||
Package atveryend Info: Empty hook `AfterLastShipout' on input line 1519.
|
||||
(./document.aux)
|
||||
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1814.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1814.
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1519.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1519.
Package rerunfilecheck Warning: File `document.out' has changed.
(rerunfilecheck)                Rerun to get outlines right
(rerunfilecheck)                or use package `bookmark'.
Package rerunfilecheck Info: Checksums for `document.out':
(rerunfilecheck)             Before: CDC279F7387D171F7AA2EB28D466CD78;12655
(rerunfilecheck)             After:  CD60AF62A8BBB902B6DAD8AA7E67EBB5;12369.
Package rerunfilecheck Info: File `document.out' has not changed.
(rerunfilecheck)             Checksum: 59F160EAB469F5A1258FC6296E21E0C8;12216.
Package logreq Info: Writing requests to 'document.run.xml'.
\openout1 = `document.run.xml'.
)
Here is how much of TeX's memory you used:
 25256 strings out of 492982
 396294 string characters out of 6134895
 1047110 words of memory out of 5000000
 27406 multiletter control sequences out of 15000+600000
 13923 words of font info for 54 fonts, out of 8000000 for 9000
 24875 strings out of 492982
 390821 string characters out of 6134895
 1019110 words of memory out of 5000000
 27326 multiletter control sequences out of 15000+600000
 13564 words of font info for 53 fonts, out of 8000000 for 9000
 1141 hyphenation exceptions out of 8191
 45i,18n,78p,2008b,1820s stack positions out of 5000i,500n,10000p,200000b,80000s
pdfTeX warning (dest): name{subsection.10.9} has been referenced but does not exist, replaced by a fixed one
pdfTeX warning (dest): name{subsection.10.8} has been referenced but does not exist, replaced by a fixed one
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbxti10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy8.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt12.pfb>
Output written on document.pdf (96 pages, 1570495 bytes).
 45i,18n,78p,2008b,1818s stack positions out of 5000i,500n,10000p,200000b,80000s
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr8.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy8.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti12.pfb>
</usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt12.pfb>
Output written on document.pdf (90 pages, 1526653 bytes).
PDF statistics:
 2249 PDF objects out of 2487 (max. 8388607)
 2076 compressed objects within 21 object streams
 1065 named destinations out of 1200 (max. 500000)
 768 words of extra memory for PDF output out of 10000 (max. 10000000)
 1851 PDF objects out of 2073 (max. 8388607)
 1689 compressed objects within 17 object streams
 765 named destinations out of 1000 (max. 500000)
 744 words of extra memory for PDF output out of 10000 (max. 10000000)
29
document.out
@ -60,18 +60,17 @@
\BOOKMARK [3][-]{subsubsection.10.3.4}{\376\377\000M\000e\000t\000r\000i\000c\000s}{subsection.10.3}% 60
\BOOKMARK [2][-]{subsection.10.4}{\376\377\000S\000e\000n\000t\000i\000m\000e\000n\000t\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section.10}% 61
\BOOKMARK [2][-]{subsection.10.5}{\376\377\000R\000e\000c\000u\000r\000r\000e\000n\000t\000\040\000N\000e\000u\000r\000a\000l\000\040\000N\000e\000t\000w\000o\000r\000k\000\040\000-\000\040\000L\000S\000T\000M}{section.10}% 62
\BOOKMARK [3][-]{subsubsection.10.5.1}{\376\377\000S\000c\000r\000i\000p\000t\000\040\000E\000x\000e\000c\000u\000t\000i\000o\000n}{subsection.10.5}% 63
\BOOKMARK [3][-]{subsubsection.10.5.2}{\376\377\000D\000a\000t\000a\000s\000e\000t\000\040\000C\000r\000e\000a\000t\000i\000o\000n}{subsection.10.5}% 64
\BOOKMARK [3][-]{subsubsection.10.5.3}{\376\377\000T\000r\000a\000i\000n\000i\000n\000g\000\040\000a\000n\000d\000\040\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000o\000d\000e\000l}{subsection.10.5}% 65
\BOOKMARK [2][-]{subsection.10.6}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000P\000r\000e\000d\000i\000c\000t\000i\000o\000n\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section.10}% 66
\BOOKMARK [2][-]{subsection.10.7}{\376\377\000U\000s\000e\000r\000\040\000I\000n\000t\000e\000r\000f\000a\000c\000e}{section.10}% 67
\BOOKMARK [1][-]{section.11}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000e\000t\000r\000i\000c\000s\000\040\000a\000n\000d\000\040\000A\000c\000c\000u\000r\000a\000c\000y}{}% 68
\BOOKMARK [1][-]{section.12}{\376\377\000P\000r\000o\000j\000e\000c\000t\000\040\000E\000v\000a\000l\000u\000a\000t\000i\000o\000n}{}% 69
\BOOKMARK [1][-]{section.13}{\376\377\000D\000i\000s\000c\000u\000s\000s\000i\000o\000n\000:\000\040\000C\000o\000n\000t\000r\000i\000b\000u\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 70
\BOOKMARK [2][-]{subsection.13.1}{\376\377\000L\000i\000m\000i\000t\000a\000t\000i\000o\000n\000s}{section.13}% 71
\BOOKMARK [1][-]{section.14}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{}% 72
\BOOKMARK [2][-]{subsection.14.1}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n}{section.14}% 73
\BOOKMARK [2][-]{subsection.14.2}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{section.14}% 74
\BOOKMARK [1][-]{section.15}{\376\377\000A\000p\000p\000e\000n\000d\000i\000c\000e\000s}{}% 75
\BOOKMARK [2][-]{subsection.15.1}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000A\000\040\000-\000\040\000P\000r\000o\000j\000e\000c\000t\000\040\000I\000n\000i\000t\000i\000a\000t\000i\000o\000n\000\040\000D\000o\000c\000u\000m\000e\000n\000t}{section.15}% 76
\BOOKMARK [2][-]{subsection.15.2}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000B\000\040\000-\000\040\000L\000o\000g\000\040\000b\000o\000o\000k}{section.15}% 77
\BOOKMARK [3][-]{subsubsection.10.5.1}{\376\377\000D\000a\000t\000a\000s\000e\000t\000\040\000C\000r\000e\000a\000t\000i\000o\000n}{subsection.10.5}% 63
\BOOKMARK [3][-]{subsubsection.10.5.2}{\376\377\000T\000r\000a\000i\000n\000i\000n\000g\000\040\000a\000n\000d\000\040\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000o\000d\000e\000l}{subsection.10.5}% 64
\BOOKMARK [2][-]{subsection.10.6}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000P\000r\000e\000d\000i\000c\000t\000i\000o\000n\000\040\000F\000o\000r\000e\000c\000a\000s\000t\000i\000n\000g}{section.10}% 65
\BOOKMARK [2][-]{subsection.10.7}{\376\377\000U\000s\000e\000r\000\040\000I\000n\000t\000e\000r\000f\000a\000c\000e}{section.10}% 66
\BOOKMARK [1][-]{section.11}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000M\000e\000t\000r\000i\000c\000s\000\040\000a\000n\000d\000\040\000A\000c\000c\000u\000r\000a\000c\000y}{}% 67
\BOOKMARK [1][-]{section.12}{\376\377\000P\000r\000o\000j\000e\000c\000t\000\040\000E\000v\000a\000l\000u\000a\000t\000i\000o\000n}{}% 68
\BOOKMARK [1][-]{section.13}{\376\377\000D\000i\000s\000c\000u\000s\000s\000i\000o\000n\000:\000\040\000C\000o\000n\000t\000r\000i\000b\000u\000t\000i\000o\000n\000\040\000a\000n\000d\000\040\000R\000e\000f\000l\000e\000c\000t\000i\000o\000n}{}% 69
\BOOKMARK [2][-]{subsection.13.1}{\376\377\000L\000i\000m\000i\000t\000a\000t\000i\000o\000n\000s}{section.13}% 70
\BOOKMARK [1][-]{section.14}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{}% 71
\BOOKMARK [2][-]{subsection.14.1}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n}{section.14}% 72
\BOOKMARK [2][-]{subsection.14.2}{\376\377\000F\000u\000t\000u\000r\000e\000\040\000I\000m\000p\000r\000o\000v\000e\000m\000e\000n\000t\000s}{section.14}% 73
\BOOKMARK [1][-]{section.15}{\376\377\000A\000p\000p\000e\000n\000d\000i\000c\000e\000s}{}% 74
\BOOKMARK [2][-]{subsection.15.1}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000A\000\040\000-\000\040\000P\000r\000o\000j\000e\000c\000t\000\040\000I\000n\000i\000t\000i\000a\000t\000i\000o\000n\000\040\000D\000o\000c\000u\000m\000e\000n\000t}{section.15}% 75
\BOOKMARK [2][-]{subsection.15.2}{\376\377\000A\000p\000p\000e\000n\000d\000i\000x\000\040\000B\000\040\000-\000\040\000L\000o\000g\000\040\000b\000o\000o\000k}{section.15}% 76
BIN
document.pdf
Binary file not shown.

437
document.tex
@ -604,6 +604,8 @@
Due to the continued support and development of TensorFlow, the broad community and the availability of a high-level wrapper, Keras, this library will be used for this project. Although PyTorch is a good alternative, it is not as easy to use or implement when compared to TensorFlow with Keras.

The Adam optimiser will be used for the neural network. This is because it accomplishes what both RMSProp and Adagrad set out to solve regarding issues with gradient descent, but builds upon them by also using the average of the second moments of the gradients (uncentred variance).
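As a minimal sketch of this choice (assuming a Keras \textit{Sequential} model has already been defined; the layer and learning rate shown are illustrative placeholders rather than the values used by the system), selecting Adam only requires naming it at compilation time:

\begin{lstlisting}[language=python, caption=Sketch of compiling a Keras model with the Adam optimiser (illustrative)]
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(1, input_dim=1))  # placeholder layer; the real network is defined later

# Adam combines the adaptive per-parameter learning rates of Adagrad/RMSProp
# with estimates of the second moments (uncentred variance) of the gradients.
model.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error')
\end{lstlisting}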
\subsection{Price Forecasting}
This part of the system will be responsible for predicting the next time-step of Bitcoin's price for the following hour based on past data. It will use the trained model from the neural network to predict the next hour's price when given live hourly data, price and sentiment. The system will also have a look back of 5, which allows it to see historical data to aid in the predictions. This will occur on the hour, every hour, when new data is received and processed; this data will also be merged and then split into training and testing data. The sizing can be decided upon system creation, but the standard split is 75:25 for training and testing respectively.
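The windowing and splitting described above can be sketched as follows (the helper name and array contents are illustrative assumptions, not the system's actual function names or data):

\begin{lstlisting}[language=python, caption=Sketch of a look-back window and a 75:25 train/test split (illustrative)]
import numpy as np

def make_windows(series, lookback=5):
    # Build (X, y) pairs where X holds the previous `lookback` values
    # and y is the value for the next time-step.
    X, y = [], []
    for i in range(len(series) - lookback):
        X.append(series[i:i + lookback])
        y.append(series[i + lookback])
    return np.array(X), np.array(y)

prices = np.arange(100, dtype='float32')      # stand-in for hourly prices
X, y = make_windows(prices, lookback=5)

split = int(len(X) * 0.75)                    # 75:25 train/test split
train_X, test_X = X[:split], X[split:]
train_Y, test_Y = y[:split], y[split:]
\end{lstlisting}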
@ -748,12 +750,12 @@

\begin{center}
\section{Implementation}\label{implementation}
\end{center}
This section will outline the method and process of development of this system to satisfy the chosen solution, technical specification and the problem statement. Each section of the system will be outlined and discussed with relevant codes snippets of essential methods from the system to highlight the processing of data throughout.
This section will outline the method and process of development of the system to satisfy the chosen solution, the technical specification and the problem statement. Each section of the system will be outlined and discussed with relevant code snippets of essential methods from the system to highlight the processing of data throughout. Some sections will reference code snippets in \textit{Appendix B}, due to the size of some crucial methods.
\newline

\subsection{Data collection}\label{collection}
\subsubsection{Price Time-Series Historical Data}
Historical price data were extracted from a CSV historical price tracker, \textit{Bitcoin Charts} \cite{37}. This tracker provided the historical data from the three exchanges used for Live price collection - Coinbase, Bitfinex and Gemini, since the exchanges supported the cryptocurrency. The data used spans from \textit{2018-01-06} to \textit{2019-01-06}.
Historical price data was extracted from a CSV historical price tracker, \textit{Bitcoin Charts} \cite{37}. This tracker provided the historical data from the three exchanges used for live price collection - Coinbase, Bitfinex and Gemini - since these exchanges supported the cryptocurrency. The data used spans from \textit{2018-01-06} to \textit{2019-01-06}.

\begin{lstlisting}[language=Python, caption=Historical price collection and averaging per exchange]
...
@ -783,32 +785,11 @@ data = data.round(3)
\subsubsection{Price Time-Series Live Data}
Live price data, as described in the solution approach, was extracted every hour from three exchanges. Coinbase, Bitfinex and Gemini were chosen to provide this data as they are among the most popular exchange platforms that provide an API for retrieving live price data.

Key packages used:
\begin{lstlisting}[language=Python, caption=Key packages used for live price collection]
import requests

from coinbase.wallet.client import Client

from dotenv import load_dotenv
from pathlib import Path
env_path = Path('.')/'data_collector/prices/config/coinbase.env'
load_dotenv(dotenv_path=env_path)
\end{lstlisting}

\textbf{\textit{Requests}} was used to make the API endpoint calls to obtain the response that contained the three prices needed for the hour.

The \textbf{\textit{Coinbase}} package was mandatory for establishing a connection with the Coinbase API; despite this requirement, the exchange was still used, as it is regarded as the most popular exchange for the general public, with one of the highest flows of traffic through the site to purchase cryptocurrencies.

Both the \textbf{\textit{dotenv}} and \textbf{\textit{pathlib}} packages were used to extract the API keys (access and secret keys) from the relevant \textit{'.env'} file, used alongside the Coinbase package for connection to the Coinbase API.

The \textit{'high'}, \textit{'mid'} and \textit{'low'} prices were extracted from the endpoint response and averaged to provide an overall hourly price per exchange.
\begin{lstlisting}[language=Python, caption=Extraction of Price from exchanges]
def coinbase():

    api_key = keys().api_key
    api_secret = keys().api_secret

    ...
    try:
        client = Client(api_key, api_secret)
        repsonse = client.get_spot_price(currency_pair = 'BTC-USD')
@ -842,33 +823,8 @@ def gemini():
The above code shows how this was implemented as a system for the price extraction from the APIs.

These functions are called every hour by a master function which uses the averaged price from each exchange to average and creates a fair, unbiased hourly price, which is the saved to a CSV containing the live unbiased price for the hour along with the time of creation. The below code shows how this is implemented:
These functions are called every hour by a master function which takes the averaged price from each exchange and averages them again to create a fair, unbiased hourly price, which is then saved to a CSV containing the live unbiased price for the hour along with the time of creation. The function also checks whether an error state was returned from any of the exchange functions (in which case the default price is set to zero); instead of averaging all three exchanges, only the responses that successfully returned a price are averaged.
\begin{lstlisting}[language=Python, caption=Creation of the unbiased hourly price]
def collector(priceCSV, fieldnames):

    now = datetime.now()

    coinbase_P = coinbase()
    bitfinex_P = bitfinex()
    gemini_P = gemini()

    # If any exchange returned an error state (0), average only the successful responses
    if coinbase_P == 0 or bitfinex_P == 0 or gemini_P == 0:
        if coinbase_P == 0 and bitfinex_P == 0:
            averagePrice = gemini_P
        elif coinbase_P == 0 and gemini_P == 0:
            averagePrice = bitfinex_P
        elif bitfinex_P == 0 and gemini_P == 0:
            averagePrice = coinbase_P
        else:
            # exactly one exchange failed, so average the two successful prices
            averagePrice = (coinbase_P + bitfinex_P + gemini_P)/2
    else:
        averagePrice = (coinbase_P + bitfinex_P + gemini_P)/3

    averagePrice = round(averagePrice, 3)
\end{lstlisting}
\subsubsection{Historical Tweet Collection}
Historical tweets were obtained directly from the Twitter API through a simple Curl command for the given date range of the past year. Multiple accounts were created to obtain the amount of data needed, as detailed in the data gathering section under the solution approach. Due to the vast amount needed, 5 tweets averaged per hour for the past year would require 1.2 requests per day (40320 total to get a whole year's worth), totalling 9,050,000 tweets. As this was highly unfeasible with the API access available for this project, 1 tweet per hour (25 per day, 1 request per 4 days) was obtained rather than the average, which resulted in only ~92 requests needed to get the required data.
@ -935,48 +891,21 @@ def processTweet(tweet, tweetFilter):

    .... # other finished else statements with print statements
\end{lstlisting}

As detailed in the comments for the code, this function conducts multiple methods on the data, all of which are predefined in other files. These are not redefined in this function to reduce code duplication throughout the system and hence are imported at the beginning of the file. Due to the nature of spam filtering tweets were inevitably removed; therefore a few hours were missing data. This resolved by making another request for that specific hour and averaging the sentiment for the given hour to fill missing data.
As detailed in the comments for the code, this function calls external functions and performs data manipulation on the data, most of which are predefined in the \textit{tweet\_collector.py} script. These are not redefined in this function, to reduce code duplication throughout the system, and hence are imported at the beginning of the file. Due to the nature of spam filtering, tweets were inevitably removed; therefore a few hours of data were missing. This was resolved by making another request for that specific hour and averaging the sentiment for the given hour to fill the missing data.
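As a small illustration of how a back-filled hour can be reduced to a single sentiment value (the column names follow the CSV fields used elsewhere in the system, but the snippet itself is an illustrative assumption rather than the project's code):

\begin{lstlisting}[language=python, caption=Sketch of averaging re-requested tweets' sentiment for a missing hour (illustrative)]
import pandas as pd

# Tweets re-requested for the missing hour, with their VADER compound scores
backfill = pd.DataFrame({
    'created_at': ['2018-06-01 13:00:00'] * 3,
    'compound':   [0.42, -0.10, 0.25],
})

# One averaged sentiment value per hour, used to fill the gap in the hourly dataset
hourly_sentiment = backfill.groupby('created_at')['compound'].mean().round(3)
print(hourly_sentiment)
\end{lstlisting}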
\subsubsection{Live Tweet Collection}
Live tweets were obtained through the use of the Tweepy package to stream current tweets per hour from the Twitter API. Spam filter detection,, data pre-processing and language detection are also conducted on this data and are defined within this python script \textit{'tweet\_collector.py'}, these functions will be described in the relevant sections in Data processing section.
Live tweets were obtained through the use of the Tweepy package to stream current tweets per hour from the Twitter API. Spam filter detection, data pre-processing and language detection are also conducted on this data and are defined within the python script \textit{'tweet\_collector.py'}; these functions are described in the relevant parts of the Data processing section.

When this script, \textit{'tweet\_collector.py'}, is ran it firstly initialises the CSV files for storing tweets and tweets that have been assigned polarities by the VADER. More importantly it initialises the spam filter and trains it based on the pre-labelled spam dataset.
\begin{lstlisting}[language=python, caption=Spam filter initialisation and training functions]
## In __main__ when script is first ran
...
On the initial run of the \textit{'tweet\_collector.py'} script, the CSV files for storing collected tweets and tweets with polarities assigned by the VADER analyser are initialised. More importantly, it initialises the spam filter and trains it based on the pre-labelled spam dataset.

tweetFilter = filterSpam(training_set)
tweetFilter.trainFilter()
## Initialise with loaded training_set and train

prediction = tweetFilter.testData_Prediction()
# test classification model with test tweets

tweetFilter.filterStatistics(prediction)
# Print metric accuracies for test data

tweetFilter.testPrediction()
# Test classifier with hard specified tweets - to check if it correctly classifies
\end{lstlisting}
Said functions relate to a function defined under the \textit{filterSpam} class which are used to create the training and test datasets. This function will be described in the Spam Filtering section below.
The functions used for training relate to functions defined under the \textit{filterSpam} class, which are used to create the training and test datasets. These are described in the Spam Filtering section below.

The streaming of tweets is handled by the Tweepy package and is first initialised upon starting the python script. The streaming method works by establishing a listener authenticated with the Twitter API; it then listens on that connection for data. This streamer can also filter on language and a specified hashtag, which is loaded from a \textit{'.env'} file that also contains the API keys for authentication.
\newline
\begin{lstlisting}[language=python, caption=Tweepy Streamer setup]
...# in __main__ #Code ran first on script run
twitter_streamer = Streamer()
twitter_streamer.stream_tweets(tweets_file, temp_tweets, hashtag, tweetFilter, analyser)

#========================================
class Streamer():

    def __init__(self):
        pass
        # Initialise stream object

    def stream_tweets(self, tweets_file, temp_tweets, hashtag, tweetFilter, analyser):
        listener = Listener(tweets_file, temp_tweets, tweetFilter, analyser)
        auth = OAuthHandler(keys().api_key, keys().api_secret)
@ -999,103 +928,6 @@ def processTweet(tweet, tweetFilter):
Once the listener and streamer are declared and Tweepy begins listening, all data is processed through the \textit{on\_data} method. In this function, the tweet is extracted from the response, and data pre-processing, language detection, spam classification and sentiment analysis are performed on it. Additionally, there is an initial check against a time limit - this is used to ensure that the script runs for just under an hour and restarts every hour. This allows the average of the gathered tweets' sentiment to be calculated for that hour and then used for the network price predictions.

The tweet text can be nested in multiple attributes in the response; this depends on a few factors, namely what the tweet is and how it was posted on Twitter. If a user retweeted the tweet, the text of the tweet will be nested under \textit{'retweeted\_status'} in the JSON response. There is also a check to see whether the tweet is above the original Twitter character limit (140 characters) - a possible legacy parameter in the Twitter API, but one that is checked for every data response. If an attribute \textit{'extended\_tweet'} exists, the tweet exceeds 140 characters but is under Twitter's hard limit of 280 characters; the same filtering applies when it is a non-retweeted tweet.
\newline
\begin{lstlisting}[language=python, caption=Tweepy Stream: 'on\_data' method]
import spam_filter
import analysis_engine.sentiment_analysis as sentiment_analysis
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
import csv
...
def on_data(self, data):
    ## Check time limit for under an hour - if limit reached kill script
    if (time.time() - self.start_time) < self.limit:

        now = datetime.now() + timedelta(hours=1)
        ## Sets current time, adds 1 hour because the script finishes before the hour is complete

        data = json.loads(data)

        # Tweet Extraction from response
        try:
            # Check if tweet is a retweet
            if 'retweeted_status' in data:
                if 'extended_tweet' in data['retweeted_status']:
                    # if tweet is over the 140 character limit
                    text = data['retweeted_status']['extended_tweet']['full_text']
                    print("Uncleaned Tweet:", text)
                    sys.stdout.flush()
                else:
                    text = data['retweeted_status']['text']
                    print("Uncleaned Tweet:", text)
                    sys.stdout.flush()
            else:
                # Else if a normal Tweet
                if 'extended_tweet' in data:
                    # If tweet is over the 140 character limit
                    text = data['extended_tweet']['full_text']
                    print("Uncleaned Tweet:", text)
                    sys.stdout.flush()
                else:
                    # Else if not found in nested attributes look in top-level
                    text = data['text']
                    print("Uncleaned Tweet: ", text)
                    sys.stdout.flush()

            # Data cleaning and pre-processing prior to polarity classification
            removedLines = utilityFuncs().fixLines(text)
            removedSpecialChars = utilityFuncs().cleanTweet(removedLines)
            removedSpacing = utilityFuncs().removeSpacing(removedSpecialChars[0])

            tweetLength = utilityFuncs().checkLength(removedSpacing)

            # Check if tweet is long enough to perform polarity classification on (> 5 words (checked through tokenisation))
            if tweetLength == True:
                checkIfEnglish = utilityFuncs().detectLaguage(removedSpecialChars[0])
                # Check if the text in tweet is predominantly English, if not drop
                if checkIfEnglish == True:
                    tweetText = utilityFuncs().remove_non_ascii(removedSpacing)
                    print("Cleaned Tweet: ", tweetText)
                    sys.stdout.flush()

                    # re-combine emojis onto end of tweet (due to VADER supporting emoticon sentiment assignment)
                    cleanedTweet = tweetText+' '+removedSpecialChars[1]

                    ## Check if spam, drop if classified as such
                    classification = self.tweetFilter.testTweet(cleanedTweet)

                    if classification == False:
                        ## Perform Sentiment Analysis using VADER
                        ovSentiment, compound = self.analyser.get_vader_sentiment(cleanedTweet)

                        # Save date/hour, tweet text, highest sentiment score from Positive or Negative and compound score
                        try:
                            # temp file which is used at end of hour streaming to average sentiment for hour
                            with open(temp_tweets, mode='a') as csv_file:
                                writer = csv.DictWriter(csv_file, fieldnames=temp_fieldnames)
                                writer.writerow({'created_at': now.strftime("%Y-%m-%d %H:%M:%S"), 'tweet': cleanedTweet, 'sentiment': ovSentiment, 'compound': compound})
                        except BaseException as exception:
                            print("1 Error: %s" % str(exception))
                            sys.stdout.flush()

                        # Save date/hour, tweet text, highest sentiment score from Positive or Negative and compound score
                        try:
                            # tweet file for storing all collected tweets from every hour
                            with open(tweets_file, mode='a') as csv_file:
                                writer = csv.DictWriter(csv_file, fieldnames=fieldnames_tweet)
                                writer.writerow({'created_at': now.strftime("%Y-%m-%d %H:%M:%S"), 'tweet': cleanedTweet, 'sentiment': ovSentiment, 'compound': compound})
                        except BaseException as exception:
                            print("2 Error: %s" % str(exception))
                            sys.stdout.flush()
                    else:
                        print("Console: ", "Tweet is spam. Not storing tweet in dataset")
                        sys.stdout.flush()
            ...
            ... # Closing else statements with print statements for when the tweet doesn't meet criteria
        ...
\end{lstlisting}
As for the key facts about this function: the length of the tweet is checked to be above 5 tokens, because any tweet with fewer words will not contain enough information to be given a proper polarity classification and almost always returns as 100\% neutral, which is of no use and has no effect on the hour's average sentiment. The entire code in the function is encapsulated in a try-catch to check whether data was received and to handle non-responses and missing data. If there was no data, the issue is ignored; however, if the connection between the streamer and the API is broken, the script exits.

@ -1109,12 +941,6 @@ def on_data(self, data):

Various 'Utility Functions' have been used to initially filter out unwanted data from tweet text. These functions are called by both live tweet (\textit{tweet\_collector.py}) and historical tweet (\textit{sift\_text.py}) processing, prior to any polarity classification or storing of tweet data to CSV files.
\begin{lstlisting}[language=python, caption=Basic data filtering and processing function - defined in 'tweet\_collector.py']
import re
import emoji as ji
## Key packages used
...
class utilityFuncs():

    def cleanTweet(self, text):
        # Function to clean tweets, removes links and special characters
        return re.sub(r'([^0-9A-Za-z \-\%\£\$ \t])|(@[A-Za-z0-9]+)|(http\S+)', '', text), ' '.join(c for c in text if c in ji.UNICODE_EMOJI)
@ -1141,21 +967,15 @@ class utilityFuncs():
\end{lstlisting}
Due to VADER being a lexicon-based sentiment analyser, little data pre-processing needs to be conducted on the tweet text. The functions above primarily remove unnecessary text from the tweet that will either provide no insight into public opinion or can obstruct a proper classification of the sentiment - such as the existence of URLs in the given text. Additionally, the 'clean\_tweet' function removes the emojis in the given text, if any are present, using the emoji package - which in turn is another lexicon that compares the given text to the emoticons contained within it. These are removed at this stage but are later re-added to the text, as VADER supports emoticon classification.
The last function in 'utility functions', 'checkLength' splits the text up into individual words (tokens - a process of tokenisation), this is used to check the total length of a tweet. If the tweet is less than five words it is dropped from classification, this is due to
The last function in 'utility functions', 'checkLength', splits the text up into individual words (tokens - a process of tokenisation); this is used to check the total length of a tweet. If the tweet is fewer than five words, it is dropped from classification. This is because text containing fewer than five words is less likely to produce a meaningful polarity classification than text above that limit; additionally, any meaningful information is unlikely to fit into so few words.
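A minimal sketch of the length check described above (the function body is an assumption based on the description, since the original 'checkLength' implementation is not reproduced here):

\begin{lstlisting}[language=python, caption=Sketch of a tokenisation-based length check (illustrative)]
from nltk.tokenize import word_tokenize

def checkLength(text, minimum_words=5):
    # Tokenise the tweet and only keep it if it contains enough words
    # to be given a meaningful polarity classification.
    tokens = word_tokenize(text)
    return len(tokens) >= minimum_words

print(checkLength("Bitcoin closed with some gains in February"))  # True
print(checkLength("To the moon"))                                 # False
\end{lstlisting}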
\subsubsection{Language detection filtering}

This feature of the system is used as an additional filter for filtering out non-English tweets. As discussed in the solution approach, Tweepy/Twitter API provides a means to filter out non-English based tweets, this, however, doesn't work if the user has settings on Twitter set to be English as a prefered language and the region 'en'. Due to this non-English characters can still be within collected tweets; thus these are detected and filtered with the below function.
The language detection feature of the system is used as an additional filter for removing non-English tweets. As discussed in the solution approach, the Tweepy/Twitter API provides a means to filter out non-English tweets. This, however, will not work if the user's Twitter settings, such as the preferred language and the region, are set to English. Due to this, non-English characters can still be contained within the collected tweets; these are therefore detected and filtered with the language detection function.
\begin{lstlisting}[language=python, caption=Language detection and filter function \cite{38}]
def detectLaguage(self, text):
    """
    Calculate the probability of the given text being written in several languages,
    using nltk stopwords and comparing against all supported languages.

    There are other ways to identify this - TextBlob.detect_language and Ngrams
    """
    language_ratios = {}
    ...

    # Split words up into tokens - tokenisation
    tokens = wordpunct_tokenize(text)
@ -1186,20 +1006,19 @@ def detectLaguage(self, text):
    # If text is not predominately English drop tweet
\end{lstlisting}
This function uses several natural languages pre-processing techniques to identify the most predominant language for a given text. It accomplishes this by first tokenising the text into tokens and converting them to lower case - this is so that the stopwords can be identified. For each of the languages supported by the Natural Language Toolkit Python package, the stopwords are identified in the text and compared to the stopwords in the language corpus' in NLTK. The ratios for the individual languages are formed, and then the predominant language identified. If the language is not predominantly English, the tweet is dropped. There is however an issue with this approach, if a tweet contains too many special characters - characters that are allowed, the tweet occasionally is not classified as English even when it predominantly is upon visual inspection; therefore the tweet is dropped and not processed. This isn't a significant issue as about 3000 tweets can be collected in an hour, and some of these would be filtered out by the spam filter regardless.
The language detection function uses several natural language pre-processing techniques to identify the predominant language of the given text. This is accomplished by first tokenising the text and converting the tokens to lower case - this is so that the stopwords can be identified. For each of the languages supported by the Natural Language Toolkit Python package, the stopwords are identified in the text and compared to the stopwords in the language corpora in NLTK. The ratios for the individual languages are formed, and then the predominant language is identified. If the language is not predominantly English, the tweet is dropped.

There is, however, an issue with this approach: if a tweet contains too many special characters (characters that are allowed), the tweet is occasionally not classified as English even when, upon visual inspection, it predominantly is; the tweet is therefore dropped and not processed. This is not a significant issue, as about 3000 tweets can be collected in an hour, and some of these would be filtered out by the spam filter regardless.

Additionally, an n-gram method could be used to distinguish the language of the given text and may perform more accurately than the word-based approach that was implemented \cite{39}. This could be a later improvement; since the n-gram approach requires a corpus for each language to compare against, the word-based approach is sufficient for its use case. It could therefore be used as a comparison between approaches and seen as a possible improvement at a later date.
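The stopword-ratio idea described above can be condensed into the following self-contained sketch; it is illustrative rather than the exact implementation, and assumes the NLTK stopword corpora have been downloaded.

\begin{lstlisting}[language=python, caption=Sketch of stopword-based language detection (illustrative)]
from nltk import wordpunct_tokenize
from nltk.corpus import stopwords

def detect_language(text):
    tokens = [t.lower() for t in wordpunct_tokenize(text)]
    language_ratios = {}

    # Count how many stopwords of each supported language appear in the text
    for language in stopwords.fileids():
        stopword_set = set(stopwords.words(language))
        language_ratios[language] = len(stopword_set.intersection(tokens))

    # The language with the most stopword hits is taken as the predominant one
    return max(language_ratios, key=language_ratios.get)

print(detect_language("Bitcoin is trading higher than it was this morning"))  # 'english'
\end{lstlisting}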
\newpage

\subsubsection{Spam filter - Tokenisation, Ngrams, Stopword removal and Stemming}

Prior to any text being processed to both train the Naive Bayes classifier of the spam filter or to classify live tweets, the data needs to be pre-processed to extract the features from the text so that the classifier can identify the probability of each word in the given text. The explanation of how this classifier functions will be detailed in the 'Spam Filtering' Section.
Prior to any text being processed, either to train the Naive Bayes classifier of the spam filter or to classify live tweets, the data needs to be pre-processed to extract the feature vectors from the text so that the classifier can identify the probability of each word in the given text. The explanation of how this classifier functions is detailed in the 'Spam Filtering' section.
\begin{lstlisting}[language=python, caption=pre-processing of data prior to being used by the spam filter]
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
...

def processTweet(tweet, gram = 2):
    tweet = tweet.lower() # convert to lower case

@ -1234,31 +1053,22 @@ def processTweet(tweet, gram = 2):

\item Stemming: Reduces words down to a smaller form, in that it removes suffixes from inflected words - 'studying' becomes 'study' \cite{40}. The Porter Stemmer works by removing the suffixes from the text - 'going' becomes 'go'; however, this also applies to words such as 'leaves', which becomes 'leav', which is not a word. This method is nonetheless applied equally to all words containing such suffixes, so all variations are reduced in the same way, thus still allowing the probability classification to occur on the word, as all variations will be the same.
\end{itemize}
As discovered from \cite{40}, lemmatisation could be an alternative and arguably a better solution to stemming. Lemmatization works fundamentally the same as stemming but reduces the inflected words properly ensuring that a root word belongs to a language. Using the same words that are used to describe stemming, lemmatisation reduces 'goes' to 'go' and 'leaves' to 'leaf' - removing the suffixes down to create the actual root word.
Although lemmatisation will provide the classifier with an actual English word, regardless stemming will still reduce the words down to the same form, this added with a lemmatiser needing a corpus for classifying the words to their root words and additional computational time to do so, the former of using a stemmer is sufficient.
\newpage
As discovered from \cite{40}, lemmatisation could be an alternative and arguably a better solution than stemming. Lemmatisation works fundamentally the same as stemming but reduces the inflected words properly, ensuring that each root word belongs to the language. Using the same words that were used to describe stemming, lemmatisation reduces 'goes' to 'go' and 'leaves' to 'leaf', as an example - removing the suffixes to create the actual root word.
Although lemmatisation would provide the classifier with an actual English word, stemming nevertheless still reduces the words down to a similar form; this, added to a lemmatiser needing a corpus for mapping words to their root words and the additional computational time to do so, means that using a stemmer is sufficient for the use case.
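As a brief illustration of the difference discussed above (a sketch using NLTK, assuming the WordNet data is available locally; it is not part of the system's code):

\begin{lstlisting}[language=python, caption=Stemming versus lemmatisation with NLTK (illustrative)]
from nltk.stem import PorterStemmer, WordNetLemmatizer

stemmer = PorterStemmer()
lemmatiser = WordNetLemmatizer()

for word in ["going", "studying", "leaves"]:
    # Stemming strips suffixes and can produce non-words ('leav'),
    # while lemmatisation maps 'leaves' to the real root word 'leaf'.
    print(word, stemmer.stem(word), lemmatiser.lemmatize(word))
\end{lstlisting}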
\subsection{Spam Filtering}
This section of the implementation will describe how the spam filter is initialised in the \textit{tweet\_collector}, how it is trained and how it classifies tweets as being either spam or ham (wanted data).

\textit{Listing 12} shows the initalisation and method functions used within the \textit{tweet\_collector}, that creates the training and testing datasets, and tests classifier on hard specified tweets and checks their classification.
The class is initialised within the \textit{tweet\_collector}; it creates the training and testing datasets, tests the classifier on hard-specified tweets and checks their classification.
\begin{lstlisting}[language=python, caption=Spam filter training Class - \textit{tweet\_collector.py}]
import pandas as pd
import spam_filter
import numpy as np
...

class filterSpam(object):

    def __init__(self, training_set):
        self.training_set = training_set
        ## initialises the object and globalises the training set for use in every function where needed

    def trainFilter(self):
        ...
    def trainFilter(self):
        self.dataset()  ## Split dataset 75:25
        self.train()    ## Train based on training dataset

    def dataset(self):
    def dataset(self):
        self.data = pd.read_csv(self.training_set)

        self.data['class'] = self.data['classes'].map({'ham': 0, 'spam': 1})
@ -1298,23 +1108,16 @@ def testData_Prediction(self):
        return prediction

    def testPrediction(self):

        # Test Spam/Ham tweets - should return True and False respectively
        spam = spam_filter.processTweet("Earn more than 0015 btc free No deposit No investment Free Bitcoins - Earn $65 free btc in 5 minutes bitcoin freebtc getbtc")

        ham = spam_filter.processTweet("Bitcoin closed with some gains in month of February")
        # Process Tweets - Tokenise and Stem

        hamTweet = self.spamFilter.classify(ham)
        spamTweet = self.spamFilter.classify(spam)
        # Classify both tweets

        print("Console: ", "Spam Tweet -- ", spamTweet)
        sys.stdout.flush()
        print("Console: ", "Ham Tweet -- ", hamTweet)
        sys.stdout.flush()

    def filterStatistics(self, prediction):
        # Get performance metrics for prediction data compared to actual test data
        spam_filter.metrics(self.testData['class'], prediction)
@ -1328,7 +1131,6 @@ def testTweet(self, tweet):
\end{lstlisting}
\begin{itemize}
\item filterSpam - \_\_init\_\_: is called when the \textit{tweet\_collector} script is first executed, which initialises the object, as first described in the 'Live Tweet Collection' section above.
\item trainFilter: is a function that calls the dataset function, which creates the training and testing datasets, followed by the train function, which trains the initialised classifier. This function's sole purpose is to serve as a parent function that only needs to be called once to perform the child functions.
\item dataset: This function loads the pre-labelled spam dataset, remaps the labels ham:spam to the integers 0:1 respectively, and creates an index at 75\% of the original data for the training dataset, leaving 25\% for the testing dataset. It does this by extracting the data at the set point from the spam dataset into the relevant new datasets, while resetting indexes and dropping old columns to form appropriate data.
\item train: Is used to call the classifier function defined in the \textit{spam\_filter} script and passes the training data for it to initialise and then train on.
@ -1340,35 +1142,19 @@ def testTweet(self, tweet):
\subsubsection{Naive Bayes model}

The spam filter classifier, using a Naive Bayes model, was coded from scratch. Ultimately unneeded as the Scikit-learn python package comes with four inbuilt Naive Bayes classification models (Bernoulli, Complement, Multinomial, Gaussian)\cite{41}. The Naive Bayes model implemented was a multinomial Bayes model as the data used for classification was of multinomial distribution and categorical. This algorithm was not compared to the Scikit-learn's inbuilt model for accuracy as this was not the focus of this project. The model was coded from scratch due to finding information on how this would be done with techniques such as TFIDF and Additive Smoothing as detailed in the literature review, the tutorial that helped the greatest \textit{Spam Classifier in Python from scratch} \cite{34} \cite{42}. For an explanation of how the maths work behind this classifier see Literature review sections 'Bag Of Words', 'TF-IDF' and 'Addictive Smoothing'.
The spam filter classifier, using a Naive Bayes model, was coded from scratch. This was ultimately unnecessary, as the Scikit-learn python package comes with four inbuilt Naive Bayes classification models (Bernoulli, Complement, Multinomial, Gaussian) \cite{41}. The model was coded from scratch after finding information on how this could be done with techniques such as TF-IDF and additive smoothing, as detailed in the literature review; the tutorial that helped the most was \textit{Spam Classifier in Python from scratch} \cite{34} \cite{42}. For an explanation of the maths behind this classifier, see the Literature review sections 'Bag Of Words', 'TF-IDF' and 'Additive Smoothing'.

The Naive Bayes model implemented was a multinomial Bayes model, as the data used for classification was categorical and of multinomial distribution. This algorithm was not compared to Scikit-learn's inbuilt model for accuracy, as this was not the focus of this project.
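For reference, the weighting used by such a classifier can be summarised as follows; the notation is illustrative and follows the literature review rather than the exact variable names in the code. Each word $w$ receives a TF-IDF weight, and the word likelihoods are estimated with additive (Laplace) smoothing so that unseen words do not produce zero probabilities:
\begin{equation}
\mathrm{tfidf}(w) = \mathrm{tf}(w) \cdot \log\frac{N}{\mathrm{df}(w)}
\end{equation}
\begin{equation}
P(w \mid \mathrm{spam}) = \frac{\mathrm{tfidf}_{\mathrm{spam}}(w) + \alpha}{\sum_{w'} \mathrm{tfidf}_{\mathrm{spam}}(w') + \alpha \cdot |V|}
\end{equation}
where $\mathrm{tf}(w)$ is the term frequency of $w$, $\mathrm{df}(w)$ the number of messages containing $w$, $N$ the total number of messages (spam plus ham), $\alpha$ the smoothing constant and $|V|$ the vocabulary size.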
\begin{lstlisting}[language=python, caption=classifer class of spam\_filter.py]
class classifier(object):
    def __init__(self, trainData):
        self.tweet = trainData['tweet']
        self.labels = trainData['class']

    def TF_and_IDF(self):
        noTweets = self.tweet.shape[0]
        self.spam = self.labels.value_counts()[1]
        self.ham = self.labels.value_counts()[0]
        self.total = self.spam + self.ham

        # Initialise spam vars
        self.spamCount = 0
        self.hamCount = 0
        self.tfSpam = dict()
        self.tfHam = dict()
        self.idfSpam = dict()
        self.idfHam = dict()

    # Bag Of Words implementation - pro
    def TF_and_IDF(self):
        ...
        # Bag Of Words implementation

        for entry in range(noTweets):
            processed = processTweet(self.tweet[entry])
            count = list()
            # To keep track of whether the word has occurred in the message or not. IDF count

            # To keep track of whether the word has occurred in the message or not. TF count
            for word in processed:
                if self.labels[entry]:
                    self.tfSpam[word] = self.tfSpam.get(word, 0) + 1
@ -1388,12 +1174,8 @@ class classifier(object):
                else:
                    self.idfHam[word] = self.idfHam.get(word, 0) + 1

    def TF_IDF(self):
        self.probSpam = dict()
        self.probHam = dict()
        self.sumSpam = 0
        self.sumHam = 0

    def TF_IDF(self):
        ...
        # Calculate the probability of a word being spam or ham based on its occurrence in the text compared to the counted sets, along with the relevant keys
        for word in self.tfSpam:
            self.probSpam[word] = (self.tfSpam[word]) * log((self.spam + self.ham) / (self.idfSpam[word] + self.idfHam.get(word, 0)))
@ -1414,7 +1196,7 @@
\subsubsection{Classification}

This function aims to classify the pre-processed tweet data as either spam or ham based on the term-frequency and probabilities calculated in the 'TF\_IDF' function. This conducted for each word in the processed tweet is identified if the word is contained in the spam set, based on the level of occurrence the probability is assigned a weight (The more it occures, the more likely it is a generic word), this is also identified for the level of occurrence in the ham set. Totals for the probability are formed, and the total count for both spam and ham are added to the spam and ham probabilities for the processed tweet. If the spam probability \textit{pSpam} is higher than the ham probability \textit{pHam} based on the level of occurrence of each word in the modelled respective sets, a boolean is returned based on which probability is higher - which identifies if the tweet is predominantly spam or ham (\textit{True} or \textit{False}).
The classification function aims to classify the pre-processed tweet data as either spam or ham based on the term frequencies and probabilities calculated in the 'TF\_IDF' function. For each word in the processed tweet, it is identified whether the word is contained in the spam set; based on its level of occurrence, the probability is assigned a weight (the more it occurs, the more likely it is a generic word), and the same is identified for the level of occurrence in the ham set. Totals for the probabilities are formed, and the total counts for both spam and ham are added to the spam and ham probabilities for the processed tweet. If the spam probability \textit{pSpam} is higher than the ham probability \textit{pHam}, based on the level of occurrence of each word in the respective modelled sets, a boolean is returned indicating which probability is higher - which identifies whether the tweet is predominantly spam or ham (\textit{True} or \textit{False}).
\begin{lstlisting}[language=python, caption=Classify Function of Parent classifier class of spam\_filter.py]
def classify(self, processed):
@ -1436,7 +1218,7 @@ def classify(self, processed):
\subsubsection{Predict}

The predict function under the classify parent class used by the \textit{tweet\_collector} to test the trained classifier on the test dataset. For each tweet in the dataset, the data is processed through the \textit{processTweet} function previously described, this returns a dictionary of words in the text which then used in the \textit{classify} function described above to identify whether or not each tweet is predominantly spam or ham, the result of all tweets are returned. The \textit{tweet\_collector then uses the returned array} in the \textit{filterStatistics} function, also previously described, to calculate the performance and accuracy of the trained model.
The predict function, defined under the \textit{classifier} parent class, is used by the \textit{tweet\_collector} to test the trained classifier on the test dataset. For each tweet in the dataset, the data is processed through the \textit{processTweet} function previously described; this returns a dictionary of the words in the text, which is then used by the \textit{classify} function described above to identify whether or not each tweet is predominantly spam or ham, and the results for all tweets are returned. The \textit{tweet\_collector} then uses the returned array in the \textit{filterStatistics} function, also previously described, to calculate the performance and accuracy of the trained model.
\begin{lstlisting}[language=python, caption=Predict function of parent classifier class of spam\_filter.py]
def predict(self, testData):
@ -1472,23 +1254,13 @@ def metrics(labels, predictions):
    print("Accuracy: ", accuracy)
\end{lstlisting}

\newpage
\subsection{Sentiment Analysis}

This section of the implementation outlines how VADER sentiment analyser is implemented and performs with the rest of the system. The \textit{get\_sentiment} class and its \textit{\_\_init\_\_} function are called in the \textit{tweet\_collector} script upon starting and by the \textit{historical tweets} script to initialise the analyser from the VADER package. Both scripts then call \textit{get\_vader\_sentiment} when needed to give polarity classification to a tweet.
This section of the implementation outlines how the VADER sentiment analyser is implemented and how it interacts with the rest of the system. The \textit{get\_sentiment} class and its \textit{\_\_init\_\_} function are called in the \textit{tweet\_collector} script upon starting, and by the \textit{historical tweets} script, to initialise the analyser from the VADER package. Both scripts then call \textit{get\_vader\_sentiment} when needed to give a polarity classification to a tweet.

\begin{lstlisting}[language=python, caption=]
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
...
\begin{lstlisting}[language=python, caption=VADER polarity classification]
class get_sentiment(object):

    def __init__(self):
        ## Initialise object and analyser as global objects
        self.analyser = SentimentIntensityAnalyzer()
        self.sentiment = {}
        self.compound = {}

    ...
    def get_vader_sentiment(self, sentence):

        # Calculate the polarity scores of the provided tweet
@ -1518,70 +1290,16 @@ class get_sentiment(object):
The \textit{get\_vader\_sentiment} function provides the polarity scores for the provided tweet. The scores are split into polarity and compound scores so that the positive and negative scores can be compared to identify the overall greater sentiment in the given tweet. Doing so helps to identify whether the tweet was overall negative or positive. The compound score is separated out and used on its own.
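A minimal, self-contained illustration of the underlying VADER call follows; the comparison logic is a simplified assumption of what such a function returns, not the exact implementation.

\begin{lstlisting}[language=python, caption=Sketch of VADER polarity scoring (illustrative)]
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()
scores = analyser.polarity_scores("Bitcoin is rallying and the market looks great :)")

# 'pos', 'neg' and 'neu' sum to 1; 'compound' is a normalised score in [-1, 1]
overall = "Positive" if scores['pos'] > scores['neg'] else "Negative"
print(overall, scores['compound'])
\end{lstlisting}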
\newpage

\subsection{Recurrent Neural Network - LSTM}
This section of the implementation describes and discusses how the LSTM neural network is configured, trained, tested and used to create the model later used for price forecasting, for both neural networks - with and without hourly sentiment embedded in the datasets. The performance metrics that were calculated to verify the accuracy of the model, appropriate to regression models, and the K-fold validation implementation are also discussed.

Packages used for both neural networks, with and without hourly sentiment embedded with the price data:
\begin{lstlisting}[language=python, caption=LSTM packages]
import pandas as pd
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, classification_report
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import keras.backend as K
from sklearn.model_selection import StratifiedKFold, cross_val_score

from time import sleep
from datetime import datetime, timedelta

import csv, sys, json

from tqdm import tqdm
from keras_tqdm import TQDMCallback
\end{lstlisting}

Additionally, this section only shows code from the neural network that has the sentiment embedded in the datasets; comments are made in the code snippets with the reduced code that consists of the neural network and is due to each neural network having almost the same code. The reasons behind not implementing both networks in the same Python script was down to perform. Due to Python executing code synchronously and due the neural networks needing to ran on the dot of an hour and at the same time the code was divided and executed individually. This also reduced the need to recode most of the functions to loop and perform tasks for each network at every given stage of the network even if the majority of the code was duplicated.
\newpage

\subsubsection{Script Execution}
\begin{lstlisting}[language=python, caption=Start of execution of the LSTM script]
import pandas as pd
import csv
...

class Network(object):

    def __init__(self, merged_lstm_data):
        self.lstm_data = merged_lstm_data

...
# Merges hourly and price data into one CSV
### Occurs for both neural networks but only specific columns are used in each
merged = pd.merge(left=price_file, right=tweet_file, how="inner")
print("merge length", len(merged))
merged.to_csv('merged_lstm_data.csv')

# Initialise network and pass merged data
network = Network(merged)

# Calls the dataset creation function from the network class
network.data()

# Calls the future trading function which starts forecasting the price for the next hour
network.future_trading(live_price, live_sentiment, predictions_file)

\end{lstlisting}
Additionally, this section discusses the code from the neural network that has the sentiment embedded in the datasets; comments are made in the code snippets where the code has been reduced, because each neural network has almost the same code. The reason behind not implementing both networks in the same Python script was performance. Because Python executes code synchronously, and because the neural networks need to run on the dot of the hour and at the same time, the code was divided and executed individually. This also reduced the need to recode most of the functions to loop and perform tasks for each network at every given stage, even though the majority of the code was duplicated.
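As an aside, running a script "on the dot of the hour" can be achieved by sleeping until the next full hour before starting each cycle; the snippet below is a generic sketch of that idea (the function name is illustrative), not the project's actual scheduling code.

\begin{lstlisting}[language=python, caption=Sketch of waiting until the next full hour (illustrative)]
from time import sleep
from datetime import datetime, timedelta

def sleep_until_next_hour():
    now = datetime.now()
    next_hour = (now + timedelta(hours=1)).replace(minute=0, second=0, microsecond=0)
    # Sleep for the remaining seconds so the next cycle starts exactly on the hour
    sleep((next_hour - now).total_seconds())
\end{lstlisting}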
\subsubsection{Dataset Creation}

\textit{Listing 19} shows how the datasets for training (train\_X and train\_Y) and testing (test\_X and test\_Y) are formed and shaped for model training. A look back of 2 is used to create a timestep of one record to ensure predictions are forecasted for the next record. Prices are also scaled between 0 and 1 due to sentiment ranging in the same values and is a standard for model creation to speed up regression and model training as the data is of smaller values - using the scikit-learns MinMaxScaler function.
\newline
The datasets for training (train\_X and train\_Y) and testing (test\_X and test\_Y) are formed and shaped for model training. A look back of 2 is used to create a timestep of one record, to ensure predictions are forecast for the next record. Prices are also scaled to between 0 and 1 - using scikit-learn's MinMaxScaler function - because the sentiment values lie in the same range, and scaling is standard practice in model creation to speed up regression and model training, as the data takes smaller values.

A function for merging the two datasets, price and sentiment, using the look back takes place before the training (train\_X and train\_Y) and testing (test\_X and test\_Y) sets are formed. This function is different for the two networks, as one includes the sentiment at the position of its respective price.
\begin{lstlisting}[language=python, caption=Dataset creation and preprocessing]
def data(self):
    ...
    self.model_network(train_X, train_Y, test_X, test_Y)
    # Call the network function to train the model


def preprocess(self):
    self.model_data = self.lstm_data[['price','compound']].groupby(self.lstm_data['created_at']).mean()
    # Extract the price and compound columns from the dataset

    self.sentiment_data = self.model_data['compound'].values.reshape(-1,1)
    self.price_data = self.model_data['price'].values.reshape(-1,1)
    ## Reshape data to column-wise

    # Convert types to float32 for consistency
    self.sentiment_data = self.sentiment_data.astype('float32')
    self.price_data = self.price_data.astype('float32')

    self.scale = MinMaxScaler(feature_range=(0,1))
    self.scaledPrice = self.scale.fit_transform(self.price_data)
    # Scale price to values between 0 and 1

    self.price_train_size = int(len(self.scaledPrice) * 0.7)
    self.price_test_size = len(self.scaledPrice) - self.price_train_size
    # Use 70% of the dataset for training and 30% for testing

    self.price_train = self.scaledPrice[0:self.price_train_size]
    self.price_test = self.scaledPrice[self.price_train_size:len(self.scaledPrice)]
    # Split the scaled prices into the training and testing partitions


def create_sets(self, data, lookback, sentiment):
    data_X, data_Y = [], []
    for i in range(len(data) - lookback):
        ...
\end{lstlisting}
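The body of create\_sets is elided above. The following is a minimal sketch of how such a look-back windowing function might be written, assuming the sentiment argument controls whether the compound score is appended to each window of prices; the name \textit{create\_sets\_sketch} and the exact position at which the sentiment is attached are illustrative, not the project's code.

\begin{lstlisting}[language=python]
import numpy as np

def create_sets_sketch(data, lookback, sentiment=None):
    # data: scaled prices of shape (n, 1); sentiment: compound scores of shape (n, 1) or None
    data_X, data_Y = [], []
    for i in range(len(data) - lookback):
        window = list(data[i:i + lookback, 0])      # previous 'lookback' prices
        if sentiment is not None:
            # attach the sentiment recorded alongside the most recent price in the window
            window.append(sentiment[i + lookback - 1, 0])
        data_X.append(window)
        data_Y.append(data[i + lookback, 0])        # next-hour price to be predicted
    return np.array(data_X), np.array(data_Y)
\end{lstlisting}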
\subsubsection{Training and Testing Model}
The neural network is set up with four layers, each configured with 100 LSTM cells, a dropout of 0.2, and return sequences enabled so that each layer feeds the next. Dropout is used to prevent overfitting: with a dropout probability of 0.2, 80\% of each layer's outputs are retained for the next layer. Returning sequences outputs the hidden state for every time step, ensuring the next LSTM layer receives two inputs carried over from the previous layer: the old weights and the value outputs of that layer.

\begin{lstlisting}[language=python, caption=LSTM model creation\, layering\, compiling and fitting]
self.model = Sequential()
...
testY_inverse_sent = scale.inverse_transform(test_Y.reshape(-1, 1))
\end{lstlisting}

As discussed in the literature review and outlined in the solution approach, the Adam optimiser was used to compile the model. The loss was calculated using the mean squared error, and the metrics calculated and returned to present the predictive accuracy of the model were the mean squared error, root mean squared error, mean absolute error and mean absolute percentage error. Both the metrics and the predictions made are saved to a CSV that is then presented to users in the server-hosted UI.
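These four metrics can be computed directly from the rescaled predictions. A minimal sketch is shown below; the array values are dummy data for illustration, and the project's exact metric code is not reproduced here.

\begin{lstlisting}[language=python]
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Illustrative values; in the project these are the inverse-scaled test prices and predictions
testY_inverse = np.array([[40000.0], [40100.0], [39950.0]])
yhat_inverse  = np.array([[40050.0], [40060.0], [40010.0]])

mse  = mean_squared_error(testY_inverse, yhat_inverse)
rmse = np.sqrt(mse)
mae  = mean_absolute_error(testY_inverse, yhat_inverse)
mape = np.mean(np.abs((testY_inverse - yhat_inverse) / testY_inverse)) * 100
\end{lstlisting}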
The model was fitted on the training sets (X, Y) over 200 epochs with a batch size of 1000 on roughly 11,000 records, which was the total amount of data used for training over a year. Predictions are then made on the test set, producing \textit{yhat}, which is inverted and rescaled to recover the original price values; these are saved to a CSV and displayed on the user interface.
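Because most of the model-construction listing above is elided, the sketch below shows how a stack matching this description (four LSTM layers of 100 cells, dropout of 0.2, return sequences between layers, Adam optimiser, MSE loss, 200 epochs, batch size 1000) could be assembled in Keras. It is an illustrative reconstruction under those assumptions, not the project's exact code, and it uses dummy data so that it runs standalone.

\begin{lstlisting}[language=python]
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

# Dummy data in the shape used by the report: (samples, timesteps, features)
train_X = np.random.rand(100, 1, 2).astype('float32')
train_Y = np.random.rand(100, 1).astype('float32')

model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100))                      # last LSTM layer returns a single vector
model.add(Dropout(0.2))
model.add(Dense(1))                       # single regression output: next-hour price
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(train_X, train_Y, epochs=200, batch_size=1000)
yhat = model.predict(train_X)             # predictions are inverse-scaled before saving
\end{lstlisting}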
\newpage
\subsection{Future Prediction Forecasting}
Future prediction forecasting is implemented in the \textit{remodel} function (shown below) as a loop executed every hour. It loads the previous five prices and sentiment values (the sentiment is omitted for the non-sentiment model) and predicts the price for the next hour in a one-hour timestep. Because there is not enough live data during the first four hours, historical prices and sentiment are used until five hours have passed after the network is first executed. From then on the model predicts on all accumulated data up to 1000 records, matching the batch size the model was initially trained with, and afterwards predicts using only the past 1000 records, since the gradient descent of the model is averaged and modelled for a sample size of 1000.

These predictions, along with the inverted test data, are saved to the relevant CSVs to be plotted as graphs on the interface. The function also forms a market suggestion of either 'Buy' or 'Sell' based on a hard-coded difference threshold of 25\%, indicating at a given point between predictions whether it is a better time for a user to buy or sell Bitcoin.
\begin{lstlisting}[language=python, caption=Forecasting future price of next hour for Bitcoin]
def remodel(self, price_file, previous_sent, live_price, live_sentiment, predictions_file):
    price = pd.read_csv(live_price)
    sentiment = pd.read_csv(live_sentiment)

    price_tail = price.tail(5)
    sentiment_tail = sentiment.tail(5)
    ## Get the last 5 live prices and sentiment values to predict on

    price_tail.index = price_tail['created_at']
    ...

    difference = ((yhat_inverse[0][0] - self.previous_val) / self.previous_val) * 100
    # Calculate the difference between hourly predictions for the threshold action suggestion (below)

    ...
    ## Suggest market action based on 0.25 threshold (2.5%)
    ...
    elif difference < self.threshold:
        print("Sell")
        self.state = 'SELL'
    else:
        print("Prediction Error!")

    ...

    self.test_Y_updating = np.concatenate((self.test_Y_updating, testY_inverse))
    self.yhat_updating = np.concatenate((self.yhat_updating, yhat_inverse))

    ## Output plots to json for display on UI
    ...
    with open('data/updating.json', mode='w') as file:
        for x in range(len(cat)):
            xs[x] = {'index': x, 'testY_inverse': cat[x][0], 'yhat_inverse': cat[x][1]}
        json.dump(xs, file, indent=3)

    ...
    ## Output the prediction made for the hour to a CSV file for use in the UI
    try:
        ...

    self.previous_val = yhat_inverse[0][0]  ## The next predicted value, compared against in an hour's time
\end{lstlisting}
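As a worked illustration of the difference calculation above: if the previously predicted price \textit{self.previous\_val} was 40,000 and the new prediction \textit{yhat\_inverse[0][0]} is 40,150, then $\mathit{difference} = \frac{40150 - 40000}{40000} \times 100 = 0.375$; whenever this value exceeds \textit{self.threshold} the hour is flagged as a buy suggestion, and when it falls below the threshold it is flagged as a sell.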
\newpage
\subsection{User Interface}
\newpage
@ -1780,6 +1512,7 @@ def remodel(self, price_file, previous_sent, live_price, live_sentiment, predict
\subsection{Appendix A - Project Initiation Document}
Displayed on the following pages.
\includepdf[pages=-]{PID}
\subsection{Appendix B - Log book}
The log book for this project is a physical book that was handed in to the School of Computer Science; since it is a physical item, it cannot be included here.
56
document.toc
@ -98,58 +98,56 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.1.2}Price Time-Series Live Data}{47}{subsubsection.10.1.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.1.3}Historical Tweet Collection}{49}{subsubsection.10.1.3}
\contentsline {subsubsection}{\numberline {10.1.3}Historical Tweet Collection}{48}{subsubsection.10.1.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.1.4}Live Tweet Collection}{51}{subsubsection.10.1.4}
\contentsline {subsubsection}{\numberline {10.1.4}Live Tweet Collection}{50}{subsubsection.10.1.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {10.2}Data pre-processing}{56}{subsection.10.2}
\contentsline {subsection}{\numberline {10.2}Data pre-processing}{52}{subsection.10.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.2.1}Tweet Filtering}{56}{subsubsection.10.2.1}
\contentsline {subsubsection}{\numberline {10.2.1}Tweet Filtering}{52}{subsubsection.10.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.2.2}Language detection filtering}{57}{subsubsection.10.2.2}
\contentsline {subsubsection}{\numberline {10.2.2}Language detection filtering}{53}{subsubsection.10.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.2.3}Spam filter - Tokenisation, Ngrams, Stopword removal and Stemming}{58}{subsubsection.10.2.3}
\contentsline {subsubsection}{\numberline {10.2.3}Spam filter - Tokenisation, Ngrams, Stopword removal and Stemming}{55}{subsubsection.10.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {10.3}Spam Filtering}{61}{subsection.10.3}
\contentsline {subsection}{\numberline {10.3}Spam Filtering}{56}{subsection.10.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.3.1}Naive Bayes model}{63}{subsubsection.10.3.1}
\contentsline {subsubsection}{\numberline {10.3.1}Naive Bayes model}{59}{subsubsection.10.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.3.2}Classification}{65}{subsubsection.10.3.2}
\contentsline {subsubsection}{\numberline {10.3.2}Classification}{60}{subsubsection.10.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.3.3}Predict}{66}{subsubsection.10.3.3}
\contentsline {subsubsection}{\numberline {10.3.3}Predict}{61}{subsubsection.10.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.3.4}Metrics}{66}{subsubsection.10.3.4}
\contentsline {subsubsection}{\numberline {10.3.4}Metrics}{61}{subsubsection.10.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {10.4}Sentiment Analysis}{68}{subsection.10.4}
\contentsline {subsection}{\numberline {10.4}Sentiment Analysis}{62}{subsection.10.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {10.5}Recurrent Neural Network - LSTM}{69}{subsection.10.5}
\contentsline {subsection}{\numberline {10.5}Recurrent Neural Network - LSTM}{63}{subsection.10.5}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.5.1}Script Execution}{70}{subsubsection.10.5.1}
\contentsline {subsubsection}{\numberline {10.5.1}Dataset Creation}{63}{subsubsection.10.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.5.2}Dataset Creation}{70}{subsubsection.10.5.2}
\contentsline {subsubsection}{\numberline {10.5.2}Training and Testing Model}{65}{subsubsection.10.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{\numberline {10.5.3}Training and Testing Model}{72}{subsubsection.10.5.3}
\contentsline {subsection}{\numberline {10.6}Future Prediction Forecasting}{67}{subsection.10.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {10.6}Future Prediction Forecasting}{73}{subsection.10.6}
\contentsline {subsection}{\numberline {10.7}User Interface}{69}{subsection.10.7}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {10.7}User Interface}{75}{subsection.10.7}
\contentsline {section}{\numberline {11}Testing Metrics and Accuracy}{70}{section.11}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {11}Testing Metrics and Accuracy}{76}{section.11}
\contentsline {section}{\numberline {12}Project Evaluation}{71}{section.12}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {12}Project Evaluation}{77}{section.12}
\contentsline {section}{\numberline {13}Discussion: Contribution and Reflection}{71}{section.13}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {13}Discussion: Contribution and Reflection}{77}{section.13}
\contentsline {subsection}{\numberline {13.1}Limitations}{71}{subsection.13.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {13.1}Limitations}{77}{subsection.13.1}
\contentsline {section}{\numberline {14}Conclusion and Future Improvements}{72}{section.14}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {14}Conclusion and Future Improvements}{78}{section.14}
\contentsline {subsection}{\numberline {14.1}Conclusion}{72}{subsection.14.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {14.1}Conclusion}{78}{subsection.14.1}
\contentsline {subsection}{\numberline {14.2}Future Improvements}{72}{subsection.14.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {14.2}Future Improvements}{78}{subsection.14.2}
\contentsline {section}{\numberline {15}Appendices}{77}{section.15}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {15}Appendices}{83}{section.15}
\contentsline {subsection}{\numberline {15.1}Appendix A - Project Initiation Document}{77}{subsection.15.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {15.1}Appendix A - Project Initiation Document}{83}{subsection.15.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {15.2}Appendix B - Log book}{96}{subsection.15.2}
\contentsline {subsection}{\numberline {15.2}Appendix B - Log book}{90}{subsection.15.2}