diff --git a/Dockerfile b/Dockerfile
index f470f4d..70eb7ba 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,5 +6,9 @@ RUN python -m pip install --upgrade pip
 RUN pip install utils pycryptodome && \
     pip install python-dotenv schedule tweepy stomp.py python-json-logger
 COPY . /home/tweet-collector/.
+# Install nltk (a no-op if it is already present) and download its data at
+# build time; the script lives under the COPY destination above.
+RUN pip install nltk && \
+    python /home/tweet-collector/configuration/scripts/nltk_package_downloads.py
 EXPOSE 9090
 CMD ["python", "/home/tweet-collector/src/main.py"]
\ No newline at end of file
diff --git a/configuration/scripts/nltk_package_downloads.py b/configuration/scripts/nltk_package_downloads.py
new file mode 100644
index 0000000..d0ed840
--- /dev/null
+++ b/configuration/scripts/nltk_package_downloads.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+"""Download the NLTK data packages listed in PACKAGES.
+
+Run by the Dockerfile while the image is being built.
+"""
+
+import sys
+
+import nltk
+
+# 'wordpunct_tokenize' is a regex-based function in nltk.tokenize, not a
+# downloadable data package, so requesting it from the downloader only
+# produces an error; it needs no data and is therefore not listed here.
+PACKAGES = ('stopwords',)
+
+if __name__ == '__main__':
+    # nltk.download() reports failure by returning False rather than raising;
+    # exit non-zero so a failed download fails the image build.
+    failed = [name for name in PACKAGES if not nltk.download(name)]
+    if failed:
+        sys.exit('Failed to download NLTK packages: ' + ', '.join(failed))