# Image for the sentiment-analyser service: a Flask API on 9090 plus a
# health/readiness probe server on 9091, both started by src/main.py.
FROM python:3.7-alpine

# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="Andrew Sotheran"

# Compiler toolchain for Python packages that build native extensions.
# --no-cache avoids leaving the apk index in the layer. The base image already
# ships pip, so the Alpine py-pip package is not needed.
RUN apk add --no-cache \
        gcc \
        libc-dev

# Python dependencies. requests is imported directly by src/probes/probes.py,
# so it is declared explicitly instead of relying on a transitive install.
# NOTE(review): versions are unpinned, as in the original; pin them (ideally
# via a requirements.txt) to make builds reproducible.
RUN python -m pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        flask \
        pycryptodome \
        python-dotenv \
        python-json-logger \
        requests \
        utils \
        vaderSentiment

# Unprivileged account for the service; both listening ports are above 1024.
RUN addgroup -S sentiment && adduser -S -G sentiment sentiment

# src/main.py appends /home/sentiment-analyser/ to sys.path, so the install
# location must stay exactly here.
COPY --chown=sentiment:sentiment . /home/sentiment-analyser/

USER sentiment

# 9090: sentiment API, 9091: health/readiness probes.
EXPOSE 9090 9091

CMD ["python", "/home/sentiment-analyser/src/main.py"]
// Build pipeline: checkout -> registry/cluster login -> build and push image ->
// tag the repository -> deploy to Kubernetes.
try {
    timestamps {
        node("${env.SLAVE_LABEL}") {
            stage('Initialise') {
                // Use the branch and credentials id declared at the top of this file
                // instead of repeating the literals.
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: env.GIT_BRANCH]],
                    doGenerateSubmoduleConfigurations: false,
                    extensions: [],
                    submoduleCfg: [],
                    userRemoteConfigs: [[credentialsId: env.GITHUB_CREDENTIALS_ID, url: env.GIT_REPOSITORY_URL]]
                ])

                env.APPLICATION_VERSION = get_application_version()

                withCredentials(
                    [usernamePassword(
                        credentialsId: 'doctl',
                        passwordVariable: 'DOCTL_TOKEN',
                        usernameVariable: 'DOCTL_USERNAME'
                    )]
                ) {
                    // Single-quoted so the shell, not Groovy, expands the secret;
                    // Groovy interpolation would place the token in the command text.
                    sh 'doctl auth init --access-token "$DOCTL_TOKEN"'
                    sh "doctl registry login"
                    sh "doctl kubernetes cluster kubeconfig save cryptosky-cluster"
                }
            }

            stage('Build Image') {

                // Substitute the placeholders in the Kubernetes manifests.
                executeShellScript("configuration/scripts/mapVarsToConfigs.sh",
                                   env.DIGITAL_OCEAN,
                                   env.DIGITAL_OCEAN_REPO,
                                   env.APPLICATION_NAME,
                                   env.APPLICATION_VERSION,
                                   env.APPLICATION_LABEL)

                withDockerServer([uri: "${env.DOCKER_REPOSITORY_TCP}"]) {
                    // Build once, then tag; the original ran a second full build
                    // just to produce the ':latest' tag.
                    docker.build("${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}")
                    sh "docker tag ${env.APPLICATION_NAME}:${env.APPLICATION_VERSION} ${env.APPLICATION_NAME}:latest"

                    sh "docker tag ${env.APPLICATION_NAME}:${env.APPLICATION_VERSION} ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}"
                    sh "docker tag ${env.APPLICATION_NAME}:latest ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest"

                    sh "docker push ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}"
                    sh "docker push ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest"
                }

            }

            stage('Tag Repository') {

                withCredentials(
                    [usernamePassword(
                        credentialsId: env.GITHUB_CREDENTIALS_ID,
                        passwordVariable: 'GIT_PASSWORD',
                        usernameVariable: 'GIT_USERNAME'
                    )]
                ) {
                    sh "git tag ${env.APPLICATION_VERSION}"
                    // Single-quoted: credentials and the env.* values set above are
                    // read from the step's environment by the shell.
                    sh 'git push "https://${GIT_USERNAME}:${GIT_PASSWORD}@${GIT_REPOSITORY_PATH}" "${APPLICATION_VERSION}"'
                }
            }

            stage('Deploy') {
                executeShellScript("configuration/scripts/deployToKubernetes.sh",
                                   env.APPLICATION_NAME)
            }
        }
    }

    // Reached only when every stage completed. The original assigned SUCCESS in
    // a finally block, i.e. also after the catch below had recorded FAILURE.
    currentBuild.result = 'SUCCESS'
} catch ( exception ) {
    currentBuild.result = 'FAILURE'
    throw exception
}
def get_vader_sentiment(self, sentence):
    """Score ``sentence`` with the VADER analyser held in ``self.analyser``.

    Args:
        sentence: Text to analyse; passed unchanged to ``polarity_scores``.

    Returns:
        dict: ``{'Score': <full polarity_scores dict>, 'Compound': <its
        'compound' entry>}``.

    A WARN entry is logged when the neutral component is the first maximum of
    (neg, neu, pos), i.e. neutral is strictly above negative and not below
    positive. The returned value is the same either way.
    """
    score = self.analyser.polarity_scores(sentence)

    # Read components by key rather than by position in dict.values(), so the
    # result does not depend on the ordering of the returned dict.
    compound = score['compound']
    polarity = [score['neg'], score['neu'], score['pos']]
    maxScore = max(polarity)

    # list.index returns the first maximum, matching the original tie-breaking.
    if polarity.index(maxScore) == 1:
        log("Tweet is overall Neutral - Score: {}".format(maxScore), 'WARN')

    # The original had four branches here that all returned this same dict.
    return { 'Score': score, 'Compound': compound }
@app.route('/sentiment', methods=['GET'])
def tweetPredict():
    """Handle ``GET /sentiment?tweet=<text>``.

    Returns:
        A Flask ``(body, status, headers)`` tuple:
        * 200 with ``{'result': <analyser output>, 'tweet': <text>}``.
        * 400 with ``{'error': ...}`` when the ``tweet`` parameter is absent.
    """
    # Correct header name; the original sent 'ContentType', which is not the
    # Content-Type header and so never marked the response as JSON.
    headers = {'Content-Type': 'application/json'}

    tweet = request.args.get('tweet')

    # A missing parameter used to pass None to the analyser and surface as a
    # 500. An empty string is still accepted and analysed, as before.
    if tweet is None:
        log("Rejecting sentiment request without a tweet parameter", 'WARN')
        return json.dumps({'error': "Missing required query parameter 'tweet'"}), 400, headers

    log("Receiving Tweet to classify {}".format(tweet), 'INFO')

    result = analyser.get_vader_sentiment(tweet)

    log("Returning classification result of {}".format(result), 'INFO')

    return json.dumps({'result': result, 'tweet': tweet}), 200, headers
@app.route('/readiness')
def readiness():
    """Readiness probe: report UP only if the sentiment endpoint answers.

    Issues one GET against the sentiment API on port 9090 and returns a Flask
    ``(body, status, headers)`` tuple:
    * 200 / ``status: UP`` when the call returns a successful status and a
      JSON body.
    * 503 / ``status: DOWN`` when the call fails, times out, returns an error
      status, or returns a body that is not JSON.
    """
    headers = {'Content-Type': 'application/json'}
    name = 'CryptoSky Sentiment Analyser'
    description = 'Projects Sentiment Analyser service that classifies whether the tweet is overall positive or negative and sends back results.'

    try:
        # The path was misspelled '/senitment' in the original, so the call hit
        # a 404 whose non-JSON body made json.loads raise. The timeout keeps
        # the probe from hanging if the API is wedged.
        response = requests.request(
            "GET",
            'http://127.0.0.1:9090/sentiment?tweet=Boitcoin%20is%20doing%20very%20well',
            timeout=1
        )
        response.raise_for_status()
        # json.loads raises a ValueError subclass on a non-JSON body.
        result = json.loads(response.text)
    except (requests.exceptions.RequestException, ValueError) as error:
        return json.dumps({
            'status': 'DOWN',
            'app': {
                'name': name,
                'description': description,
                'check_status': 'Failure - Call to sentiment endpoint',
                'response': str(error)
            }
        }), 503, headers

    return json.dumps({
        'status': 'UP',
        'app': {
            'name': name,
            'description': description,
            'check_status': 'Success - Call to sentiment endpoint',
            'response': result
        }
    }), 200, headers
def log(message, level):
    """Emit ``message`` on this module's logger at the requested level.

    Args:
        message: Text to log.
        level: Level name, case-insensitive. Accepts the short forms used in
            this project ('INFO', 'WARN', 'ERR', 'DEBUG') and the standard
            names 'WARNING' and 'ERROR'. Any other value is logged at INFO
            instead of being dropped.
    """
    logger = logging.getLogger(__name__)

    # logger.warning replaces the deprecated logger.warn alias.
    emitters = {
        'DEBUG': logger.debug,
        'INFO': logger.info,
        'WARN': logger.warning,
        'WARNING': logger.warning,
        'ERR': logger.error,
        'ERROR': logger.error,
    }

    # Unknown levels previously matched no branch and the message was lost.
    emit = emitters.get(str(level).upper(), logger.info)
    emit(message)