[08.10.20] Set up, configuration and code for sentiment analyser service with VADER

This commit is contained in:
andrewso 2020-10-08 16:50:11 +01:00
parent f5866f2a3b
commit 19c13d12dc
12 changed files with 476 additions and 0 deletions

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Image for the sentiment analyser service (Flask API on 9090, probes on 9091).
FROM python:3.7-alpine

# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
LABEL maintainer="Andrew Sotheran <cryptosky.user@gmail.com>"

# Build tooling required to compile native wheels (e.g. pycryptodome).
# --no-cache replaces `apk update` and keeps the package index out of the image.
RUN apk add --no-cache py-pip libc-dev gcc

RUN python -m pip install --no-cache-dir --upgrade pip

# `requests` is imported directly by src/probes/probes.py, so it is listed
# explicitly rather than relied upon as a transitive dependency.
# The former `rm -rf /var/lib/apt/lists/*` was removed: Alpine has no apt lists.
RUN pip install --no-cache-dir utils pycryptodome && \
    pip install --no-cache-dir python-dotenv flask python-json-logger vaderSentiment requests

COPY . /home/sentiment-analyser/.

# 9090: sentiment API, 9091: health/readiness probes.
EXPOSE 9090
EXPOSE 9091

CMD ["python", "/home/sentiment-analyser/src/main.py"]

View File

@ -0,0 +1,79 @@
---
# Deployment template for the sentiment analyser service.
# Upper-case tokens are placeholders that
# configuration/scripts/mapVarsToConfigs.sh rewrites before `kubectl apply`.
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    linkerd.io/inject: enabled
  labels:
    name: LABEL
  name: RESOURCE_NAME
  namespace: production
spec:
  replicas: 1
  selector:
    matchLabels:
      app: RESOURCE_NAME
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Bring the new pod up before the old one is removed.
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      annotations:
        linkerd.io/inject: enabled
      labels:
        app: RESOURCE_NAME
    spec:
      containers:
        - image: REPOSITORY/IMAGE
          name: RESOURCE_NAME
          env:
            - name: KUBERNETES_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CONTAINER_CORE_LIMIT
              valueFrom:
                resourceFieldRef:
                  resource: limits.cpu
            - name: CONTAINER_MAX_MEMORY
              valueFrom:
                resourceFieldRef:
                  resource: limits.memory
          ports:
            # Port names are limited to 15 characters, so fixed short names are
            # used here instead of the (longer) substituted application name.
            - containerPort: 9090
              name: sentiment
            - containerPort: 9091
              name: probes
          livenessProbe:
            httpGet:
              path: /health
              port: 9091
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 1
            successThreshold: 1
            # Tolerate transient slowness instead of restarting on one miss.
            failureThreshold: 3
          readinessProbe:
            httpGet:
              port: 9091
              path: /readiness
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 1
            successThreshold: 1
            failureThreshold: 3
          imagePullPolicy: Always
          resources:
            requests:
              cpu: 32m
              memory: 32Mi
            limits:
              cpu: 75m
              memory: 64Mi
          securityContext:
            # NOTE(review): nothing in the application code visibly needs
            # these capabilities; confirm whether they can be dropped.
            capabilities:
              add:
                - NET_ADMIN
                - NET_RAW
      restartPolicy: Always
      imagePullSecrets:
        - name: registry-cryptosky-image-registry

View File

@ -0,0 +1,21 @@
---
# ClusterIP Service in front of the sentiment analyser pods.
# Upper-case tokens are placeholders that
# configuration/scripts/mapVarsToConfigs.sh rewrites before `kubectl apply`.
apiVersion: v1
kind: Service
metadata:
  name: RESOURCE_NAME
  namespace: production
  labels:
    name: LABEL
spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app: RESOURCE_NAME
  ports:
    # Sentiment API.
    - name: sentiment
      protocol: TCP
      port: 9090
      targetPort: 9090
    # Health and readiness endpoints.
    - name: probes
      protocol: TCP
      port: 9091
      targetPort: 9091

View File

@ -0,0 +1,95 @@
#!/usr/bin/env groovy
// --- Application identity -------------------------------------------------
env.APPLICATION_NAME = 'sentiment-analyser'
env.APPLICATION_LABEL = 'utilities'

// --- Source repository ----------------------------------------------------
env.GIT_BRANCH = 'master'
env.GIT_REPOSITORY_PATH = "github.com/andyjk15/${env.APPLICATION_NAME}.git"
env.GIT_REPOSITORY_URL = "https://${env.GIT_REPOSITORY_PATH}"
env.GITHUB_CREDENTIALS_ID = 'Github'

// --- Image registry and remote Docker builder -----------------------------
env.DIGITAL_OCEAN = 'registry.digitalocean.com'
env.DIGITAL_OCEAN_REPO = 'cryptosky-image-registry'
env.DOCKER_BUILDER = 'registry.cryptosky.me'
env.DOCKER_REPOSITORY = "${env.DIGITAL_OCEAN}/${env.DIGITAL_OCEAN_REPO}"
env.DOCKER_REPOSITORY_TCP = "tcp://${env.DOCKER_BUILDER}:4243"

// --- Deployment target and build agent ------------------------------------
env.NAMESPACE = 'production'
env.SLAVE_LABEL = 'cryptosky-aio-build'
/**
 * Builds the version string for this run.
 *
 * @return "1.0.0-b" followed by the Jenkins BUILD_NUMBER of the current build
 */
String get_application_version() {
    return '1.0.0-b' + env.BUILD_NUMBER
}
/**
 * Runs a script from the workspace with up to five positional arguments.
 *
 * @param shellPath path of the script, relative to the workspace root
 * @param arg1..arg5 optional positional arguments; omitted ones are passed
 *                   as empty strings
 * @return whatever the `sh` step returns
 */
String executeShellScript( String shellPath, String arg1 = '', String arg2 = '', String arg3 = '', String arg4 = '', String arg5 = '' ) {
    // Space-joined exactly like the original interpolated command line.
    final String commandLine = ['./' + shellPath, arg1, arg2, arg3, arg4, arg5].join(' ')
    return sh(commandLine)
}
// Scripted pipeline: checkout -> build and push image -> tag repository -> deploy.
try {
    timestamps {
        node ("${env.SLAVE_LABEL}") {
            stage('Initialise') {
                // Branch and credentials come from the env settings at the top
                // of this file instead of being repeated as literals.
                checkout([
                    $class: 'GitSCM',
                    branches: [[name: env.GIT_BRANCH]],
                    doGenerateSubmoduleConfigurations: false,
                    extensions: [],
                    submoduleCfg: [],
                    userRemoteConfigs: [[
                        credentialsId: env.GITHUB_CREDENTIALS_ID,
                        url: env.GIT_REPOSITORY_URL
                    ]]
                ])

                env.APPLICATION_VERSION = get_application_version()

                withCredentials(
                    [usernamePassword(
                        credentialsId: 'doctl',
                        passwordVariable: 'DOCTL_TOKEN',
                        usernameVariable: 'DOCTL_USERNAME'
                    )]
                ) {
                    // Single quotes: the shell expands the secret, so Groovy
                    // never interpolates it into the command string.
                    sh 'doctl auth init --access-token "$DOCTL_TOKEN"'
                    sh 'doctl registry login'
                    sh 'doctl kubernetes cluster kubeconfig save cryptosky-cluster'
                }
            }

            stage('Build Image') {
                // Fill in the placeholders in the Kubernetes manifests.
                executeShellScript("configuration/scripts/mapVarsToConfigs.sh",
                    env.DIGITAL_OCEAN,
                    env.DIGITAL_OCEAN_REPO,
                    env.APPLICATION_NAME,
                    env.APPLICATION_VERSION,
                    env.APPLICATION_LABEL)

                withDockerServer([uri: "${env.DOCKER_REPOSITORY_TCP}"]) {
                    docker.build("${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}")
                    docker.build("${env.APPLICATION_NAME}:latest")

                    sh "docker tag ${env.APPLICATION_NAME}:${env.APPLICATION_VERSION} ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}"
                    sh "docker tag ${env.APPLICATION_NAME}:latest ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest"

                    sh "docker push ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}"
                    sh "docker push ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest"
                }
            }

            stage('Tag Repository') {
                withCredentials(
                    [usernamePassword(
                        credentialsId: env.GITHUB_CREDENTIALS_ID,
                        passwordVariable: 'GIT_PASSWORD',
                        usernameVariable: 'GIT_USERNAME'
                    )]
                ) {
                    sh "git tag ${env.APPLICATION_VERSION}"
                    // Everything is expanded by the shell: the credentials come
                    // from withCredentials, the rest from the env.* values set
                    // earlier, which are exported to sh steps.
                    sh 'git push "https://${GIT_USERNAME}:${GIT_PASSWORD}@${GIT_REPOSITORY_PATH}" "${APPLICATION_VERSION}"'
                }
            }

            stage('Deploy') {
                executeShellScript("configuration/scripts/deployToKubernetes.sh",
                    env.APPLICATION_NAME)
            }
        }
    }
    // Reached only when every stage completed without throwing; previously
    // SUCCESS was assigned in `finally`, i.e. on the failure path as well.
    currentBuild.result = 'SUCCESS'
} catch ( exception ) {
    currentBuild.result = 'FAILURE'
    throw exception
}

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Applies the rendered Kubernetes manifests and waits for the rollout.
#
# Usage: deployToKubernetes.sh <application-name>

# Abort on the first failing command so that a failed `kubectl apply` is not
# hidden by a later, successful `rollout status` of the previous revision.
set -euo pipefail

APPLICATION_NAME="${1:?Usage: deployToKubernetes.sh <application-name>}"

kubectl apply -f configuration/kubernetes/deployment.yaml
kubectl apply -f configuration/kubernetes/service.yaml

kubectl rollout status "deployment/${APPLICATION_NAME}" --namespace=production

View File

@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Replaces the upper-case placeholders in the Kubernetes manifests, in place.
#
# Usage: mapVarsToConfigs.sh <registry-host> <registry-repo> <app-name> <app-version> <app-label>

set -euo pipefail

if [ "$#" -ne 5 ]; then
    echo "Usage: $0 <registry-host> <registry-repo> <app-name> <app-version> <app-label>" >&2
    exit 1
fi

DIGITAL_OCEAN=$1
DIGITAL_OCEAN_REPO=$2
APPLICATION_NAME=$3
APPLICATION_VERSION=$4
APPLICATION_LABEL=$5

# `|` is used as the sed delimiter, so the `/` in the repository path needs no
# manual escaping.
DOCKER_REPOSITORY="${DIGITAL_OCEAN}/${DIGITAL_OCEAN_REPO}"

# The expressions run in the same order as the former separate sed calls.
sed -i \
    -e "s|REPOSITORY|${DOCKER_REPOSITORY}|g" \
    -e "s|IMAGE|${APPLICATION_NAME}:${APPLICATION_VERSION}|g" \
    -e "s|RESOURCE_NAME|${APPLICATION_NAME}|g" \
    -e "s|LABEL|${APPLICATION_LABEL}|g" \
    configuration/kubernetes/deployment.yaml

sed -i \
    -e "s|RESOURCE_NAME|${APPLICATION_NAME}|g" \
    -e "s|LABEL|${APPLICATION_LABEL}|g" \
    configuration/kubernetes/service.yaml

0
src/__init__.py Normal file
View File

View File

@ -0,0 +1,113 @@
#!/usr/bin/env python
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from src.utils.jsonLogger import log
class get_sentiment(object):
    """Wraps a VADER ``SentimentIntensityAnalyzer`` for scoring tweets."""

    def __init__(self):
        self.analyser = SentimentIntensityAnalyzer()
        # Not read anywhere in this class; kept so existing attribute access
        # from other modules keeps working.
        self.sentiment = {}
        self.compound = {}

    def get_vader_sentiment(self, sentence):
        """Score ``sentence`` with VADER.

        Args:
            sentence: text passed to ``polarity_scores``.

        Returns:
            dict: ``{'Score': <full polarity dict>, 'Compound': <compound value>}``.
        """
        score = self.analyser.polarity_scores(sentence)

        # Read the components by key instead of by position in the dict.
        negative, neutral, positive = score['neg'], score['neu'], score['pos']
        compound = score['compound']

        # Same condition as before: the first maximum of (neg, neu, pos) is
        # the neutral component.
        if neutral > negative and neutral >= positive:
            log("Tweet is overall Neutral - Score: {}".format(neutral), 'WARN')

        # Every branch of the original returned this same structure.
        return { 'Score': score, 'Compound': compound }

    def set_newSentiment(self):
        """Add market-specific terms and their valences to the VADER lexicon."""
        log("Adding marketing words and sentiment to lexicon...", 'INFO')

        # NOTE(review): VADER appears to look words up token by token, so the
        # multi-word keys below (e.g. 'trade higher') may never match - confirm.
        new_sentiment = {
            'bull' : 2,
            'bear' : -2,
            'bullish' : 3.5,
            'bearish' : -3.5,
            'up' : 1.5,
            'down' : -1.5,
            'high' : 2.9,
            'low' : -2.9,
            'higher' : 2.8,
            'lower' : -2.8,
            'absconded' : -2.0,
            'maximalists' : -2.4,
            'regulate' : -2.3,
            'infamous' : 2.2,
            'trade higher' : 2.0,
            'trade lower' : -2.0,
            'revival' : 2.8,
            'centralized' : -2.2,
            'decentralized' : 2.2,
            'centralised' : -2.2,
            'decentralised' : 2.2,
            'decentralization' : 2.3,
            'decentralisation' : 2.3,
            'centralization' : -2.3,
            'centralisation' : -2.3,
            'bans' : -2.6,
            'hodl' : 2.8,
            'ambiguity' : -2.4,
            'revolutionize' : 2.1,
            'revolutionise' : 2.1,
            'consolidation' : 2.5,
            'shorts' : -2.3,
            'longs' : 2.3,
            'long' : 2.2,
            'short' : -2.2,
            'shorting' : -2.8,
            'grow' : 2.2,
            'volatile' : -1.9,
            'rally' : 2.9,
            'rallying' : 2.7,
            'noob' : -1.7,
            'noobs' : -1.9,
            'innovation' : 1.4,
            'bottom' : -1.4,
            'top' : 1.4,
            'topped' : 1.5,
            'bottomed' : -1.5,
            'upwards' : 1.7,
            'downwards' : -1.7,
            'invest' : 2.0,
            'raging' : 3.0,
            'rocketing' : 3.1,
            'swing' : 1.3,
            'swinging' : 1.2,
            'stake' : 1.4,
            'whale' : -2.2,
            'whales' : -2.3,
            'lull' : -2.1,
            'moon' : 2.7,
            'choppy' : -1.2,
            'buy' : 1.9,
            'buying' : 1.7,
            'sell' : -1.7,
            'selling' : -1.9,
            'start selling' : -2.3,
            'stop selling' : 1.4,
            'start buying' : 2.3,
            'stop buying' : -1.4
        }

        self.analyser.lexicon.update(new_sentiment)

47
src/main.py Normal file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env python
import sys, json
sys.path.append('/home/sentiment-analyser/')
from threading import Thread
from src.utils.jsonLogger import setup_logging, log
import analyser.sentimentAnalyser as sentimentAnalyser
from flask import Flask, request
from probes.probes import runFlaskProbes
# Flask application serving the sentiment API.
app = Flask(__name__)

# Single analyser instance shared by all requests.
analyser = sentimentAnalyser.get_sentiment()


@app.route('/sentiment', methods=['GET'])
def tweetPredict():
    """Classify the text given in the ``tweet`` query parameter.

    Returns:
        A Flask response tuple ``(body, status, headers)``. The body is JSON
        with ``result`` and ``tweet`` on success (200), or ``error`` when the
        ``tweet`` parameter is absent (400).
    """
    headers = {'Content-Type': 'application/json'}

    tweet = request.args.get('tweet')
    if tweet is None:
        # Without this guard the analyser is handed None and the request
        # fails with an unhandled exception (HTTP 500).
        log("Rejecting request without a 'tweet' query parameter", 'WARN')
        return json.dumps({'error': "Missing required query parameter 'tweet'"}), 400, headers

    log("Receiving Tweet to classify {}".format(tweet), 'INFO')

    result = analyser.get_vader_sentiment(tweet)

    log("Returning classification result of {}".format(result), 'INFO')

    return json.dumps({'result': result, 'tweet': tweet}), 200, headers
def callSentimentAnalyser():
    """Extend the analyser's lexicon, then serve the API on port 9090."""
    analyser.set_newSentiment()
    app.run(host="0.0.0.0", port=9090)
def callProbes():
    """Thread target that starts the probes server via ``runFlaskProbes``."""
    return runFlaskProbes()
if __name__ == '__main__':
    setup_logging()

    # The message previously announced "Spam Filter", a leftover from another
    # service.
    log("Starting Sentiment Analyser...", 'INFO')
    sys.stdout.flush()

    # The probes server and the API server each run in their own thread.
    Thread(target=callProbes).start()
    Thread(target=callSentimentAnalyser).start()

0
src/probes/__init__.py Normal file
View File

44
src/probes/probes.py Normal file
View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
from flask import Flask
import json, requests
# Flask application serving the health and readiness endpoints.
app = Flask(__name__)


@app.route('/health')
def health():
    """Liveness endpoint: always reports ``UP`` with HTTP 200."""
    # The header name was previously 'ContentType', which is not the
    # Content-Type header and left the response typed as HTML.
    return json.dumps({'status': 'UP'}), 200, {'Content-Type': 'application/json'}
@app.route('/readiness')
def readiness():
    """Readiness endpoint: checks that the sentiment API answers a sample request.

    Returns:
        A Flask response tuple ``(body, status, headers)``: status 200 with
        ``'status': 'UP'`` when the sentiment endpoint returns HTTP 200 with a
        JSON body, otherwise 503 with ``'status': 'DOWN'``.
    """
    headers = {'Content-Type': 'application/json'}
    app_info = {
        'name': 'CryptoSky Sentiment Analyser',
        'description': 'Projects Sentiment Analyser service that classifies whether the tweet is overall positive or negative and sends back results.',
    }

    try:
        # The path was previously misspelled ('/senitment'), so this call always
        # got a 404 and the JSON decoding below raised. The loopback address
        # replaces 0.0.0.0, which is a bind address rather than a destination.
        # The timeout keeps the probe from hanging while the API is down.
        response = requests.request("GET", 'http://127.0.0.1:9090/sentiment?tweet=Boitcoin%20is%20doing%20very%20well', timeout=1)
        result = json.loads(response.text)
        healthy = response.status_code == 200 and result != ""
    except (requests.RequestException, ValueError) as error:
        # Connection failures, timeouts and non-JSON bodies all mean "not ready".
        result = str(error)
        healthy = False

    if healthy:
        status, code = 'UP', 200
        app_info['check_status'] = 'Success - Call to sentiment endpoint'
    else:
        status, code = 'DOWN', 503
        app_info['check_status'] = 'Failure - Call to sentiment endpoint'
    app_info['response'] = result

    return json.dumps({'status': status, 'app': app_info}), code, headers
def runFlaskProbes():
    """Serve the probe endpoints on port 9091, on all interfaces."""
    app.run(host="0.0.0.0", port=9091)


# Allows the probes server to be started on its own.
if __name__ == '__main__':
    runFlaskProbes()

40
src/utils/jsonLogger.py Normal file
View File

@ -0,0 +1,40 @@
#!/usr/bin/env python
import logging
from pythonjsonlogger import jsonlogger
import datetime
class CustomJsonFormatter(jsonlogger.JsonFormatter):
    """JSON formatter that guarantees ``@timestamp`` and ``level`` fields."""

    def add_fields(self, log_record, record, message_dict):
        """Populate ``log_record`` with a timestamp and an upper-case level.

        Args:
            log_record: mapping that will be serialised as the JSON log line.
            record: the originating ``logging.LogRecord``.
            message_dict: passed through unchanged to the parent implementation.
        """
        super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict)

        if not log_record.get('@timestamp'):
            # Use the record's creation time rather than "now", so the stamp
            # reflects when the event was logged, not when it was formatted.
            created = datetime.datetime.fromtimestamp(record.created, datetime.timezone.utc)
            log_record['@timestamp'] = created.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

        if log_record.get('level'):
            log_record['level'] = log_record['level'].upper()
        else:
            log_record['level'] = record.levelname
def setup_logging(log_level='INFO'):
    """Configure this module's logger to emit JSON lines through a stream handler.

    Safe to call more than once: the level is updated every time, but the
    handler is attached only once, so messages are not duplicated.

    Args:
        log_level: level name or number accepted by ``Logger.setLevel``.
    """
    logger = logging.getLogger(__name__)
    logger.propagate = False
    logger.setLevel(log_level)

    # A second call used to stack another handler and double every log line.
    if any(isinstance(handler.formatter, CustomJsonFormatter) for handler in logger.handlers):
        return

    logHandler = logging.StreamHandler()
    formatter = CustomJsonFormatter('%(@timestamp)s %(level)s %(name)s %(message)s')
    logHandler.setFormatter(formatter)
    logger.addHandler(logHandler)
def log(message, level):
    """Emit ``message`` on this module's logger at the named level.

    Args:
        message: text to log.
        level: one of 'INFO', 'WARN', 'ERR' or 'DEBUG'; the standard names
            'WARNING' and 'ERROR' are accepted as well. Any other value is
            logged at INFO instead of being silently discarded.
    """
    logger = logging.getLogger(__name__)

    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    emitters = {
        'INFO': logger.info,
        'WARN': logger.warning,
        'WARNING': logger.warning,
        'ERR': logger.error,
        'ERROR': logger.error,
        'DEBUG': logger.debug,
    }
    emitters.get(level, logger.info)(message)