[08.10.20] Set up, configuration and code for sentiment analyser service with VADER
This commit is contained in:
parent
f5866f2a3b
commit
19c13d12dc
12
Dockerfile
Normal file
12
Dockerfile
Normal file
@ -0,0 +1,12 @@
|
||||
FROM python:3.7-alpine

# MAINTAINER is deprecated; record the maintainer as image metadata instead.
LABEL maintainer="Andrew Sotheran <cryptosky.user@gmail.com>"

# Compiler toolchain needed to build native wheels on Alpine.
# --no-cache fetches the package index on the fly and leaves nothing behind,
# replacing the previous "apk update" + apt-style cleanup (Alpine has no apt).
RUN apk add --no-cache py-pip libc-dev gcc

RUN python -m pip install --upgrade pip

# requests is imported directly by src/probes/probes.py, so install it
# explicitly instead of relying on it arriving as a transitive dependency.
RUN pip install --no-cache-dir utils pycryptodome && \
    pip install --no-cache-dir python-dotenv flask python-json-logger vaderSentiment requests

COPY . /home/sentiment-analyser/.

# 9090: sentiment API, 9091: health/readiness probes
EXPOSE 9090
EXPOSE 9091

CMD ["python", "/home/sentiment-analyser/src/main.py"]
|
||||
79
configuration/kubernetes/deployment.yaml
Normal file
79
configuration/kubernetes/deployment.yaml
Normal file
@ -0,0 +1,79 @@
|
||||
# Deployment template for the service. The upper-case placeholder tokens are
# substituted by configuration/scripts/mapVarsToConfigs.sh before "kubectl apply".
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    linkerd.io/inject: enabled
  labels:
    name: LABEL
  name: RESOURCE_NAME
  namespace: production
spec:
  replicas: 1
  selector:
    matchLabels:
      app: RESOURCE_NAME
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Bring one new pod up before taking any old pod down.
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      annotations:
        linkerd.io/inject: enabled
      labels:
        app: RESOURCE_NAME
    spec:
      containers:
      - image: REPOSITORY/IMAGE
        name: RESOURCE_NAME
        env:
        - name: KUBERNETES_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: CONTAINER_CORE_LIMIT
          valueFrom:
            resourceFieldRef:
              resource: limits.cpu
        - name: CONTAINER_MAX_MEMORY
          valueFrom:
            resourceFieldRef:
              resource: limits.memory
        ports:
        # Port names are limited to 15 characters, so the substituted
        # application name cannot be used here; use fixed short names that
        # match the port names of the Service manifest.
        - containerPort: 9090
          name: sentiment
        - containerPort: 9091
          name: probes
        livenessProbe:
          httpGet:
            path: /health
            port: 9091
          initialDelaySeconds: 30
          periodSeconds: 30
          timeoutSeconds: 1
          successThreshold: 1
          failureThreshold: 1
        readinessProbe:
          httpGet:
            port: 9091
            path: /readiness
          initialDelaySeconds: 30
          periodSeconds: 5
          timeoutSeconds: 1
          successThreshold: 1
          failureThreshold: 1
        imagePullPolicy: Always
        resources:
          requests:
            cpu: 32m
            memory: 32Mi
          limits:
            cpu: 75m
            memory: 64Mi
        securityContext:
          # NOTE(review): the application code only serves HTTP; confirm these
          # extra capabilities are really required before keeping them.
          capabilities:
            add:
            - NET_ADMIN
            - NET_RAW
      restartPolicy: Always
      imagePullSecrets:
      - name: registry-cryptosky-image-registry
|
||||
21
configuration/kubernetes/service.yaml
Normal file
21
configuration/kubernetes/service.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
# ClusterIP Service template; the upper-case placeholder tokens are filled in
# by configuration/scripts/mapVarsToConfigs.sh before the manifest is applied.
apiVersion: v1
kind: Service
metadata:
  name: RESOURCE_NAME
  namespace: production
  labels:
    name: LABEL
spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app: RESOURCE_NAME
  ports:
    # Sentiment API
    - name: sentiment
      protocol: TCP
      port: 9090
      targetPort: 9090
    # Health / readiness endpoints
    - name: probes
      protocol: TCP
      port: 9091
      targetPort: 9091
|
||||
95
configuration/pipelines/build.groovy
Normal file
95
configuration/pipelines/build.groovy
Normal file
@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env groovy
|
||||
|
||||
// Application identity; also used for the image name and the manifest placeholders.
env.APPLICATION_NAME = 'sentiment-analyser'
env.APPLICATION_LABEL = 'utilities'

// Source repository; the path form (without scheme) is reused when pushing tags.
env.GIT_BRANCH = 'master'
env.GIT_REPOSITORY_PATH = "github.com/andyjk15/${env.APPLICATION_NAME}.git"
env.GIT_REPOSITORY_URL = "https://${env.GIT_REPOSITORY_PATH}"
env.GITHUB_CREDENTIALS_ID = 'Github'

// Registry that images are tagged for and pushed to.
env.DIGITAL_OCEAN = 'registry.digitalocean.com'
env.DIGITAL_OCEAN_REPO = 'cryptosky-image-registry'
// Remote Docker daemon the build stage connects to via withDockerServer.
env.DOCKER_BUILDER = 'registry.cryptosky.me'
env.DOCKER_REPOSITORY = "${env.DIGITAL_OCEAN}/${env.DIGITAL_OCEAN_REPO}"
env.DOCKER_REPOSITORY_TCP = "tcp://${env.DOCKER_BUILDER}:4243"

env.NAMESPACE = 'production'
// Label of the Jenkins agent the pipeline's node block runs on.
env.SLAVE_LABEL = "cryptosky-aio-build"
|
||||
|
||||
|
||||
/**
 * Build the version string for this run: a fixed "1.0.0-b" prefix followed by
 * the Jenkins build number.
 */
String get_application_version() {
    return "1.0.0-b" + env.BUILD_NUMBER
}
|
||||
|
||||
/**
 * Run a script from the workspace, passing up to five positional arguments.
 * Arguments that are not supplied default to empty strings and are still
 * placed on the command line, separated by single spaces.
 */
String executeShellScript( String shellPath, String arg1 = '', String arg2 = '', String arg3 = '', String arg4 = '', String arg5 = '' ) {
    def arguments = [arg1, arg2, arg3, arg4, arg5].join(' ')
    sh "./${shellPath} ${arguments}"
}
|
||||
|
||||
// Pipeline: checkout + registry login, build and push the image, tag the
// repository with the build version, then deploy to Kubernetes.
try {
    timestamps {
        node ("${env.SLAVE_LABEL}") {
            stage('Initialise') {
                // Use the branch and credentials configured at the top of the
                // file instead of repeating the literals here.
                checkout([$class: 'GitSCM', branches: [[name: env.GIT_BRANCH]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: env.GITHUB_CREDENTIALS_ID, url: env.GIT_REPOSITORY_URL]]])

                env.APPLICATION_VERSION = get_application_version()

                withCredentials(
                    [usernamePassword(
                        credentialsId: 'doctl',
                        passwordVariable: 'DOCTL_TOKEN',
                        usernameVariable: 'DOCTL_USERNAME'
                    )]
                ) {
                    // Single-quoted so the shell, not Groovy, expands the secret;
                    // Groovy interpolation would embed the token in the command text.
                    sh 'doctl auth init --access-token "$DOCTL_TOKEN"'
                    sh "doctl registry login"
                    sh "doctl kubernetes cluster kubeconfig save cryptosky-cluster"
                }
            }

            stage('Build Image') {

                // Fill the placeholders in the Kubernetes manifests for this build.
                executeShellScript("configuration/scripts/mapVarsToConfigs.sh",
                    env.DIGITAL_OCEAN,
                    env.DIGITAL_OCEAN_REPO,
                    env.APPLICATION_NAME,
                    env.APPLICATION_VERSION,
                    env.APPLICATION_LABEL)

                withDockerServer([uri: "${env.DOCKER_REPOSITORY_TCP}"]) {
                    docker.build("${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}")
                    docker.build("${env.APPLICATION_NAME}:latest")

                    sh "docker tag ${env.APPLICATION_NAME}:${env.APPLICATION_VERSION} ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}"
                    sh "docker tag ${env.APPLICATION_NAME}:latest ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest"

                    sh "docker push ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}"
                    sh "docker push ${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest"
                }

            }

            stage('Tag Repository') {

                withCredentials(
                    [usernamePassword(
                        credentialsId: env.GITHUB_CREDENTIALS_ID,
                        passwordVariable: 'GIT_PASSWORD',
                        usernameVariable: 'GIT_USERNAME'
                    )]
                ) {
                    sh "git tag ${env.APPLICATION_VERSION}"
                    // Single-quoted: credentials and the env.* values set above are
                    // read from the step's environment by the shell itself.
                    sh 'git push "https://${GIT_USERNAME}:${GIT_PASSWORD}@${GIT_REPOSITORY_PATH}" "${APPLICATION_VERSION}"'
                }
            }

            stage('Deploy') {
                executeShellScript("configuration/scripts/deployToKubernetes.sh",
                    env.APPLICATION_NAME)
            }
        }
    }
} catch ( exception ) {
    currentBuild.result = 'FAILURE'
    throw exception
} finally {
    // Default to SUCCESS only when nothing has recorded a result yet, so a
    // FAILURE set by the catch block is never overwritten.
    currentBuild.result = currentBuild.result ?: 'SUCCESS'
}
|
||||
8
configuration/scripts/deployToKubernetes.sh
Executable file
8
configuration/scripts/deployToKubernetes.sh
Executable file
@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
#
# Apply the Kubernetes manifests and wait for the deployment rollout.
# Usage: deployToKubernetes.sh <application-name>

# Stop at the first failing command so a failed "kubectl apply" fails the
# pipeline instead of being masked by a later command.
set -euo pipefail

APPLICATION_NAME="${1:?usage: deployToKubernetes.sh <application-name>}"

kubectl apply -f configuration/kubernetes/deployment.yaml
kubectl apply -f configuration/kubernetes/service.yaml

kubectl rollout status "deployment/${APPLICATION_NAME}" --namespace=production
|
||||
17
configuration/scripts/mapVarsToConfigs.sh
Executable file
17
configuration/scripts/mapVarsToConfigs.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
#
# Substitute the placeholder tokens in the Kubernetes manifests in place.
# Usage: mapVarsToConfigs.sh <registry-host> <registry-repo> <app-name> <app-version> <app-label>

set -euo pipefail

# A missing argument would otherwise be substituted as an empty string and
# produce an invalid manifest without any error.
if [ "$#" -lt 5 ]; then
    echo "usage: $0 <registry-host> <registry-repo> <app-name> <app-version> <app-label>" >&2
    exit 1
fi

DIGITAL_OCEAN=$1
DIGITAL_OCEAN_REPO=$2
APPLICATION_NAME=$3
APPLICATION_VERSION=$4
APPLICATION_LABEL=$5

# The slash is escaped because "/" is the sed delimiter below.
DOCKER_REPOSITORY="${DIGITAL_OCEAN}\/${DIGITAL_OCEAN_REPO}"

DEPLOYMENT_FILE=configuration/kubernetes/deployment.yaml
SERVICE_FILE=configuration/kubernetes/service.yaml

sed -i "s/REPOSITORY/${DOCKER_REPOSITORY}/g" "${DEPLOYMENT_FILE}"
sed -i "s/IMAGE/${APPLICATION_NAME}:${APPLICATION_VERSION}/g" "${DEPLOYMENT_FILE}"
sed -i "s/RESOURCE_NAME/${APPLICATION_NAME}/g" "${DEPLOYMENT_FILE}"
sed -i "s/LABEL/${APPLICATION_LABEL}/g" "${DEPLOYMENT_FILE}"

sed -i "s/RESOURCE_NAME/${APPLICATION_NAME}/g" "${SERVICE_FILE}"
sed -i "s/LABEL/${APPLICATION_LABEL}/g" "${SERVICE_FILE}"
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
113
src/analyser/sentimentAnalyser.py
Normal file
113
src/analyser/sentimentAnalyser.py
Normal file
@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
from src.utils.jsonLogger import log
|
||||
|
||||
class get_sentiment(object):
    """Wrap a VADER analyser and extend its lexicon with market vocabulary."""

    def __init__(self):
        self.analyser = SentimentIntensityAnalyzer()
        self.sentiment = {}
        self.compound = {}

    def get_vader_sentiment(self, sentence):
        """Score *sentence* and return the polarity scores with the compound.

        Returns a dict ``{'Score': <polarity scores dict>, 'Compound': <value>}``.
        A warning is logged when the neutral component is the strongest one.
        """
        score = self.analyser.polarity_scores(sentence)

        # Read the components by key rather than by position in the dict so
        # the result does not depend on the ordering of the returned mapping.
        compound = score['compound']
        polarity = [score['neg'], score['neu'], score['pos']]
        strongest = max(polarity)

        # index() returns the first maximum, so a neg/neu tie is not reported
        # as neutral while a neu/pos tie is.
        if polarity.index(strongest) == 1:
            log("Tweet is overall Neutral - Score: {}".format(strongest), 'WARN')

        return {'Score': score, 'Compound': compound}

    def set_newSentiment(self):
        """Add market-specific words and their valence to the analyser lexicon."""
        log("Adding marketing words and sentiment to lexicon...", 'INFO')

        # NOTE(review): multi-word keys (e.g. 'trade higher') presumably only
        # take effect if the analyser looks up phrases, not just single
        # tokens - confirm against the VADER implementation.
        new_sentiment = {
            'bull': 2,
            'bear': -2,
            'bullish': 3.5,
            'bearish': -3.5,
            'up': 1.5,
            'down': -1.5,
            'high': 2.9,
            'low': -2.9,
            'higher': 2.8,
            'lower': -2.8,
            'absconded': -2.0,
            'maximalists': -2.4,
            'regulate': -2.3,
            'infamous': 2.2,
            'trade higher': 2.0,
            'trade lower': -2.0,
            'revival': 2.8,
            'centralized': -2.2,
            'decentralized': 2.2,
            'centralised': -2.2,
            'decentralised': 2.2,
            'decentralization': 2.3,
            'decentralisation': 2.3,
            'centralization': -2.3,
            'centralisation': -2.3,
            'bans': -2.6,
            'hodl': 2.8,
            'ambiguity': -2.4,
            'revolutionize': 2.1,
            'revolutionise': 2.1,
            'consolidation': 2.5,
            'shorts': -2.3,
            'longs': 2.3,
            'long': 2.2,
            'short': -2.2,
            'shorting': -2.8,
            'grow': 2.2,
            'volatile': -1.9,
            'rally': 2.9,
            'rallying': 2.7,
            'noob': -1.7,
            'noobs': -1.9,
            'innovation': 1.4,
            'bottom': -1.4,
            'top': 1.4,
            'topped': 1.5,
            'bottomed': -1.5,
            'upwards': 1.7,
            'downwards': -1.7,
            'invest': 2.0,
            'raging': 3.0,
            'rocketing': 3.1,
            'swing': 1.3,
            'swinging': 1.2,
            'stake': 1.4,
            'whale': -2.2,
            'whales': -2.3,
            'lull': -2.1,
            'moon': 2.7,
            'choppy': -1.2,
            'buy': 1.9,
            'buying': 1.7,
            'sell': -1.7,
            'selling': -1.9,
            'start selling': -2.3,
            'stop selling': 1.4,
            'start buying': 2.3,
            'stop buying': -1.4,
        }

        self.analyser.lexicon.update(new_sentiment)
|
||||
47
src/main.py
Normal file
47
src/main.py
Normal file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys, json
|
||||
sys.path.append('/home/sentiment-analyser/')
|
||||
|
||||
from threading import Thread
|
||||
|
||||
from src.utils.jsonLogger import setup_logging, log
|
||||
import analyser.sentimentAnalyser as sentimentAnalyser
|
||||
|
||||
from flask import Flask, request
|
||||
|
||||
from probes.probes import runFlaskProbes
|
||||
|
||||
# Flask application that serves the /sentiment endpoint defined below.
app = Flask(__name__)
# Single analyser instance shared by the request handler and start-up code.
analyser = sentimentAnalyser.get_sentiment()
|
||||
|
||||
@app.route('/sentiment', methods=['GET'])
def tweetPredict():
    """Classify the tweet passed in the ``tweet`` query parameter.

    Returns a JSON body ``{'result': ..., 'tweet': ...}`` with status 200, or
    a JSON error with status 400 when the parameter is absent.
    """
    tweet = request.args.get('tweet')

    # Without this guard None would be handed to the analyser and surface as
    # an unhandled server error.
    if tweet is None:
        log("Rejecting sentiment request without a 'tweet' parameter", 'WARN')
        return json.dumps({'error': "Missing required query parameter 'tweet'"}), 400, {'Content-Type': 'application/json'}

    log("Receiving Tweet to classify {}".format(tweet), 'INFO')

    result = analyser.get_vader_sentiment(tweet)

    log("Returning classification result of {}".format(result), 'INFO')

    # 'Content-Type' is the real header name; 'ContentType' was sent as an
    # unrelated custom header and left the response typed as HTML.
    return json.dumps({'result': result, 'tweet': tweet}), 200, {'Content-Type': 'application/json'}
|
||||
|
||||
def callSentimentAnalyser():
    """Extend the analyser lexicon, then serve the sentiment API on port 9090."""
    analyser.set_newSentiment()
    app.run(host="0.0.0.0", port=9090)
|
||||
|
||||
def callProbes():
    """Thread target that starts the probe server."""
    runFlaskProbes()
|
||||
|
||||
if __name__ == '__main__':
    setup_logging()

    # Start-up message names this service (it previously said "Spam Filter").
    log("Starting Sentiment Analyser...", 'INFO')
    sys.stdout.flush()

    # The probe server and the sentiment API each run in their own thread.
    Thread(target=callProbes).start()
    Thread(target=callSentimentAnalyser).start()
|
||||
0
src/probes/__init__.py
Normal file
0
src/probes/__init__.py
Normal file
44
src/probes/probes.py
Normal file
44
src/probes/probes.py
Normal file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from flask import Flask
|
||||
|
||||
import json, requests
|
||||
|
||||
# Flask application that hosts the /health and /readiness endpoints below.
app = Flask(__name__)
|
||||
|
||||
@app.route('/health')
def health():
    """Liveness endpoint: always answers 200 with a JSON status of UP."""
    # 'Content-Type' is the real header name; 'ContentType' had no effect on
    # how the response body was typed.
    return json.dumps({'status': 'UP'}), 200, {'Content-Type': 'application/json'}
|
||||
|
||||
@app.route('/readiness')
def readiness():
    """Readiness endpoint: verify the sentiment API answers a sample request.

    Returns 200 with status UP when the sentiment endpoint responds with a
    non-empty JSON payload, otherwise 503 with status DOWN and the failure
    reason in ``response``.
    """
    try:
        # The route is /sentiment; the previous '/senitment' path did not
        # exist, so the JSON parse of the error page always raised.
        # NOTE(review): timeout chosen to mirror the 1s probe timeout in the
        # deployment manifest - adjust if that changes.
        reply = requests.request(
            "GET",
            'http://0.0.0.0:9090/sentiment?tweet=Boitcoin%20is%20doing%20very%20well',
            timeout=1,
        )
        reply.raise_for_status()
        result = json.loads(reply.text)
        ready = bool(result)
    except (requests.RequestException, ValueError) as error:
        # Connection problems, non-2xx answers and invalid JSON all mean the
        # service cannot take traffic yet.
        result = str(error)
        ready = False

    if ready:
        status, check_status, code = 'UP', 'Success - Call to sentiment endpoint', 200
    else:
        status, check_status, code = 'DOWN', 'Failure - Call to sentiment endpoint', 503

    return json.dumps({
        'status': status,
        'app': {
            'name': 'CryptoSky Sentiment Analyser',
            'description': 'Projects Sentiment Analyser service that classifies whether the tweet is overall positive or negative and sends back results.',
            'check_status': check_status,
            'response': result
        }
    }), code, {'Content-Type': 'application/json'}
|
||||
|
||||
def runFlaskProbes():
    """Serve the probe endpoints on port 9091 on all interfaces."""
    app.run(host="0.0.0.0", port=9091)
|
||||
|
||||
# Allow the probe server to be started on its own when run as a script.
if __name__ == '__main__':
    runFlaskProbes()
|
||||
40
src/utils/jsonLogger.py
Normal file
40
src/utils/jsonLogger.py
Normal file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import logging
|
||||
from pythonjsonlogger import jsonlogger
|
||||
|
||||
import datetime
|
||||
|
||||
class CustomJsonFormatter(jsonlogger.JsonFormatter):
    """JSON formatter that adds ``@timestamp`` and an upper-case ``level``."""

    def add_fields(self, log_record, record, message_dict):
        """Populate *log_record* with the timestamp and level fields."""
        super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict)
        if not log_record.get('@timestamp'):
            # Use the record's creation time so the timestamp reflects when
            # the event was logged rather than when it was formatted.
            created = datetime.datetime.utcfromtimestamp(record.created)
            log_record['@timestamp'] = created.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        if log_record.get('level'):
            log_record['level'] = log_record['level'].upper()
        else:
            log_record['level'] = record.levelname
|
||||
|
||||
def setup_logging(log_level='INFO'):
    """Attach a JSON-formatting stream handler to this module's logger.

    The logger is set to *log_level* and does not propagate to its parents.
    """
    json_handler = logging.StreamHandler()
    json_handler.setFormatter(
        CustomJsonFormatter('%(@timestamp)s %(level)s %(name)s %(message)s')
    )

    module_logger = logging.getLogger(__name__)
    module_logger.propagate = 0
    module_logger.setLevel(log_level)
    module_logger.addHandler(json_handler)
|
||||
|
||||
def log(message, level):
    """Log *message* on this module's logger at the severity named by *level*.

    Recognised names are 'INFO', 'WARN'/'WARNING', 'ERR'/'ERROR' and 'DEBUG'.
    Any other name is logged at INFO so the message is not silently lost.
    """
    logger = logging.getLogger(__name__)
    emitters = {
        'INFO': logger.info,
        # Logger.warn is a deprecated alias that newer Python versions no
        # longer provide; warning() is the supported spelling.
        'WARN': logger.warning,
        'WARNING': logger.warning,
        'ERR': logger.error,
        'ERROR': logger.error,
        'DEBUG': logger.debug,
    }
    emitters.get(level, logger.info)(message)
|
||||
Loading…
x
Reference in New Issue
Block a user