7 changed files with 42 additions and 233 deletions
--- a/configuration/kubernetes/deployment.yaml
+++ b/configuration/kubernetes/deployment.yaml
@ -119,10 +119,10 @@ spec:
        imagePullPolicy: Always
        resources:
          requests:
-            cpu: 100m
-            memory: 64Mi
+            cpu: 32m
+            memory: 32Mi
          limits:
-            cpu: 500m
+            cpu: 150m
            memory: 256Mi
        securityContext:
          capabilities:
--- a/src/tweets/collector.py
+++ b/src/tweets/collector.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python

-import os, sys, json, uuid
+import os, sys, json

 from datetime import datetime, timedelta
 from time import sleep, time
@ -17,14 +17,12 @@ from src.utils.spamFilter import callSpamFilter
 from src.utils.sentimentAnalyser import callSentimentAnalyser
 from src.utils.activemqConnect import activeMQSender
 from src.utils.jsonLogger import log
-from src.utils.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords

 from http.client import IncompleteRead
 from urllib3.exceptions import ProtocolError

 hourStack = []
 dumpStack = []
-processStack = []

 class keys():

@ -34,20 +32,20 @@ class keys():
        self.access_token = os.getenv("ACCESS_TOKEN")
        self.access_secret = os.getenv("ACCESS_SECRET")

-def sendToArtemis(syncId, pos, neu, neg, compound, type):
+def sendToArtemis(pos, neu, neg, compound, type):

    timestamp = datetime.now() + timedelta(hours=1)
    strippedTimestamp = timestamp.replace(minute=0, second=0, microsecond=0)
    timestamp = strippedTimestamp.strftime('%Y-%m-%dT%H:%M:%S')

-    message = { "timestamp" : timestamp, "syncId": str(syncId), "pos" : pos, "neu" : neu, "neg" : neg, "compound" : compound, "type": type }
+    message = { "timestamp" : timestamp, "pos" : pos, "neu" : neu, "neg" : neg, "compound" : compound, "type": type }

    messageJson = json.dumps(message, indent = 4)

-    log("Sending message to TweetSave queue for SyncId [{}]".format(syncId), 'INFO')
+    log("Sending message to TweetSave queue", 'INFO')
    log("Message: {}".format(message), 'INFO')

-    activeMQSender(messageJson, syncId)
+    activeMQSender(messageJson)

 class Streamer():

@ -56,7 +54,6 @@ class Streamer():

    def stream_tweets(self, hashtag):
        listener = Listener(hashtag)
-
        auth = OAuthHandler(keys().api_key, keys().api_secret)

        log("Authorising with twitter API...", 'INFO')
@ -69,13 +66,11 @@ class Streamer():

        while True:
            try:
-                stream = Stream(auth, listener=listener, tweet_mode='extended')
+                stream = Stream(auth=api.auth, listener=listener, tweet_mode='extended')
                stream.filter(languages=["en"], track=hashtag)
            except IncompleteRead:
-                log("Incomplete Read Error", 'ERR')
                continue
            except ProtocolError:
-                log("Protocol Error", 'ERR')
                continue

 class Listener(StreamListener):
@ -87,47 +82,37 @@ class Listener(StreamListener):

    def on_data(self, data):

+        log("Received Tweet...", 'INFO')
+
        if (time() - self.start_time) < self.limit:
            data = json.loads(data)

-            log(len(dumpStack), 'INFO')
-
            # Check if tweet is a retweet
            if 'retweeted_status' in data:
                if 'extended_tweet' in data['retweeted_status']:
                    #if tweet is over the 140 word limit
                    text = data['retweeted_status']['extended_tweet']['full_text']
-                    text = filterOutTweetsWithNoneWhitelistedWords(text)
                    dumpStack.append({'type': self.hashtag, 'tweet': text})
                else:
                    text = data['retweeted_status']['text']
-                    text = filterOutTweetsWithNoneWhitelistedWords(text)
                    dumpStack.append({'type': self.hashtag, 'tweet': text})
            else:
                # Else if a normal Tweeet
                if 'extended_tweet' in data:
                    # If tweet is over 140 word limit
                    text = data['extended_tweet']['full_text']
-                    text = filterOutTweetsWithNoneWhitelistedWords(text)
                    dumpStack.append({'type': self.hashtag, 'tweet': text})

-def processTweet(syncId):
+def processTweet():

-    log(len(dumpStack), 'INFO')
-
-    processStack = dumpStack.copy()
+    processStack = dumpStack.copy()  # snapshot first: plain assignment aliases dumpStack, so the clear() below would empty processStack too
    dumpStack.clear()

-    # log("Processing [{}] Tweet...".format(text), 'INFO')
-
    if len(processStack) != 0:
        for tweet in processStack:
-
            removedLines = fixLines(tweet["tweet"])
-
            removedSpecialChars = cleanTweet(removedLines)
            removedSpacing = removeSpacing(removedSpecialChars[0])
-
            tweetLength = checkLength(removedSpacing)
            if tweetLength == True:

@ -141,11 +126,10 @@ def processTweet(syncId):

                    cleanedTweet = tweetText + ' ' + removedSpecialChars[1]

-                    if callSpamFilter(cleanedTweet, syncId) != 'spam':
+                    if callSpamFilter(cleanedTweet) != 'spam':
+                        pos, neu, neg, compound = callSentimentAnalyser(cleanedTweet)

-                        pos, neu, neg, compound = callSentimentAnalyser(cleanedTweet, syncId)
-
-                        if compound != 0.0 and neu <= 0.6:
+                        if compound != 0.0:
                            hourTweet = {'pos': pos, 'neu': neu, 'neg': neg, 'compound': compound, 'type': tweet["type"]}

                            hourStack.append(hourTweet)
@ -177,40 +161,28 @@ def createHourJob():
    log("Creating hour job...", 'INFO')
    schedule.clear("sendToArtemis")
    ovPos, ovNeu, ovNeg, ovCompound = 0, 0, 0, 0
-    type = ""

    global timeF
    timeF = timeFunction()

-    syncId = uuid.uuid4()
+    processTweet()

-    processTweet(syncId)
-
-    processStack = hourStack.copy()
-    hourStack.clear()
-
-    log("Extracting sentiment scores...", 'INFO')
-
-    if len(processStack) != 0:
-        log("Process stack size is :: [{}]".format(len(processStack)), 'INFO')
-        for item in processStack:
+    if len(hourStack) != 0:
+        for item in hourStack:
            ovPos = ovPos + item['pos']
            ovNeu = ovNeu + item['neu']
            ovNeg = ovNeg + item['neg']
            ovCompound = ovCompound + item['compound']
            type = item["type"]

-        pos = round(ovPos/len(processStack), 3)
-        neu = round(ovNeu/len(processStack), 3)
-        neg = round(ovNeg/len(processStack), 3)
-        compound = round(ovCompound/len(processStack), 3)
+        pos = round(ovPos/len(hourStack), 3)
+        neu = round(ovNeu/len(hourStack), 3)
+        neg = round(ovNeg/len(hourStack), 3)
+        compound = round(ovCompound/len(hourStack), 3)

-        if type == "bitcoin":
-            type = 'btc_usd'
+        hourStack.clear()

-        processStack.clear()
-
-        sendToArtemis(syncId, pos, neu, neg, compound, type)
+        sendToArtemis(pos, neu, neg, compound, type)
    else:
        log("Stack is empty", 'WARN')

@ -224,7 +196,7 @@ def collectorMain(hashtag):
    for i in range(len(hashtag)):
        Thread(target=collector, args=[hashtag[i]]).start()

-    sleep(2)
+    sleep(5)
    createHourJob()

    while True:
--- a/src/utils/activemqConnect.py
+++ b/src/utils/activemqConnect.py
@ -17,20 +17,14 @@ class keys():
    def returnKeys(self):
        return self.addr, self.port, self.amqU, self.amqP

-def activeMQSender(message, syncId):
+def activeMQSender(message):
    addr, port, mqUser, mqPass = keys().returnKeys()

    log("Attempting Connection to Artemis...", 'INFO')
    con = stomp.Connection([(addr, port)], auto_content_length=False)
    con.connect( mqUser, mqPass, wait=True)

-    con.send("TweetSave",
-             message,
-             content_type="application/json",
-             headers={
-                 "Content-Type":"application/json",
-                 "X-CRYPTO-Sync-ID":syncId
-             })
+    con.send("TweetSave", message, content_type="application/json", headers={"Content-Type":"application/json"})

    con.disconnect()

--- a/src/utils/jsonLogger.py
+++ b/src/utils/jsonLogger.py
@ -28,13 +28,13 @@ def setup_logging(log_level='INFO'):
    logHandler.setFormatter(formatter)
    logger.addHandler(logHandler)

-def log(message, level, syncId=""):
+def log(message, level):
    logger = logging.getLogger(__name__)
    if level == 'INFO':
-        logger.info(message, extra={"X-CRYPTO-Sync-ID" : syncId})
+        logger.info(message)
    elif level == 'WARN':
-        logger.warn(message, extra={"X-CRYPTO-Sync-ID" : syncId})
+        logger.warning(message)  # Logger.warn is a deprecated alias of Logger.warning
    elif level == 'ERR':
-        logger.error(message, extra={"X-CRYPTO-Sync-ID" : syncId})
+        logger.error(message)
    elif level == 'DEBUG':
-        logger.debug(message, extra={"X-CRYPTO-Sync-ID" : syncId})
+        logger.debug(message)
--- a/src/utils/sentimentAnalyser.py
+++ b/src/utils/sentimentAnalyser.py
@ -9,14 +9,11 @@ class keys():
    def __init__(self):
        self.sentiment_analyser_uri = os.getenv("SENTIMENT_URL")

-def callSentimentAnalyser(tweet, syncId):
-    headers = {
-        "content-type":"text",
-        "X-CRYPTO-Sync-ID" : str(syncId)
-    }
+def callSentimentAnalyser(tweet):
+    log("Calling Sentiment Analyser for [{}]".format(tweet), 'INFO')
    try:
        uri = keys().sentiment_analyser_uri + "/sentiment?tweet="+tweet
-        response = requests.request("GET", url=uri, headers=headers)
+        response = requests.request("GET", uri)

        response = json.loads(response.text)

@ -24,5 +21,5 @@ def callSentimentAnalyser(tweet, syncId):

        return scores["pos"], scores["neu"], scores["neg"], scores["compound"]
    except:
-        log("Could not call Sentiment Analyser Service with syncId of [{}]".format(syncId), 'ERR', syncId)
+        log("Could not call Sentiment Analyser Service", 'ERR')
        return 0, 0, 0, 0
--- a/src/utils/spamFilter.py
+++ b/src/utils/spamFilter.py
@ -9,18 +9,16 @@ class keys():
    def __init__(self):
        self.spamFilter_uri = os.getenv("FILTER_URL")

-def callSpamFilter(tweet, syncId):
-    headers = {
-        "content-type":"text",
-        "X-CRYPTO-Sync-ID" : str(syncId)
-    }
+def callSpamFilter(tweet):
    try:
        uri = keys().spamFilter_uri + "/predict?tweet="+tweet
-        response = requests.request("GET", url=uri, headers=headers)
+        response = requests.request("GET", uri)

        response = json.loads(response.text)

+        log("Spam Filter result for [{}] is [{}]".format(tweet, response["result"]), 'INFO')
+
        return response["result"]
    except:
-        log("Could not call spam filter service with syncId of [{}]".format(syncId), 'ERR', syncId)
+        log("Could not call spam filter service", 'ERR')
        return ""
--- a/src/utils/whitelistedWords.py
+++ b/src/utils/whitelistedWords.py
@ -1,152 +0,0 @@
-#!/usr/bin/env python
-
-whitelist = [
-    "bull",
-    "bear",
-    "bullish",
-    "bearish",
-    "up",
-    "down",
-    "high",
-    "low",
-    "higher",
-    "lower",
-    "absconded",
-    "maximalists",
-    "regulate",
-    "infamous",
-    "tradehigher",
-    "tradelower",
-    "revival",
-    "centralized",
-    "decentralized",
-    "centralised",
-    "decentralised",
-    "decentralization",
-    "decentralisation",
-    "centralization",
-    "centralisation",
-    "bans",
-    "hodl",
-    "ambiguity",
-    "revolutionize",
-    "revolutionise",
-    "consolidation",
-    "shorts",
-    "longs",
-    "long",
-    "short",
-    "shorting",
-    "grow",
-    "volatile",
-    "rally",
-    "rallying",
-    "noob",
-    "noobs",
-    "innovation",
-    "bottom",
-    "top",
-    "topped",
-    "bottomed",
-    "upwards",
-    "downwards",
-    "invest",
-    "raging",
-    "rocketing",
-    "swing",
-    "swinging",
-    "stake",
-    "whale",
-    "whales",
-    "lull",
-    "moon",
-    "choppy",
-    "buy",
-    "buying",
-    "sell",
-    "selling",
-    "startselling",
-    "stopselling",
-    "startbuying",
-    "stopbuying",
-    "bitcoin",
-    "btc",
-    "eth",
-    "xmr",
-    "xrp",
-    "ripple",
-    "block",
-    "reward",
-    "airdrop",
-    "drop",
-    "raise",
-    "stack",
-    "stake",
-    "invest",
-    "pull",
-    "push",
-    "token",
-    "sale",
-    "unhappy",
-    "happy",
-    "expert",
-    "novice"
-    "passed",
-    "mark",
-    "decline",
-    "incline",
-    "fees",
-    "crypto",
-    "wallet",
-    "price",
-    "history",
-    "reached",
-    "upward",
-    "downward",
-    "trading",
-    "mining",
-    "defi",
-    "finance",
-    "blockchain",
-    "interest",
-    "alt",
-    "alts",
-    "fiat",
-    "fiat",
-    "currency",
-    "currencies",
-    "wealth",
-    "hype",
-    "hyped",
-    "achievement",
-    "platform",
-    "incremental",
-    "increment",
-    "decrement",
-    "decremental",
-    "success",
-    "loss",
-    "win",
-    "lose",
-    "worth",
-    "strongest",
-    "weakest",
-    "strong",
-    "weak",
-    "trade",
-    "popping",
-    "sucking",
-    "shard",
-    "sharding",
-    "industry",
-    "powerful",
-    "better",
-    "worse"
-]
-
-def filterOutTweetsWithNoneWhitelistedWords(text):
-    if any(x in text for x in whitelist):
-        return text
-    else:
-        # log("Tweet [{}] did not contain any keywords for it to be considered crypto related".format(text), 'WARN')
-        return ""