Compare commits

..

No commits in common. "master" and "1.0.0-b23" have entirely different histories.

7 changed files with 42 additions and 233 deletions

View File

@ -119,10 +119,10 @@ spec:
imagePullPolicy: Always
resources:
requests:
cpu: 100m
memory: 64Mi
cpu: 32m
memory: 32Mi
limits:
cpu: 500m
cpu: 150m
memory: 256Mi
securityContext:
capabilities:

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python
import os, sys, json, uuid
import os, sys, json
from datetime import datetime, timedelta
from time import sleep, time
@ -17,14 +17,12 @@ from src.utils.spamFilter import callSpamFilter
from src.utils.sentimentAnalyser import callSentimentAnalyser
from src.utils.activemqConnect import activeMQSender
from src.utils.jsonLogger import log
from src.utils.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords
from http.client import IncompleteRead
from urllib3.exceptions import ProtocolError
hourStack = []
dumpStack = []
processStack = []
class keys():
@ -34,20 +32,20 @@ class keys():
self.access_token = os.getenv("ACCESS_TOKEN")
self.access_secret = os.getenv("ACCESS_SECRET")
def sendToArtemis(syncId, pos, neu, neg, compound, type):
def sendToArtemis(pos, neu, neg, compound, type):
timestamp = datetime.now() + timedelta(hours=1)
strippedTimestamp = timestamp.replace(minute=0, second=0, microsecond=0)
timestamp = strippedTimestamp.strftime('%Y-%m-%dT%H:%M:%S')
message = { "timestamp" : timestamp, "syncId": str(syncId), "pos" : pos, "neu" : neu, "neg" : neg, "compound" : compound, "type": type }
message = { "timestamp" : timestamp, "pos" : pos, "neu" : neu, "neg" : neg, "compound" : compound, "type": type }
messageJson = json.dumps(message, indent = 4)
log("Sending message to TweetSave queue for SyncId [{}]".format(syncId), 'INFO')
log("Sending message to TweetSave queue", 'INFO')
log("Message: {}".format(message), 'INFO')
activeMQSender(messageJson, syncId)
activeMQSender(messageJson)
class Streamer():
@ -56,7 +54,6 @@ class Streamer():
def stream_tweets(self, hashtag):
listener = Listener(hashtag)
auth = OAuthHandler(keys().api_key, keys().api_secret)
log("Authorising with twitter API...", 'INFO')
@ -69,13 +66,11 @@ class Streamer():
while True:
try:
stream = Stream(auth, listener=listener, tweet_mode='extended')
stream = Stream(auth=api.auth, listener=listener, tweet_mode='extended')
stream.filter(languages=["en"], track=hashtag)
except IncompleteRead:
log("Incomplete Read Error", 'ERR')
continue
except ProtocolError:
log("Protocol Error", 'ERR')
continue
class Listener(StreamListener):
@ -87,47 +82,37 @@ class Listener(StreamListener):
def on_data(self, data):
log("Received Tweet...", 'INFO')
if (time() - self.start_time) < self.limit:
data = json.loads(data)
log(len(dumpStack), 'INFO')
# Check if tweet is a retweet
if 'retweeted_status' in data:
if 'extended_tweet' in data['retweeted_status']:
# If the tweet is over the 140-character limit
text = data['retweeted_status']['extended_tweet']['full_text']
text = filterOutTweetsWithNoneWhitelistedWords(text)
dumpStack.append({'type': self.hashtag, 'tweet': text})
else:
text = data['retweeted_status']['text']
text = filterOutTweetsWithNoneWhitelistedWords(text)
dumpStack.append({'type': self.hashtag, 'tweet': text})
else:
# Otherwise it is a normal tweet
if 'extended_tweet' in data:
# If the tweet is over the 140-character limit
text = data['extended_tweet']['full_text']
text = filterOutTweetsWithNoneWhitelistedWords(text)
dumpStack.append({'type': self.hashtag, 'tweet': text})
def processTweet(syncId):
def processTweet():
log(len(dumpStack), 'INFO')
processStack = dumpStack.copy()
processStack = dumpStack
dumpStack.clear()
# log("Processing [{}] Tweet...".format(text), 'INFO')
if len(processStack) != 0:
for tweet in processStack:
removedLines = fixLines(tweet["tweet"])
removedSpecialChars = cleanTweet(removedLines)
removedSpacing = removeSpacing(removedSpecialChars[0])
tweetLength = checkLength(removedSpacing)
if tweetLength == True:
@ -141,11 +126,10 @@ def processTweet(syncId):
cleanedTweet = tweetText + ' ' + removedSpecialChars[1]
if callSpamFilter(cleanedTweet, syncId) != 'spam':
if callSpamFilter(cleanedTweet) != 'spam':
pos, neu, neg, compound = callSentimentAnalyser(cleanedTweet)
pos, neu, neg, compound = callSentimentAnalyser(cleanedTweet, syncId)
if compound != 0.0 and neu <= 0.6:
if compound != 0.0:
hourTweet = {'pos': pos, 'neu': neu, 'neg': neg, 'compound': compound, 'type': tweet["type"]}
hourStack.append(hourTweet)
@ -177,40 +161,28 @@ def createHourJob():
log("Creating hour job...", 'INFO')
schedule.clear("sendToArtemis")
ovPos, ovNeu, ovNeg, ovCompound = 0, 0, 0, 0
type = ""
global timeF
timeF = timeFunction()
syncId = uuid.uuid4()
processTweet()
processTweet(syncId)
processStack = hourStack.copy()
hourStack.clear()
log("Extracting sentiment scores...", 'INFO')
if len(processStack) != 0:
log("Process stack size is :: [{}]".format(len(processStack)), 'INFO')
for item in processStack:
if len(hourStack) != 0:
for item in hourStack:
ovPos = ovPos + item['pos']
ovNeu = ovNeu + item['neu']
ovNeg = ovNeg + item['neg']
ovCompound = ovCompound + item['compound']
type = item["type"]
pos = round(ovPos/len(processStack), 3)
neu = round(ovNeu/len(processStack), 3)
neg = round(ovNeg/len(processStack), 3)
compound = round(ovCompound/len(processStack), 3)
pos = round(ovPos/len(hourStack), 3)
neu = round(ovNeu/len(hourStack), 3)
neg = round(ovNeg/len(hourStack), 3)
compound = round(ovCompound/len(hourStack), 3)
if type == "bitcoin":
type = 'btc_usd'
hourStack.clear()
processStack.clear()
sendToArtemis(syncId, pos, neu, neg, compound, type)
sendToArtemis(pos, neu, neg, compound, type)
else:
log("Stack is empty", 'WARN')
@ -224,7 +196,7 @@ def collectorMain(hashtag):
for i in range(len(hashtag)):
Thread(target=collector, args=[hashtag[i]]).start()
sleep(2)
sleep(5)
createHourJob()
while True:

View File

@ -17,20 +17,14 @@ class keys():
def returnKeys(self):
return self.addr, self.port, self.amqU, self.amqP
def activeMQSender(message, syncId):
def activeMQSender(message):
addr, port, mqUser, mqPass = keys().returnKeys()
log("Attempting Connection to Artemis...", 'INFO')
con = stomp.Connection([(addr, port)], auto_content_length=False)
con.connect( mqUser, mqPass, wait=True)
con.send("TweetSave",
message,
content_type="application/json",
headers={
"Content-Type":"application/json",
"X-CRYPTO-Sync-ID":syncId
})
con.send("TweetSave", message, content_type="application/json", headers={"Content-Type":"application/json"})
con.disconnect()

View File

@ -28,13 +28,13 @@ def setup_logging(log_level='INFO'):
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
def log(message, level, syncId=""):
def log(message, level):
logger = logging.getLogger(__name__)
if level == 'INFO':
logger.info(message, extra={"X-CRYPTO-Sync-ID" : syncId})
logger.info(message)
elif level == 'WARN':
logger.warn(message, extra={"X-CRYPTO-Sync-ID" : syncId})
logger.warn(message)
elif level == 'ERR':
logger.error(message, extra={"X-CRYPTO-Sync-ID" : syncId})
logger.error(message)
elif level == 'DEBUG':
logger.debug(message, extra={"X-CRYPTO-Sync-ID" : syncId})
logger.debug(message)

View File

@ -9,14 +9,11 @@ class keys():
def __init__(self):
self.sentiment_analyser_uri = os.getenv("SENTIMENT_URL")
def callSentimentAnalyser(tweet, syncId):
headers = {
"content-type":"text",
"X-CRYPTO-Sync-ID" : str(syncId)
}
def callSentimentAnalyser(tweet):
log("Calling Sentiment Analyser for [{}]".format(tweet), 'INFO')
try:
uri = keys().sentiment_analyser_uri + "/sentiment?tweet="+tweet
response = requests.request("GET", url=uri, headers=headers)
response = requests.request("GET", uri)
response = json.loads(response.text)
@ -24,5 +21,5 @@ def callSentimentAnalyser(tweet, syncId):
return scores["pos"], scores["neu"], scores["neg"], scores["compound"]
except:
log("Could not call Sentiment Analyser Service with syncId of [{}]".format(syncId), 'ERR', syncId)
log("Could not call Sentiment Analyser Service", 'ERR')
return 0, 0, 0, 0

View File

@ -9,18 +9,16 @@ class keys():
def __init__(self):
self.spamFilter_uri = os.getenv("FILTER_URL")
def callSpamFilter(tweet, syncId):
headers = {
"content-type":"text",
"X-CRYPTO-Sync-ID" : str(syncId)
}
def callSpamFilter(tweet):
try:
uri = keys().spamFilter_uri + "/predict?tweet="+tweet
response = requests.request("GET", url=uri, headers=headers)
response = requests.request("GET", uri)
response = json.loads(response.text)
log("Spam Filter result for [{}] is [{}]".format(tweet, response["result"]), 'INFO')
return response["result"]
except:
log("Could not call spam filter service with syncId of [{}]".format(syncId), 'ERR', syncId)
log("Could not call spam filter service", 'ERR')
return ""

View File

@ -1,152 +0,0 @@
#!/usr/bin/env python
# Keywords a tweet must contain (as a substring) to be kept by
# filterOutTweetsWithNoneWhitelistedWords below.
#
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously merged "novice" and
# "passed" into the single, never-matching entry "novicepassed".
# Duplicate entries ("stake", "invest", "fiat") were dropped; they had no
# effect on matching.
whitelist = [
    "bull",
    "bear",
    "bullish",
    "bearish",
    "up",
    "down",
    "high",
    "low",
    "higher",
    "lower",
    "absconded",
    "maximalists",
    "regulate",
    "infamous",
    "tradehigher",
    "tradelower",
    "revival",
    "centralized",
    "decentralized",
    "centralised",
    "decentralised",
    "decentralization",
    "decentralisation",
    "centralization",
    "centralisation",
    "bans",
    "hodl",
    "ambiguity",
    "revolutionize",
    "revolutionise",
    "consolidation",
    "shorts",
    "longs",
    "long",
    "short",
    "shorting",
    "grow",
    "volatile",
    "rally",
    "rallying",
    "noob",
    "noobs",
    "innovation",
    "bottom",
    "top",
    "topped",
    "bottomed",
    "upwards",
    "downwards",
    "invest",
    "raging",
    "rocketing",
    "swing",
    "swinging",
    "stake",
    "whale",
    "whales",
    "lull",
    "moon",
    "choppy",
    "buy",
    "buying",
    "sell",
    "selling",
    "startselling",
    "stopselling",
    "startbuying",
    "stopbuying",
    "bitcoin",
    "btc",
    "eth",
    "xmr",
    "xrp",
    "ripple",
    "block",
    "reward",
    "airdrop",
    "drop",
    "raise",
    "stack",
    "pull",
    "push",
    "token",
    "sale",
    "unhappy",
    "happy",
    "expert",
    "novice",
    "passed",
    "mark",
    "decline",
    "incline",
    "fees",
    "crypto",
    "wallet",
    "price",
    "history",
    "reached",
    "upward",
    "downward",
    "trading",
    "mining",
    "defi",
    "finance",
    "blockchain",
    "interest",
    "alt",
    "alts",
    "fiat",
    "currency",
    "currencies",
    "wealth",
    "hype",
    "hyped",
    "achievement",
    "platform",
    "incremental",
    "increment",
    "decrement",
    "decremental",
    "success",
    "loss",
    "win",
    "lose",
    "worth",
    "strongest",
    "weakest",
    "strong",
    "weak",
    "trade",
    "popping",
    "sucking",
    "shard",
    "sharding",
    "industry",
    "powerful",
    "better",
    "worse",
]
def filterOutTweetsWithNoneWhitelistedWords(text):
    """Return ``text`` unchanged if it contains a whitelisted keyword.

    Each entry of the module-level ``whitelist`` is checked with a plain
    ``keyword in text`` containment test; the first hit short-circuits.
    When no entry is found in ``text`` an empty string is returned instead.

    NOTE(review): assumes ``text`` is a ``str`` (tweet body) -- confirm
    against callers.
    """
    for keyword in whitelist:
        if keyword in text:
            return text
    # No keyword matched: the tweet is treated as not crypto related.
    return ""