Compare commits

...

10 Commits

Author SHA1 Message Date
andrewso
b3f8ba4cec [15.10.20] Testing 2020-10-15 20:19:39 +01:00
andrewso
da43817926 [15.10.20] Testing 2020-10-15 20:15:55 +01:00
andrewso
17bd1399cc [15.10.20] Testing 2020-10-15 18:10:57 +01:00
andrewso
7325a509bd [15.10.20] Testing 2020-10-15 17:19:44 +01:00
andrewso
affd25379a [15.10.20] Testing 2020-10-15 14:01:06 +01:00
andrewso
0106ec9044 [15.10.20] Testing 2020-10-15 11:31:50 +01:00
andrewso
5c832a70fe [15.10.20] Testing 2020-10-15 11:20:50 +01:00
andrewso
3c25310d76 [14.10.20] Testing 2020-10-14 18:51:36 +01:00
andrewso
6626c0864b [14.10.20] Testing 2020-10-14 18:11:10 +01:00
andrewso
1d459420de [14.10.20] Testing 2020-10-14 13:17:16 +01:00
6 changed files with 202 additions and 30 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python
import os, sys, json
import os, sys, json, uuid
from datetime import datetime, timedelta
from time import sleep, time
@ -17,6 +17,7 @@ from src.utils.spamFilter import callSpamFilter
from src.utils.sentimentAnalyser import callSentimentAnalyser
from src.utils.activemqConnect import activeMQSender
from src.utils.jsonLogger import log
from src.utils.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords
from http.client import IncompleteRead
from urllib3.exceptions import ProtocolError
@ -33,20 +34,20 @@ class keys():
self.access_token = os.getenv("ACCESS_TOKEN")
self.access_secret = os.getenv("ACCESS_SECRET")
def sendToArtemis(pos, neu, neg, compound, type):
def sendToArtemis(syncId, pos, neu, neg, compound, type):
timestamp = datetime.now() + timedelta(hours=1)
strippedTimestamp = timestamp.replace(minute=0, second=0, microsecond=0)
timestamp = strippedTimestamp.strftime('%Y-%m-%dT%H:%M:%S')
message = { "timestamp" : timestamp, "pos" : pos, "neu" : neu, "neg" : neg, "compound" : compound, "type": type }
message = { "timestamp" : timestamp, "syncId": str(syncId), "pos" : pos, "neu" : neu, "neg" : neg, "compound" : compound, "type": type }
messageJson = json.dumps(message, indent = 4)
log("Sending message to TweetSave queue", 'INFO')
log("Sending message to TweetSave queue for SyncId [{}]".format(syncId), 'INFO')
log("Message: {}".format(message), 'INFO')
activeMQSender(messageJson)
activeMQSender(messageJson, syncId)
class Streamer():
@ -89,23 +90,28 @@ class Listener(StreamListener):
if (time() - self.start_time) < self.limit:
data = json.loads(data)
log(len(dumpStack), 'INFO')
# Check if tweet is a retweet
if 'retweeted_status' in data:
if 'extended_tweet' in data['retweeted_status']:
# If tweet is over the 140-character limit
text = data['retweeted_status']['extended_tweet']['full_text']
text = filterOutTweetsWithNoneWhitelistedWords(text)
dumpStack.append({'type': self.hashtag, 'tweet': text})
else:
text = data['retweeted_status']['text']
text = filterOutTweetsWithNoneWhitelistedWords(text)
dumpStack.append({'type': self.hashtag, 'tweet': text})
else:
# Otherwise it is a normal Tweet
if 'extended_tweet' in data:
# If tweet is over the 140-character limit
text = data['extended_tweet']['full_text']
text = filterOutTweetsWithNoneWhitelistedWords(text)
dumpStack.append({'type': self.hashtag, 'tweet': text})
def processTweet():
def processTweet(syncId):
log(len(dumpStack), 'INFO')
@ -117,7 +123,7 @@ def processTweet():
if len(processStack) != 0:
for tweet in processStack:
removedLines = fixLines(str(tweet))
removedLines = fixLines(tweet["tweet"])
removedSpecialChars = cleanTweet(removedLines)
removedSpacing = removeSpacing(removedSpecialChars[0])
@ -135,12 +141,12 @@ def processTweet():
cleanedTweet = tweetText + ' ' + removedSpecialChars[1]
if callSpamFilter(cleanedTweet) != 'spam':
if callSpamFilter(cleanedTweet, syncId) != 'spam':
pos, neu, neg, compound = callSentimentAnalyser(cleanedTweet)
pos, neu, neg, compound = callSentimentAnalyser(cleanedTweet, syncId)
if compound != 0.0 or neu <= 0.8:
hourTweet = {'pos': 1, 'neu': 1, 'neg': 1, 'compound': 1, 'type': ""}
if compound != 0.0 and neu <= 0.6:
hourTweet = {'pos': pos, 'neu': neu, 'neg': neg, 'compound': compound, 'type': tweet["type"]}
hourStack.append(hourTweet)
processStack.clear()
@ -176,7 +182,9 @@ def createHourJob():
global timeF
timeF = timeFunction()
processTweet()
syncId = uuid.uuid4()
processTweet(syncId)
processStack = hourStack.copy()
hourStack.clear()
@ -185,7 +193,7 @@ def createHourJob():
if len(processStack) != 0:
log("Process stack size is :: [{}]".format(len(processStack)), 'INFO')
for item in hourStack:
for item in processStack:
ovPos = ovPos + item['pos']
ovNeu = ovNeu + item['neu']
ovNeg = ovNeg + item['neg']
@ -202,7 +210,7 @@ def createHourJob():
processStack.clear()
sendToArtemis(pos, neu, neg, compound, type)
sendToArtemis(syncId, pos, neu, neg, compound, type)
else:
log("Stack is empty", 'WARN')
@ -216,6 +224,7 @@ def collectorMain(hashtag):
for i in range(len(hashtag)):
Thread(target=collector, args=[hashtag[i]]).start()
sleep(2)
createHourJob()
while True:

View File

@ -17,14 +17,20 @@ class keys():
def returnKeys(self):
return self.addr, self.port, self.amqU, self.amqP
def activeMQSender(message):
def activeMQSender(message, syncId):
addr, port, mqUser, mqPass = keys().returnKeys()
log("Attempting Connection to Artemis...", 'INFO')
con = stomp.Connection([(addr, port)], auto_content_length=False)
con.connect( mqUser, mqPass, wait=True)
con.send("TweetSave", message, content_type="application/json", headers={"Content-Type":"application/json"})
con.send("TweetSave",
message,
content_type="application/json",
headers={
"Content-Type":"application/json",
"X-CRYPTO-Sync-ID":syncId
})
con.disconnect()

View File

@ -28,13 +28,13 @@ def setup_logging(log_level='INFO'):
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)
def log(message, level):
def log(message, level, syncId=""):
logger = logging.getLogger(__name__)
if level == 'INFO':
logger.info(message)
logger.info(message, extra={"X-CRYPTO-Sync-ID" : syncId})
elif level == 'WARN':
logger.warn(message)
logger.warn(message, extra={"X-CRYPTO-Sync-ID" : syncId})
elif level == 'ERR':
logger.error(message)
logger.error(message, extra={"X-CRYPTO-Sync-ID" : syncId})
elif level == 'DEBUG':
logger.debug(message)
logger.debug(message, extra={"X-CRYPTO-Sync-ID" : syncId})

View File

@ -9,11 +9,14 @@ class keys():
def __init__(self):
self.sentiment_analyser_uri = os.getenv("SENTIMENT_URL")
def callSentimentAnalyser(tweet):
# log("Calling Sentiment Analyser for [{}]".format(tweet), 'INFO')
def callSentimentAnalyser(tweet, syncId):
headers = {
"content-type":"text",
"X-CRYPTO-Sync-ID" : str(syncId)
}
try:
uri = keys().sentiment_analyser_uri + "/sentiment?tweet="+tweet
response = requests.request("GET", uri)
response = requests.request("GET", url=uri, headers=headers)
response = json.loads(response.text)
@ -21,5 +24,5 @@ def callSentimentAnalyser(tweet):
return scores["pos"], scores["neu"], scores["neg"], scores["compound"]
except:
log("Could not call Sentiment Analyser Service", 'ERR')
log("Could not call Sentiment Analyser Service with syncId of [{}]".format(syncId), 'ERR', syncId)
return 0, 0, 0, 0

View File

@ -9,16 +9,18 @@ class keys():
def __init__(self):
self.spamFilter_uri = os.getenv("FILTER_URL")
def callSpamFilter(tweet):
def callSpamFilter(tweet, syncId):
headers = {
"content-type":"text",
"X-CRYPTO-Sync-ID" : str(syncId)
}
try:
uri = keys().spamFilter_uri + "/predict?tweet="+tweet
response = requests.request("GET", uri)
response = requests.request("GET", url=uri, headers=headers)
response = json.loads(response.text)
# log("Spam Filter result for [{}] is [{}]".format(tweet, response["result"]), 'INFO')
return response["result"]
except:
log("Could not call spam filter service", 'ERR')
log("Could not call spam filter service with syncId of [{}]".format(syncId), 'ERR', syncId)
return ""

View File

@ -0,0 +1,152 @@
#!/usr/bin/env python
# Keywords used by filterOutTweetsWithNoneWhitelistedWords: a tweet is kept
# only if at least one of these strings occurs in its text.
#
# Every entry MUST be followed by a comma. Python silently concatenates
# adjacent string literals, so a missing comma merges two keywords into one
# (previously "novice" "passed" became the single entry "novicepassed").
# Exact duplicate entries ("stake", "invest", "fiat") have been removed; this
# does not change which tweets match.
whitelist = [
    "bull",
    "bear",
    "bullish",
    "bearish",
    "up",
    "down",
    "high",
    "low",
    "higher",
    "lower",
    "absconded",
    "maximalists",
    "regulate",
    "infamous",
    "tradehigher",
    "tradelower",
    "revival",
    "centralized",
    "decentralized",
    "centralised",
    "decentralised",
    "decentralization",
    "decentralisation",
    "centralization",
    "centralisation",
    "bans",
    "hodl",
    "ambiguity",
    "revolutionize",
    "revolutionise",
    "consolidation",
    "shorts",
    "longs",
    "long",
    "short",
    "shorting",
    "grow",
    "volatile",
    "rally",
    "rallying",
    "noob",
    "noobs",
    "innovation",
    "bottom",
    "top",
    "topped",
    "bottomed",
    "upwards",
    "downwards",
    "invest",
    "raging",
    "rocketing",
    "swing",
    "swinging",
    "stake",
    "whale",
    "whales",
    "lull",
    "moon",
    "choppy",
    "buy",
    "buying",
    "sell",
    "selling",
    "startselling",
    "stopselling",
    "startbuying",
    "stopbuying",
    "bitcoin",
    "btc",
    "eth",
    "xmr",
    "xrp",
    "ripple",
    "block",
    "reward",
    "airdrop",
    "drop",
    "raise",
    "stack",
    "pull",
    "push",
    "token",
    "sale",
    "unhappy",
    "happy",
    "expert",
    "novice",
    "passed",
    "mark",
    "decline",
    "incline",
    "fees",
    "crypto",
    "wallet",
    "price",
    "history",
    "reached",
    "upward",
    "downward",
    "trading",
    "mining",
    "defi",
    "finance",
    "blockchain",
    "interest",
    "alt",
    "alts",
    "fiat",
    "currency",
    "currencies",
    "wealth",
    "hype",
    "hyped",
    "achievement",
    "platform",
    "incremental",
    "increment",
    "decrement",
    "decremental",
    "success",
    "loss",
    "win",
    "lose",
    "worth",
    "strongest",
    "weakest",
    "strong",
    "weak",
    "trade",
    "popping",
    "sucking",
    "shard",
    "sharding",
    "industry",
    "powerful",
    "better",
    "worse",
]
def filterOutTweetsWithNoneWhitelistedWords(text):
    """Return ``text`` if it contains a whitelisted keyword, else ``""``.

    Each entry of the module-level ``whitelist`` is tested with the ``in``
    operator against ``text``. For a string this is a case-sensitive
    substring test, so a keyword also matches inside a longer word.
    """
    for keyword in whitelist:
        if keyword in text:
            # First hit is enough; the tweet is passed through unchanged.
            return text
    # No keyword found: signal "filtered out" with an empty string.
    return ""