From 3c25310d760fb085e79703feab1c83a2abc3568f Mon Sep 17 00:00:00 2001
From: andrewso <9V5f1FkzI2LD>
Date: Wed, 14 Oct 2020 18:51:36 +0100
Subject: [PATCH] [14.10.20] Testing

---
Review fixes folded into src/utils/whitelistedWords.py:
 - add the comma missing after "novice", which fused it with "passed"
 - match whole words case-insensitively instead of raw substrings
   ("up" used to match inside "support", "BTC" never matched "btc")
 - drop duplicated entries, tolerate None/empty text, end file with a newline
Note: this patch was re-flowed from a whitespace-collapsed copy. Indentation
and blank lines of the collector.py context, and the index blob ids, are
best-effort; regenerate with git format-patch against the tree before applying.

 src/tweets/collector.py       |   4 +
 src/utils/whitelistedWords.py | 167 ++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 src/utils/whitelistedWords.py

diff --git a/src/tweets/collector.py b/src/tweets/collector.py
index c2ac37c..528d5ec 100644
--- a/src/tweets/collector.py
+++ b/src/tweets/collector.py
@@ -17,6 +17,7 @@ from src.utils.spamFilter import callSpamFilter
 from src.utils.sentimentAnalyser import callSentimentAnalyser
 from src.utils.activemqConnect import activeMQSender
 from src.utils.jsonLogger import log
+from src.utils.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords
 
 from http.client import IncompleteRead
 from urllib3.exceptions import ProtocolError
@@ -94,15 +95,18 @@ class Listener(StreamListener):
                 if 'extended_tweet' in data['retweeted_status']:
                     #if tweet is over the 140 word limit
                     text = data['retweeted_status']['extended_tweet']['full_text']
+                    text = filterOutTweetsWithNoneWhitelistedWords(text)
                     dumpStack.append({'type': self.hashtag, 'tweet': text})
                 else:
                     text = data['retweeted_status']['text']
+                    text = filterOutTweetsWithNoneWhitelistedWords(text)
                     dumpStack.append({'type': self.hashtag, 'tweet': text})
             else:
                 # Else if a normal Tweeet
                 if 'extended_tweet' in data:
                     # If tweet is over 140 word limit
                     text = data['extended_tweet']['full_text']
+                    text = filterOutTweetsWithNoneWhitelistedWords(text)
                     dumpStack.append({'type': self.hashtag, 'tweet': text})
 
 def processTweet():
diff --git a/src/utils/whitelistedWords.py b/src/utils/whitelistedWords.py
new file mode 100644
index 0000000..8454e9f
--- /dev/null
+++ b/src/utils/whitelistedWords.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+"""Keyword whitelist used to drop tweets that are not crypto related."""
+
+import re
+
+from src.utils.jsonLogger import log
+
+# Lower-case keywords; a tweet is kept when at least one of its words is listed.
+whitelist = [
+    "bull",
+    "bear",
+    "bullish",
+    "bearish",
+    "up",
+    "down",
+    "high",
+    "low",
+    "higher",
+    "lower",
+    "absconded",
+    "maximalists",
+    "regulate",
+    "infamous",
+    "tradehigher",
+    "tradelower",
+    "revival",
+    "centralized",
+    "decentralized",
+    "centralised",
+    "decentralised",
+    "decentralization",
+    "decentralisation",
+    "centralization",
+    "centralisation",
+    "bans",
+    "hodl",
+    "ambiguity",
+    "revolutionize",
+    "revolutionise",
+    "consolidation",
+    "shorts",
+    "longs",
+    "long",
+    "short",
+    "shorting",
+    "grow",
+    "volatile",
+    "rally",
+    "rallying",
+    "noob",
+    "noobs",
+    "innovation",
+    "bottom",
+    "top",
+    "topped",
+    "bottomed",
+    "upwards",
+    "downwards",
+    "invest",
+    "raging",
+    "rocketing",
+    "swing",
+    "swinging",
+    "stake",
+    "whale",
+    "whales",
+    "lull",
+    "moon",
+    "choppy",
+    "buy",
+    "buying",
+    "sell",
+    "selling",
+    "startselling",
+    "stopselling",
+    "startbuying",
+    "stopbuying",
+    "bitcoin",
+    "btc",
+    "eth",
+    "xmr",
+    "xrp",
+    "ripple",
+    "block",
+    "reward",
+    "airdrop",
+    "drop",
+    "raise",
+    "stack",
+    "pull",
+    "push",
+    "token",
+    "sale",
+    "unhappy",
+    "happy",
+    "expert",
+    "novice",
+    "passed",
+    "mark",
+    "decline",
+    "incline",
+    "fees",
+    "crypto",
+    "wallet",
+    "price",
+    "history",
+    "reached",
+    "upward",
+    "downward",
+    "trading",
+    "mining",
+    "defi",
+    "finance",
+    "blockchain",
+    "interest",
+    "alt",
+    "alts",
+    "fiat",
+    "currency",
+    "currencies",
+    "wealth",
+    "hype",
+    "hyped",
+    "achievement",
+    "platform",
+    "incremental",
+    "increment",
+    "decrement",
+    "decremental",
+    "success",
+    "loss",
+    "win",
+    "lose",
+    "worth",
+    "strongest",
+    "weakest",
+    "strong",
+    "weak",
+    "trade",
+    "popping",
+    "sucking",
+    "shard",
+    "sharding",
+    "industry",
+]
+
+# A word is a run of ASCII letters/digits, so "#bitcoin" and "$btc," yield bare words.
+_WORD_PATTERN = re.compile(r"[a-z0-9]+")
+
+
+def filterOutTweetsWithNoneWhitelistedWords(text):
+    """Return ``text`` if it mentions a whitelisted keyword, else ``""``.
+
+    Matching is case-insensitive and on whole words, so "BTC" and "#bitcoin"
+    are accepted while "up" no longer matches inside "support".
+
+    :param text: tweet text; ``None`` or an empty string is rejected without logging.
+    :return: ``text`` unchanged, or ``""`` when the tweet is filtered out.
+    """
+    if not text:
+        return ""
+    words = set(_WORD_PATTERN.findall(text.lower()))
+    # isdisjoint() walks the list, so runtime edits to ``whitelist`` still apply.
+    if not words.isdisjoint(whitelist):
+        return text
+    log("Tweet [{}] did not contain any keywords for it to be considered crypto related".format(text), 'WARN')
+    return ""