diff --git a/src/resources/tweet_spam_ham.csv b/src/resources/tweet_spam_ham.csv index ad3b80e..167d5ba 100644 --- a/src/resources/tweet_spam_ham.csv +++ b/src/resources/tweet_spam_ham.csv @@ -3356,4 +3356,5 @@ spam,BTC ham,It's incredible bitcoin spam, xrp ripple trx btc Over 20 Congress Lawmakers Unhappy May Decide Soon BS Yeddyurappa - IPLfantasy IPLFantasyLeague IPL2019onIndiaBet - BS Yeddyurappa said the Bharatiya Janata Party would win by-polls for both spam,Current Crypto Prices! BTC $6298 74 USDETH $172 63 USDLTC $76 03 USDBCH $286 59 USDXLM $0 09192 USDDOGE $ 0 00252 USDNEO $8 97 USDXRP $0 2979 USDCANN $0 008251 USDEMC2 $0 09215 USDXMR $68 18 USDBTG $20 38 USD -spam,lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN \ No newline at end of file +spam,lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN +spam,THE MOST BEAUTIFUL FACE CHOU TZUYU ONLY \ No newline at end of file diff --git a/src/tweets/tweetFilter.py b/src/tweets/tweetFilter.py index aa4c891..1cf08ff 100644 --- a/src/tweets/tweetFilter.py +++ b/src/tweets/tweetFilter.py @@ -15,8 +15,6 @@ from sklearn.metrics import classification_report, accuracy_score from src.utils.jsonLogger import log -from src.tweets.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords - # Global Metrics HB_NB_Precision = 0 HB_NB_Recall = 0 @@ -266,12 +264,8 @@ class tweetFilter(object): # # Filter.testPrediction() def tweetFilterPredit(self, text): - tweet = filterOutTweetsWithNoneWhitelistedWords(text) - if tweet != "": - df = pd.DataFrame(self.Filter.predict(tweet)) - df[0] = df[0].map({0: 'ham', 1: 'spam'}) - log("Classification of tweet as {}".format(df[0][0]), 'INFO') + df = pd.DataFrame(self.Filter.predict(text)) + df[0] = df[0].map({0: 'ham', 1: 'spam'}) + log("Classification of tweet as {}".format(df[0][0]), 'INFO') - return df[0][0] - else: - return "spam" \ No newline at end of file + return df[0][0] \ No newline at end of file diff --git a/src/tweets/whitelistedWords.py b/src/tweets/whitelistedWords.py deleted file mode 100644 index 8454e9f..0000000 --- a/src/tweets/whitelistedWords.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python - -from src.utils.jsonLogger import log - -whitelist = [ - "bull", - "bear", - "bullish", - "bearish", - "up", - "down", - "high", - "low", - "higher", - "lower", - "absconded", - "maximalists", - "regulate", - "infamous", - "tradehigher", - "tradelower", - "revival", - "centralized", - "decentralized", - "centralised", - "decentralised", - "decentralization", - "decentralisation", - "centralization", - "centralisation", - "bans", - "hodl", - "ambiguity", - "revolutionize", - "revolutionise", - "consolidation", - "shorts", - "longs", - "long", - "short", - "shorting", - "grow", - "volatile", - "rally", - "rallying", - "noob", - "noobs", - "innovation", - "bottom", - "top", - "topped", - "bottomed", - "upwards", - "downwards", - "invest", - "raging", - "rocketing", - "swing", - "swinging", - "stake", - "whale", - "whales", - "lull", - "moon", - "choppy", - "buy", - "buying", - "sell", - "selling", - "startselling", - "stopselling", - "startbuying", - "stopbuying", - "bitcoin", - "btc", - "eth", - "xmr", - "xrp", - "ripple", - "block", - "reward", - "airdrop", - "drop", - "raise", - "stack", - "stake", - "invest", - "pull", - "push", - "token", - "sale", - "unhappy", - "happy", - "expert", - "novice" - "passed", - "mark", - "decline", - "incline", - "fees", - "crypto", - "wallet", - "price", - "history", - "reached", - "upward", - "downward", - "trading", - "mining", - "defi", - "finance", - "blockchain", - "interest", - "alt", - "alts", - "fiat", - "fiat", - "currency", - "currencies", - "wealth", - "hype", - "hyped", - "achievement", - "platform", - "incremental", - "increment", - "decrement", - "decremental", - "success", - "loss", - "win", - "lose", - "worth", - "strongest", - "weakest", - "strong", - "weak", - "trade", - "popping", - "sucking", - "shard", - "sharding", - "industry" -] - -def filterOutTweetsWithNoneWhitelistedWords(text): - if any(x in text for x in whitelist): - return text - else: - log("Tweet [{}] did not contain any keywords for it to be considered crypto related".format(text), 'WARN') - return "" \ No newline at end of file