[14.10.20] Moved keyword whitelisting to the collector to reduce the initial amount of tweets

This commit is contained in:
andrewso 2020-10-14 18:51:17 +01:00
parent b5a81aa2b7
commit b85fe65196
3 changed files with 6 additions and 162 deletions

View File

@ -3356,4 +3356,5 @@ spam,BTC
ham,It's incredible bitcoin
spam, xrp ripple trx btc Over 20 Congress Lawmakers Unhappy May Decide Soon BS Yeddyurappa - IPLfantasy IPLFantasyLeague IPL2019onIndiaBet - BS Yeddyurappa said the Bharatiya Janata Party would win by-polls for both
spam,Current Crypto Prices! BTC $6298 74 USDETH $172 63 USDLTC $76 03 USDBCH $286 59 USDXLM $0 09192 USDDOGE $ 0 00252 USDNEO $8 97 USDXRP $0 2979 USDCANN $0 008251 USDEMC2 $0 09215 USDXMR $68 18 USDBTG $20 38 USD
spam,lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN
spam,lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN
spam,THE MOST BEAUTIFUL FACE CHOU TZUYU ONLY
1 classes tweet
3356 ham It's incredible bitcoin
3357 spam xrp ripple trx btc Over 20 Congress Lawmakers Unhappy May Decide Soon BS Yeddyurappa - IPLfantasy IPLFantasyLeague IPL2019onIndiaBet - BS Yeddyurappa said the Bharatiya Janata Party would win by-polls for both
3358 spam Current Crypto Prices! BTC $6298 74 USDETH $172 63 USDLTC $76 03 USDBCH $286 59 USDXLM $0 09192 USDDOGE $ 0 00252 USDNEO $8 97 USDXRP $0 2979 USDCANN $0 008251 USDEMC2 $0 09215 USDXMR $68 18 USDBTG $20 38 USD
3359 spam lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN
3360 spam THE MOST BEAUTIFUL FACE CHOU TZUYU ONLY

View File

@ -15,8 +15,6 @@ from sklearn.metrics import classification_report, accuracy_score
from src.utils.jsonLogger import log
from src.tweets.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords
# Global Metrics
HB_NB_Precision = 0
HB_NB_Recall = 0
@ -266,12 +264,8 @@ class tweetFilter(object):
#
# Filter.testPrediction()
def tweetFilterPredit(self, text):
tweet = filterOutTweetsWithNoneWhitelistedWords(text)
if tweet != "":
df = pd.DataFrame(self.Filter.predict(tweet))
df[0] = df[0].map({0: 'ham', 1: 'spam'})
log("Classification of tweet as {}".format(df[0][0]), 'INFO')
df = pd.DataFrame(self.Filter.predict(text))
df[0] = df[0].map({0: 'ham', 1: 'spam'})
log("Classification of tweet as {}".format(df[0][0]), 'INFO')
return df[0][0]
else:
return "spam"
return df[0][0]

View File

@ -1,151 +0,0 @@
#!/usr/bin/env python
from src.utils.jsonLogger import log
# Lower-case keywords used to decide whether a tweet is crypto related.
# A tweet is kept when at least one of these strings occurs in it as a
# substring (see filterOutTweetsWithNoneWhitelistedWords).
#
# NOTE: every entry must be followed by a comma. Python silently joins
# adjacent string literals, so a missing comma merges two keywords into one
# entry that never matches (previously "novice" "passed" -> "novicepassed").
whitelist = [
    # Market direction / sentiment
    "bull",
    "bear",
    "bullish",
    "bearish",
    "up",
    "down",
    "high",
    "low",
    "higher",
    "lower",
    "absconded",
    "maximalists",
    "regulate",
    "infamous",
    "tradehigher",
    "tradelower",
    "revival",
    "centralized",
    "decentralized",
    "centralised",
    "decentralised",
    "decentralization",
    "decentralisation",
    "centralization",
    "centralisation",
    "bans",
    "hodl",
    "ambiguity",
    "revolutionize",
    "revolutionise",
    "consolidation",
    "shorts",
    "longs",
    "long",
    "short",
    "shorting",
    "grow",
    "volatile",
    "rally",
    "rallying",
    "noob",
    "noobs",
    "innovation",
    "bottom",
    "top",
    "topped",
    "bottomed",
    "upwards",
    "downwards",
    "invest",
    "raging",
    "rocketing",
    "swing",
    "swinging",
    "stake",
    "whale",
    "whales",
    "lull",
    "moon",
    "choppy",
    # Trading actions
    "buy",
    "buying",
    "sell",
    "selling",
    "startselling",
    "stopselling",
    "startbuying",
    "stopbuying",
    # Coins and related terms
    "bitcoin",
    "btc",
    "eth",
    "xmr",
    "xrp",
    "ripple",
    "block",
    "reward",
    "airdrop",
    "drop",
    "raise",
    "stack",
    "pull",
    "push",
    "token",
    "sale",
    "unhappy",
    "happy",
    "expert",
    "novice",
    "passed",
    "mark",
    "decline",
    "incline",
    "fees",
    "crypto",
    "wallet",
    "price",
    "history",
    "reached",
    "upward",
    "downward",
    "trading",
    "mining",
    "defi",
    "finance",
    "blockchain",
    "interest",
    "alt",
    "alts",
    "fiat",
    "currency",
    "currencies",
    "wealth",
    "hype",
    "hyped",
    "achievement",
    "platform",
    "incremental",
    "increment",
    "decrement",
    "decremental",
    "success",
    "loss",
    "win",
    "lose",
    "worth",
    "strongest",
    "weakest",
    "strong",
    "weak",
    "trade",
    "popping",
    "sucking",
    "shard",
    "sharding",
    "industry",
]
def filterOutTweetsWithNoneWhitelistedWords(text):
    """Return ``text`` if it contains any whitelisted keyword, else ``""``.

    The check is a case-insensitive substring test of every entry in the
    module-level ``whitelist`` against the tweet text.

    Args:
        text: Tweet text to check.

    Returns:
        The original, unmodified ``text`` when at least one keyword occurs
        in it; otherwise an empty string, after logging a WARN entry.
    """
    # The whitelist holds lower-case keywords only, so compare against a
    # lower-cased copy of the tweet. Without this, tweets such as
    # "BTC to the MOON" were wrongly rejected as not crypto related.
    lowered = text.lower()
    if any(keyword in lowered for keyword in whitelist):
        return text

    log("Tweet [{}] did not contain any keywords for it to be considered crypto related".format(text), 'WARN')
    return ""