[14.10.20] Moved whitelisting to collector to reduce initial amount
This commit is contained in:
parent
b5a81aa2b7
commit
b85fe65196
@ -3356,4 +3356,5 @@ spam,BTC
|
||||
ham,It's incredible bitcoin
|
||||
spam, xrp ripple trx btc Over 20 Congress Lawmakers Unhappy May Decide Soon BS Yeddyurappa - IPLfantasy IPLFantasyLeague IPL2019onIndiaBet - BS Yeddyurappa said the Bharatiya Janata Party would win by-polls for both
|
||||
spam,Current Crypto Prices! BTC $6298 74 USDETH $172 63 USDLTC $76 03 USDBCH $286 59 USDXLM $0 09192 USDDOGE $ 0 00252 USDNEO $8 97 USDXRP $0 2979 USDCANN $0 008251 USDEMC2 $0 09215 USDXMR $68 18 USDBTG $20 38 USD
|
||||
spam,lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN
|
||||
spam,lunomoney Many don't even realise The block reward will hit the value of a single transaction fee in BTC approx 60 years before that deadlineAnd another 36 years before that point it will hit the total in fees in the average block where fees aren't rediculous Suddenly you realise SN
|
||||
spam,THE MOST BEAUTIFUL FACE CHOU TZUYU ONLY
|
||||
|
@ -15,8 +15,6 @@ from sklearn.metrics import classification_report, accuracy_score
|
||||
|
||||
from src.utils.jsonLogger import log
|
||||
|
||||
from src.tweets.whitelistedWords import filterOutTweetsWithNoneWhitelistedWords
|
||||
|
||||
# Global Metrics
|
||||
HB_NB_Precision = 0
|
||||
HB_NB_Recall = 0
|
||||
@ -266,12 +264,8 @@ class tweetFilter(object):
|
||||
#
|
||||
# Filter.testPrediction()
|
||||
def tweetFilterPredit(self, text):
|
||||
tweet = filterOutTweetsWithNoneWhitelistedWords(text)
|
||||
if tweet != "":
|
||||
df = pd.DataFrame(self.Filter.predict(tweet))
|
||||
df[0] = df[0].map({0: 'ham', 1: 'spam'})
|
||||
log("Classification of tweet as {}".format(df[0][0]), 'INFO')
|
||||
df = pd.DataFrame(self.Filter.predict(text))
|
||||
df[0] = df[0].map({0: 'ham', 1: 'spam'})
|
||||
log("Classification of tweet as {}".format(df[0][0]), 'INFO')
|
||||
|
||||
return df[0][0]
|
||||
else:
|
||||
return "spam"
|
||||
return df[0][0]
|
||||
@ -1,151 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from src.utils.jsonLogger import log
|
||||
|
||||
# Keywords that mark a tweet as crypto related.
# filterOutTweetsWithNoneWhitelistedWords tests each entry against the tweet
# with the `in` operator, so every entry must be its own string literal: two
# adjacent literals without a comma are silently concatenated by Python into a
# single entry (this previously turned "novice" and "passed" into
# "novicepassed").
whitelist = [
    # Market direction and sentiment
    "bull",
    "bear",
    "bullish",
    "bearish",
    "up",
    "down",
    "high",
    "low",
    "higher",
    "lower",
    "absconded",
    "maximalists",
    "regulate",
    "infamous",
    "tradehigher",
    "tradelower",
    "revival",
    "centralized",
    "decentralized",
    "centralised",
    "decentralised",
    "decentralization",
    "decentralisation",
    "centralization",
    "centralisation",
    "bans",
    "hodl",
    "ambiguity",
    "revolutionize",
    "revolutionise",
    "consolidation",
    "shorts",
    "longs",
    "long",
    "short",
    "shorting",
    "grow",
    "volatile",
    "rally",
    "rallying",
    "noob",
    "noobs",
    "innovation",
    "bottom",
    "top",
    "topped",
    "bottomed",
    "upwards",
    "downwards",
    "invest",
    "raging",
    "rocketing",
    "swing",
    "swinging",
    "stake",
    "whale",
    "whales",
    "lull",
    "moon",
    "choppy",
    # Trading actions
    "buy",
    "buying",
    "sell",
    "selling",
    "startselling",
    "stopselling",
    "startbuying",
    "stopbuying",
    # Coins and chain vocabulary
    "bitcoin",
    "btc",
    "eth",
    "xmr",
    "xrp",
    "ripple",
    "block",
    "reward",
    "airdrop",
    "drop",
    "raise",
    "stack",
    "pull",
    "push",
    "token",
    "sale",
    "unhappy",
    "happy",
    "expert",
    "novice",
    "passed",
    "mark",
    "decline",
    "incline",
    "fees",
    "crypto",
    "wallet",
    "price",
    "history",
    "reached",
    "upward",
    "downward",
    "trading",
    "mining",
    "defi",
    "finance",
    "blockchain",
    "interest",
    "alt",
    "alts",
    "fiat",
    "currency",
    "currencies",
    "wealth",
    "hype",
    "hyped",
    "achievement",
    "platform",
    "incremental",
    "increment",
    "decrement",
    "decremental",
    "success",
    "loss",
    "win",
    "lose",
    "worth",
    "strongest",
    "weakest",
    "strong",
    "weak",
    "trade",
    "popping",
    "sucking",
    "shard",
    "sharding",
    "industry",
]
|
||||
|
||||
def filterOutTweetsWithNoneWhitelistedWords(text):
    """Pass `text` through only if it contains a whitelisted keyword.

    Each entry of the module-level `whitelist` is tested against `text`
    with the `in` operator; the first hit returns `text` unchanged.
    If no entry is found, a WARN entry is logged and the empty string is
    returned.

    NOTE(review): when `text` is a str, `in` is a case-sensitive substring
    test, not a whole-word match -- confirm callers rely on that.
    """
    for keyword in whitelist:
        if keyword in text:
            return text

    # No keyword matched: report the rejected tweet and signal "filtered out".
    log("Tweet [{}] did not contain any keywords for it to be considered crypto related".format(text), 'WARN')
    return ""
|
||||
Loading…
x
Reference in New Issue
Block a user