[05.03.20] Spam filter for tweets: Naive Bayes training, Flask endpoint exposure, Kubernetes deployment, and build pipeline
parent a418a08aa1
commit 7b519630a2
Dockerfile (new file, +9)
@@ -0,0 +1,9 @@
FROM python:3.7-alpine
MAINTAINER Andrew Sotheran <cryptosky.user@gmail.com>
RUN apk update && \
    apk add py-pip libc-dev gcc
RUN pip install utils pycryptodome && \
    pip install nltk pandas numpy sklearn flask
COPY . /home/spam-filter/.
EXPOSE 9090
CMD ["python", "/home/spam-filter/src/main.py"]
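Note: the image installs nltk but never fetches the tokeniser and stopword corpora that src/tweets/tweetFilter.py relies on. A minimal bootstrap sketch of what the image (or container startup) would additionally need; this is an assumption, not part of the commit:

# Hypothetical one-off bootstrap; the commit itself never downloads these corpora.
import nltk

nltk.download('punkt')      # required by nltk.tokenize.word_tokenize
nltk.download('stopwords')  # required by nltk.corpus.stopwords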
README.md (new file, +2)
@@ -0,0 +1,2 @@
# Spam-Filter
Universal spam filter for news and tweet data
configuration/kubernetes/deployment.yaml (new file, +77)
@@ -0,0 +1,77 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: LABEL
  name: RESOURCE_NAME
  namespace: production
spec:
  replicas: 1
  selector:
    matchLabels:
      app: RESOURCE_NAME
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      labels:
        app: RESOURCE_NAME
    spec:
      containers:
        - image: REPOSITORY/IMAGE
          name: RESOURCE_NAME
          env:
            - name: KUBERNETES_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CONTAINER_CORE_LIMIT
              valueFrom:
                resourceFieldRef:
                  resource: limits.cpu
            - name: CONTAINER_MAX_MEMORY
              valueFrom:
                resourceFieldRef:
                  resource: limits.memory
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: endpoints
                  key: dbGateway.url
            - name: DATABASE_PORT
              valueFrom:
                secretKeyRef:
                  name: endpoints
                  key: dbGateway.port
            - name: COINBASE_KEY
              valueFrom:
                secretKeyRef:
                  name: coinbase-secret
                  key: coinbase.api.key
            - name: COINBASE_SECRET
              valueFrom:
                secretKeyRef:
                  name: coinbase-secret
                  key: coinbase.api.secret
            - name: DB_GATEWAY_URL
              valueFrom:
                secretKeyRef:
                  name: endpoints
                  key: dbGateway.url
          ports:
            - containerPort: 9090
              name: RESOURCE_NAME
          imagePullPolicy: Always
          resources:
            requests:
              cpu: 50m
              memory: 32Mi
            limits:
              cpu: 100m
              memory: 32Mi
      restartPolicy: Always
      imagePullSecrets:
        - name: registry-secret
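The Deployment injects database and Coinbase credentials into the container as environment variables, but the committed main.py does not read them yet. A sketch of how application code could pick them up; the variable names come from the manifest, everything else is assumed:

# Hypothetical consumer of the env vars injected by deployment.yaml.
import os

db_url = os.environ.get('DATABASE_URL')        # from secret endpoints/dbGateway.url
db_port = os.environ.get('DATABASE_PORT')      # from secret endpoints/dbGateway.port
coinbase_key = os.environ.get('COINBASE_KEY')  # from secret coinbase-secret/coinbase.api.key
namespace = os.environ.get('KUBERNETES_NAMESPACE', 'production')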
configuration/kubernetes/service.yaml (new file, +16)
@@ -0,0 +1,16 @@
kind: Service
apiVersion: v1
metadata:
  labels:
    name: LABEL
  name: RESOURCE_NAME
  namespace: production
spec:
  selector:
    app: RESOURCE_NAME
  ports:
    - port: 9090
      # Service port protocols must be TCP, UDP or SCTP; the original "HTTP" is not a valid value.
      protocol: TCP
      targetPort: 9090
  sessionAffinity: None
  type: ClusterIP
configuration/pipelines/build.groovy (new file, +100)
@@ -0,0 +1,100 @@
#!/usr/bin/env groovy

env.APPLICATION_NAME = 'spam-filter'
env.APPLICATION_LABEL = 'utilities'
env.GIT_BRANCH = 'master'
env.GIT_REPOSITORY_PATH = "github.com/andyjk15/${env.APPLICATION_NAME}.git"
env.GIT_REPOSITORY_URL = "https://${env.GIT_REPOSITORY_PATH}"
env.GITHUB_CREDENTIALS_ID = 'Github'
env.DOCKER_REPOSITORY = 'registry.cryptosky.me'
env.DOCKER_REPOSITORY_URL = "https://${env.DOCKER_REPOSITORY}"
env.DOCKER_REPOSITORY_TCP = "tcp://${env.DOCKER_REPOSITORY}:4243"

env.NAMESPACE = 'production'
env.SLAVE_LABEL = "cryptosky-aio-build"

def mvn( String goals ) {
    sh "mvn -s configuration/settings.xml --show-version --batch-mode ${goals}"
}

String get_application_version() {
    "1.0.0-b${env.BUILD_NUMBER}"
}

String executeShellScript( String shellPath, String arg1 = '', String arg2 = '', String arg3 = '', String arg4 = '' ) {
    sh "./${shellPath} ${arg1} ${arg2} ${arg3} ${arg4}"
}

try {
    timestamps {
        node ("${env.SLAVE_LABEL}") {
            stage('Initialise') {
                // Check out the spam-filter repository defined by the env vars above
                // (the original hardcoded a price-collector.git URL, an apparent copy-paste slip).
                checkout([$class: 'GitSCM', branches: [[name: env.GIT_BRANCH]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: env.GITHUB_CREDENTIALS_ID, url: env.GIT_REPOSITORY_URL]]])

                env.APPLICATION_VERSION = get_application_version()

                withCredentials(
                    [usernamePassword(
                        credentialsId: 'doctl',
                        passwordVariable: 'DOCTL_TOKEN',
                        usernameVariable: 'DOCTL_USERNAME'
                    )]
                ) {
                    sh "doctl auth init --access-token ${DOCTL_TOKEN}"
                    sh "doctl kubernetes cluster kubeconfig save cryptosky-kubernetes-cluster-production"
                }
            }

            stage('Test Artifact') {
                try {
                    // mvn 'verify -DskipUTs -DskipTests'
                } finally {
                    // mvn 'test'
                }
            }

            stage('Build Image') {
                // mvn 'clean package -DskipTests'

                executeShellScript("configuration/scripts/mapVarsToConfigs.sh",
                        env.DOCKER_REPOSITORY,
                        env.APPLICATION_NAME,
                        env.APPLICATION_VERSION,
                        env.APPLICATION_LABEL)
            }

            stage('Tag Repository') {
                withDockerServer([uri: "${env.DOCKER_REPOSITORY_TCP}"]) {
                    withDockerRegistry([credentialsId: 'Registry', url: "${env.DOCKER_REPOSITORY_URL}"]) {
                        def image = docker.build("${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:${env.APPLICATION_VERSION}")
                        image.push()
                        def latest = docker.build("${env.DOCKER_REPOSITORY}/${env.APPLICATION_NAME}:latest")
                        latest.push()
                    }
                }
                withCredentials(
                    [usernamePassword(
                        credentialsId: env.GITHUB_CREDENTIALS_ID,
                        passwordVariable: 'GIT_PASSWORD',
                        usernameVariable: 'GIT_USERNAME'
                    )]
                ) {
                    sh "git tag ${env.APPLICATION_VERSION}"
                    sh "git push https://${GIT_USERNAME}:${GIT_PASSWORD}@${env.GIT_REPOSITORY_PATH} ${env.APPLICATION_VERSION}"
                }
            }

            stage('Deploy') {
                executeShellScript("configuration/scripts/deployToKubernetes.sh",
                        env.APPLICATION_NAME)
            }
        }
    }
} catch ( exception ) {
    currentBuild.result = 'FAILURE'
    throw exception
} finally {
    // Only mark SUCCESS when nothing has already flagged the build as failed;
    // the original unconditionally set SUCCESS here, masking failures.
    if (currentBuild.result != 'FAILURE') {
        currentBuild.result = 'SUCCESS'
    }
}
configuration/scripts/deployToKubernetes.sh (new executable file, +10)
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

APPLICATION_NAME=$1

kubectl apply -f configuration/kubernetes/deployment.yaml
kubectl apply -f configuration/kubernetes/service.yaml

kubectl get pods

kubectl rollout status deployment/${APPLICATION_NAME} --namespace=production
configuration/scripts/mapVarsToConfigs.sh (new executable file, +14)
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

DOCKER_REPOSITORY=$1
APPLICATION_NAME=$2
APPLICATION_VERSION=$3
APPLICATION_LABEL=$4

sed -i "s/REPOSITORY/${DOCKER_REPOSITORY}/g" configuration/kubernetes/deployment.yaml
sed -i "s/IMAGE/${APPLICATION_NAME}:${APPLICATION_VERSION}/g" configuration/kubernetes/deployment.yaml
sed -i "s/RESOURCE_NAME/${APPLICATION_NAME}/g" configuration/kubernetes/deployment.yaml
sed -i "s/LABEL/${APPLICATION_LABEL}/g" configuration/kubernetes/deployment.yaml

sed -i "s/RESOURCE_NAME/${APPLICATION_NAME}/g" configuration/kubernetes/service.yaml
sed -i "s/LABEL/${APPLICATION_LABEL}/g" configuration/kubernetes/service.yaml
src/__init__.py (new file, empty)
src/main.py (new file, +37)
@@ -0,0 +1,37 @@
#!/usr/bin/env python

import sys, json
sys.path.append('/home/spam-filter/')

from threading import Thread
from tweets.tweetFilter import tweetFilter
# from news.newsFilter import newsFilter

from flask import Flask, request

app = Flask(__name__)

tweet_filter = tweetFilter()  # renamed from `filter`, which shadowed the Python builtin

@app.route('/predict', methods=['GET'])
def tweetPredict():
    tweet = request.args.get('tweet')

    result = tweet_filter.tweetFilterPredict(tweet)
    return json.dumps({'result': result, 'tweet': tweet}), 200, {'ContentType': 'application/json'}

def callTweetFilter():
    # Train first, then expose the endpoint; the server only starts once training is done.
    tweet_filter.tweetFilterTrain()

    # Bind all interfaces so the container port is reachable from the Kubernetes
    # Service; Flask's default 127.0.0.1 binding would not be.
    app.run(host='0.0.0.0', port=9090)

# def callNewsFilter():
#     newsFilter()

if __name__ == '__main__':
    print("Console: ", "==== Spam Filter - Tweets & News ====")
    sys.stdout.flush()

    Thread(target=callTweetFilter).start()
    # Thread(target=callNewsFilter).start()
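Once the service is up, the endpoint takes the tweet as a query parameter. A minimal client sketch using only the standard library; the localhost URL assumes a local run or a port-forward:

# Hypothetical client for the /predict endpoint exposed above.
import json
import urllib.parse
import urllib.request

query = urllib.parse.urlencode({'tweet': 'Earn free btc in 5 minutes'})
with urllib.request.urlopen(f'http://localhost:9090/predict?{query}') as response:
    print(json.load(response))  # e.g. {"result": "spam", "tweet": "..."}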
src/resources/news_spam_ham.csv (new file, empty)
src/resources/tweet_spam_ham.csv (new file, +1007; diff suppressed because it is too large)
src/tweets/tweetFilter.py (new file, +261)
@@ -0,0 +1,261 @@
#!/usr/bin/env python

import os, sys
# word_tokenize and stopwords require the NLTK 'punkt' and 'stopwords' data to be downloaded.
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from math import log, sqrt
import pandas as pd
import numpy as np

from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Global Metrics
HB_NB_Precision = 0
HB_NB_Recall = 0
HB_NB_F_Score = 0
HB_NB_Accuracy = 0

## Logic
def processTweet(tweet, gram = 2):
    tweet = tweet.lower()  # Lower-case the text

    words = word_tokenize(tweet)  # Tokenise words in the text
    words = [w for w in words if len(w) > 2]

    if gram > 2:  ## Increasing grams can increase accuracy
        w = []
        for i in range(len(words) - gram + 1):
            w += [' '.join(words[i:i + gram])]
        return w

    # Remove stopwords
    sw = stopwords.words('english')
    words = [word for word in words if word not in sw]

    stemmer = PorterStemmer()  # Stem words
    words = [stemmer.stem(word) for word in words]

    return words
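For a sense of what the preprocessing produces, a hedged illustration; exact stems depend on the NLTK version:

# Assumed illustrative output; verify against your NLTK install.
tokens = processTweet("Bitcoin closed with some gains in month of February")
print(tokens)  # roughly: ['bitcoin', 'close', 'gain', 'month', 'februari']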
class classifier(object):
    def __init__(self, trainData):
        self.tweet = trainData['tweet']
        self.labels = trainData['class']

    def train(self):
        self.TF_and_IDF()  ## Bag of words
        self.TF_IDF()      ## Term frequencies

    def TF_and_IDF(self):
        noTweets = self.tweet.shape[0]
        self.spam = self.labels.value_counts()[1]
        self.ham = self.labels.value_counts()[0]
        self.total = self.spam + self.ham

        # Initialise spam/ham counters
        self.spamCount = 0
        self.hamCount = 0
        self.tfSpam = dict()
        self.tfHam = dict()
        self.idfSpam = dict()
        self.idfHam = dict()

        ## Logic
        for entry in range(noTweets):
            processed = processTweet(self.tweet[entry])
            count = list()  # Tracks whether the word has occurred in the message or not (IDF count)
            for word in processed:
                if self.labels[entry]:
                    self.tfSpam[word] = self.tfSpam.get(word, 0) + 1
                    self.spamCount += 1
                else:
                    self.tfHam[word] = self.tfHam.get(word, 0) + 1
                    self.hamCount += 1
                if word not in count:  ## This and below is additive smoothing
                    count += [word]
            for word in count:
                if self.labels[entry]:
                    self.idfSpam[word] = self.idfSpam.get(word, 0) + 1
                else:
                    self.idfHam[word] = self.idfHam.get(word, 0) + 1

    def TF_IDF(self):
        self.probSpam = dict()
        self.probHam = dict()
        self.sumSpam = 0
        self.sumHam = 0
        for word in self.tfSpam:
            self.probSpam[word] = (self.tfSpam[word]) * log((self.spam + self.ham) / (self.idfSpam[word] + self.idfHam.get(word, 0)))
            self.sumSpam += self.probSpam[word]
        for word in self.tfSpam:
            self.probSpam[word] = (self.probSpam[word] + 1) / (self.sumSpam + len(list(self.probSpam.keys())))
        for word in self.tfHam:
            self.probHam[word] = (self.tfHam[word]) * log((self.spam + self.ham) / (self.idfSpam.get(word, 0) + self.idfHam[word]))
            self.sumHam += self.probHam[word]
        for word in self.tfHam:
            self.probHam[word] = (self.probHam[word] + 1) / (self.sumHam + len(list(self.probHam.keys())))

        self.probSpamTotal, self.probHamTotal = self.spam / self.total, self.ham / self.total

    def classify(self, processed):
        pSpam, pHam = 0, 0
        for word in processed:
            if word in self.probSpam:
                pSpam += log(self.probSpam[word])
            else:
                pSpam -= log(self.sumSpam + len(list(self.probSpam.keys())))
            if word in self.probHam:
                pHam += log(self.probHam[word])
            else:
                pHam -= log(self.sumHam + len(list(self.probHam.keys())))
        pSpam += log(self.probSpamTotal)
        pHam += log(self.probHamTotal)
        return pSpam >= pHam

    def predict(self, testData):
        result = dict()
        for (i, tweet) in enumerate(testData):
            processed = processTweet(tweet)
            result[i] = int(self.classify(processed))
        return result
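A toy run of the hand-rolled classifier, for orientation; the data here is invented for illustration, the real training set is src/resources/tweet_spam_ham.csv:

# Hypothetical two-row training frame in the shape classifier expects.
import pandas as pd

toy = pd.DataFrame({
    'tweet': ["free btc earn money now", "bitcoin price rose today"],
    'class': [1, 0],  # 1 = spam, 0 = ham
})
nb = classifier(toy)
nb.train()
print(nb.classify(processTweet("earn free btc now")))  # expected: True (spam)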
def metrics(labels, predictions):
    # Update the module-level metrics; without this `global`, the assignments below
    # would only create locals and the Global Metrics above would stay at 0.
    global HB_NB_Precision, HB_NB_Recall, HB_NB_F_Score, HB_NB_Accuracy

    true_pos, true_neg, false_pos, false_neg = 0, 0, 0, 0
    for i in range(len(labels)):
        true_pos += int(labels[i] == 1 and predictions[i] == 1)
        true_neg += int(labels[i] == 0 and predictions[i] == 0)
        false_pos += int(labels[i] == 0 and predictions[i] == 1)
        false_neg += int(labels[i] == 1 and predictions[i] == 0)
    HB_NB_Precision = true_pos / (true_pos + false_pos)
    HB_NB_Recall = true_pos / (true_pos + false_neg)
    HB_NB_F_Score = 2 * HB_NB_Precision * HB_NB_Recall / (HB_NB_Precision + HB_NB_Recall)
    HB_NB_Accuracy = (true_pos + true_neg) / (true_pos + true_neg + false_pos + false_neg)

    print("HB Precision: ", HB_NB_Precision)
    print("HB Recall: ", HB_NB_Recall)
    print("HB F-score: ", HB_NB_F_Score)
    print("HB Accuracy: ", HB_NB_Accuracy)
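As a quick sanity check on the formulas, with numbers invented for illustration:

# With labels [1, 0, 1, 0] and predictions [1, 1, 1, 0]: TP=2, FP=1, TN=1, FN=0,
# so precision = 2/3, recall = 1.0, F-score = 0.8, accuracy = 0.75.
metrics([1, 0, 1, 0], [1, 1, 1, 0])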
class filterSpam(object):
    def __init__(self, training_set):
        self.training_set = training_set

    def trainFilter(self):
        self.dataset()
        self.train()

    def dataset(self):
        self.data = pd.read_csv(self.training_set)

        self.data['class'] = self.data['classes'].map({'ham': 0, 'spam': 1})

        self.data.drop(['classes'], axis=1, inplace=True)

        # Random ~75/25 train/test split
        self.trainIndex, self.testIndex = list(), list()
        for i in range(self.data.shape[0]):
            if np.random.uniform(0, 1) < 0.75:
                self.trainIndex += [i]
            else:
                self.testIndex += [i]
        self.trainData = self.data.loc[self.trainIndex]
        self.testData = self.data.loc[self.testIndex]

        self.trainData.reset_index(inplace=True)
        self.testData.reset_index(inplace=True)
        self.trainData.drop(['index'], axis=1, inplace=True)
        self.testData.drop(['index'], axis=1, inplace=True)

    def train(self):
        self.spamFilter = classifier(self.trainData)
        self.spamFilter.train()

    def testData_Prediction(self):
        prediction = self.spamFilter.predict(self.testData['tweet'])

        return prediction

    def testPrediction(self):
        # Test spam/ham tweets - should return True and False respectively
        spam = processTweet("Earn more than 0015 btc free No deposit No investment Free Bitcoins - Earn $65 free btc in 5 minutes bitcoin freebtc getbtc")
        ham = processTweet("Bitcoin closed with some gains in month of February")

        hamTweet = self.spamFilter.classify(ham)
        spamTweet = self.spamFilter.classify(spam)

        print("Console: ", "Spam Tweet -- ", spamTweet)
        sys.stdout.flush()
        print("Console: ", "Ham Tweet -- ", hamTweet)
        sys.stdout.flush()

    def filterStatistics(self, prediction):
        metrics(self.testData['class'], prediction)

    def filterTweet(self, tweet):
        processed = processTweet(tweet)

        classified = self.spamFilter.classify(processed)

        return classified
class multinomialNaiveBayes(object):
    def __init__(self, training_set):
        self.training_set = training_set

    def trainFilter(self):
        self.dataset()
        self.train()
        self.predictTest()

    def dataset(self):
        self.data = pd.read_csv(self.training_set)

        self.data.drop_duplicates(inplace = True)

        self.data['class'] = self.data['classes'].map({'ham': 0, 'spam': 1})

        # Reuse processTweet as the analyser so both classifiers see the same tokens
        self.cv = CountVectorizer(analyzer=processTweet)

        messages_bow = self.cv.fit_transform(self.data['tweet'])

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(messages_bow, self.data['class'], test_size = 0.20, random_state = 0)

    def train(self):
        self.classifier = MultinomialNB()
        self.classifier.fit(self.X_train, self.y_train)

    def predictTest(self):
        self.pred = self.classifier.predict(self.X_test)
        print('Accuracy: ', accuracy_score(self.y_test, self.pred))

    def predict(self, tweet):
        message = self.cv.transform([tweet]).toarray()

        return self.classifier.predict(message)
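classification_report is imported at the top of the file but never called; a sketch of how the held-out split could be scored with it (hypothetical usage, not part of the commit):

# Hypothetical per-class report on the 20% held-out split.
nb = multinomialNaiveBayes("src/resources/tweet_spam_ham.csv")
nb.trainFilter()
print(classification_report(nb.y_test, nb.pred, target_names=['ham', 'spam']))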
class tweetFilter(object):
    def __init__(self):
        pass

    def tweetFilterTrain(self):
        self.Filter = multinomialNaiveBayes("src/resources/tweet_spam_ham.csv")

        self.Filter.trainFilter()

        ### Self-coded NB gets around 75 -> 85% accuracy (not as good as sklearn's)
        # Filter.trainFilter()
        #
        # prediction = Filter.testData_Prediction()
        # Filter.filterStatistics(prediction)
        #
        # Filter.testPrediction()

    def tweetFilterPredict(self, tweet):  # renamed from the misspelled tweetFilterPredit, matching src/main.py
        df = pd.DataFrame(self.Filter.predict(tweet))
        df[0] = df[0].map({0: 'ham', 1: 'spam'})
        return df[0][0]