Skip to content

style: format code with Autopep8, Black, ClangFormat, dotnet-format, Go fmt, Gofumpt, Google Java Format, isort, Ktlint, PHP CS Fixer, Prettier, RuboCop, Ruff Formatter, Rustfmt, Scalafmt, StandardJS, StandardRB, swift-format and Yapf #1251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 154 additions & 99 deletions blockchain_integration/pi_network/ai/fraud_detection_ai.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,63 @@
# pi_network/ai/fraud_detection_ai.py
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from catboost import CatBoostClassifier
from imblearn.ensemble import EasyEnsemble
from imblearn.over_sampling import SMOTE
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint
from imblearn.over_sampling import SMOTE
from imblearn.ensemble import EasyEnsemble
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import (
AdaBoostClassifier,
BaggingClassifier,
GradientBoostingClassifier,
RandomForestClassifier,
StackingClassifier,
VotingClassifier,
)
from sklearn.feature_selection import RFECV, SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFECV
from sklearn.manifold import TSNE
from sklearn.metrics import (
accuracy_score,
average_precision_score,
classification_report,
confusion_matrix,
f1_score,
precision_score,
recall_score,
roc_auc_score,
)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Load the dataset
# NOTE(review): this text is a rendered diff of a formatting-only commit, so
# each restyled statement appears twice in a row: the pre-format line
# (single quotes / one long line) followed by its post-format equivalent.
df = pd.read_csv('fraud_data.csv')
df = pd.read_csv("fraud_data.csv")

# Preprocess the data
# Features are every column except `is_fraud`; `is_fraud` itself is the label.
X = df.drop(['is_fraud'], axis=1)
y = df['is_fraud']
X = df.drop(["is_fraud"], axis=1)
y = df["is_fraud"]

# Handle imbalanced dataset
# NOTE(review): SMOTE is fitted on the full X, y *before* the train/test split
# below, so the held-out split is drawn from the resampled data and contains
# synthetic rows. Scores computed on X_test/y_test are therefore not measured
# on the original class distribution. Resampling only the training split
# would avoid this.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)

# Split the data into training and testing sets
# 80/20 split of the resampled data with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
X_res, y_res, test_size=0.2, random_state=42
)

# Scale the data
# Only the scaler's construction is visible here; presumably it is fitted and
# applied in the collapsed part of the diff that follows - TODO confirm.
scaler = StandardScaler()
Expand All @@ -71,40 +76,64 @@

# Define the neural network model
# Feed-forward classifier: 64 -> 32 hidden ReLU units with 20% dropout after
# each hidden layer, and a 2-way softmax output (one unit per class).
# X_train_pca / X_test_pca are not defined in the visible lines; presumably
# they come from the collapsed part of the diff above - TODO confirm.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_pca.shape[1],)))
model.add(Dense(64, activation="relu", input_shape=(X_train_pca.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
model.add(Dense(2, activation="softmax"))

# Compile the model
# categorical_crossentropy matches the one-hot labels produced by
# to_categorical() in the fit call below.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Define the early stopping callback
# Stops once val_loss has not improved by at least 0.001 for 5 epochs.
# NOTE(review): restore_best_weights is not set, so the in-memory model
# presumably keeps its last-epoch weights - confirm against the Keras docs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, min_delta=0.001)
early_stopping = EarlyStopping(monitor="val_loss", patience=5, min_delta=0.001)

# Define the model checkpoint callback
# Writes the lowest-val_loss model to best_model.h5. The evaluation below
# uses `model` directly and never reloads this file.
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min')
model_checkpoint = ModelCheckpoint(
"best_model.h5", monitor="val_loss", save_best_only=True, mode="min"
)

# Train the model
# NOTE(review): the test split is passed as validation_data, so early stopping
# and checkpoint selection are driven by the same data the metrics below are
# reported on. A separate validation split would keep the test set untouched.
history = model.fit(X_train_pca, to_categorical(y_train), epochs=100, batch_size=128, validation_data=(X_test_pca, to_categorical(y_test)), callbacks=[early_stopping, model_checkpoint])
history = model.fit(
X_train_pca,
to_categorical(y_train),
epochs=100,
batch_size=128,
validation_data=(X_test_pca, to_categorical(y_test)),
callbacks=[early_stopping, model_checkpoint],
)

# Evaluate the model
# model.predict returns one softmax row per sample; argmax over axis 1 turns
# it into a hard class label for the sklearn metrics.
y_pred = model.predict(X_test_pca)
y_pred_class = np.argmax(y_pred, axis=1)
print('Accuracy:', accuracy_score(y_test, y_pred_class))
print('Classification Report:')
print("Accuracy:", accuracy_score(y_test, y_pred_class))
print("Classification Report:")
print(classification_report(y_test, y_pred_class))
print('Confusion Matrix:')
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_class))

# Define the ensemble models
# Four gradient-boosting style base learners, all with library defaults.
xgb_model = XGBClassifier()
cat_model = CatBoostClassifier()
lgbm_model = LGBMClassifier()
gbm_model = GradientBoostingClassifier()
# Voting ensemble over the four base learners. No `voting` argument is given,
# so the library default applies.
voting_model = VotingClassifier(estimators=[('xgb', xgb_model), ('cat', cat_model), ('lgbm', lgbm_model), ('gbm', gbm_model)])
# Stacking ensemble: same four base learners, with a logistic regression
# combining their outputs.
stacking_model = StackingClassifier(estimators=[('xgb', xgb_model), ('cat', cat_model), ('lgbm', lgbm_model), ('gbm', gbm_model)], final_estimator=LogisticRegression())
voting_model = VotingClassifier(
estimators=[
("xgb", xgb_model),
("cat", cat_model),
("lgbm", lgbm_model),
("gbm", gbm_model),
]
)
stacking_model = StackingClassifier(
estimators=[
("xgb", xgb_model),
("cat", cat_model),
("lgbm", lgbm_model),
("gbm", gbm_model),
],
final_estimator=LogisticRegression(),
)
# Bagging / boosting wrappers around the XGBoost learner, 10 members each.
# NOTE(review): the `base_estimator` keyword was renamed to `estimator` in
# scikit-learn 1.2 and later removed; on a current scikit-learn these two
# lines are expected to raise TypeError - confirm against the pinned version.
bagging_model = BaggingClassifier(base_estimator=xgb_model, n_estimators=10)
adaboost_model = AdaBoostClassifier(base_estimator=xgb_model, n_estimators=10)

Expand All @@ -128,18 +157,18 @@
# Tail of the ensemble prediction step; the fit calls and the other
# y_pred_* assignments are not visible here (collapsed part of the diff).
y_pred_bagging = bagging_model.predict(X_test_pca)
y_pred_adaboost = adaboost_model.predict(X_test_pca)

# Report plain accuracy for every ensemble on the held-out split.
# NOTE(review): y_test comes from the SMOTE-resampled data, so these figures
# describe a balanced test set rather than the original class distribution.
print('XGB Accuracy:', accuracy_score(y_test, y_pred_xgb))
print('CAT Accuracy:', accuracy_score(y_test, y_pred_cat))
print('LGBM Accuracy:', accuracy_score(y_test, y_pred_lgbm))
print('GBM Accuracy:', accuracy_score(y_test, y_pred_gbm))
print('Voting Accuracy:', accuracy_score(y_test, y_pred_voting))
print('Stacking Accuracy:', accuracy_score(y_test, y_pred_stacking))
print('Bagging Accuracy:', accuracy_score(y_test, y_pred_bagging))
print('AdaBoost Accuracy:', accuracy_score(y_test, y_pred_adaboost))
print("XGB Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("CAT Accuracy:", accuracy_score(y_test, y_pred_cat))
print("LGBM Accuracy:", accuracy_score(y_test, y_pred_lgbm))
print("GBM Accuracy:", accuracy_score(y_test, y_pred_gbm))
print("Voting Accuracy:", accuracy_score(y_test, y_pred_voting))
print("Stacking Accuracy:", accuracy_score(y_test, y_pred_stacking))
print("Bagging Accuracy:", accuracy_score(y_test, y_pred_bagging))
print("AdaBoost Accuracy:", accuracy_score(y_test, y_pred_adaboost))

# Define the feature selection models
# NOTE(review): despite the heading, these are ordinary classifiers compared
# on accuracy further down. Only rf_model is later wrapped in a feature
# selector (SelectFromModel / RFECV).
rf_model = RandomForestClassifier()
# probability=True enables predict_proba on the SVC.
svm_model = SVC(kernel='linear', probability=True)
svm_model = SVC(kernel="linear", probability=True)
gnb_model = GaussianNB()
qda_model = QuadraticDiscriminantAnalysis()
dt_model = DecisionTreeClassifier()
Expand All @@ -164,116 +193,142 @@
# Tail of the prediction step for the classic classifiers; knn_model, lr_model
# and the other y_pred_* values are set up in lines not visible here.
y_pred_knn = knn_model.predict(X_test_pca)
y_pred_lr = lr_model.predict(X_test_pca)

# Report plain accuracy for each classic classifier on the held-out split.
print('RF Accuracy:', accuracy_score(y_test, y_pred_rf))
print('SVM Accuracy:', accuracy_score(y_test, y_pred_svm))
print('GNB Accuracy:', accuracy_score(y_test, y_pred_gnb))
print('QDA Accuracy:', accuracy_score(y_test, y_pred_qda))
print('DT Accuracy:', accuracy_score(y_test, y_pred_dt))
print('KNN Accuracy:', accuracy_score(y_test, y_pred_knn))
print('LR Accuracy:', accuracy_score(y_test, y_pred_lr))
print("RF Accuracy:", accuracy_score(y_test, y_pred_rf))
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("GNB Accuracy:", accuracy_score(y_test, y_pred_gnb))
print("QDA Accuracy:", accuracy_score(y_test, y_pred_qda))
print("DT Accuracy:", accuracy_score(y_test, y_pred_dt))
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("LR Accuracy:", accuracy_score(y_test, y_pred_lr))

# Define the hyperparameter tuning models
# Both searches tune the XGBoost learner over the same 3 x 3 grid
# (max_depth x learning_rate) with 5-fold CV, scored on accuracy.
# NOTE(review): the grid has only 9 combinations, so n_iter=10 cannot be
# honoured by the randomized search. scikit-learn is expected to warn and
# evaluate the whole grid, making it equivalent to grid_search - confirm.
grid_search = GridSearchCV(xgb_model, {'max_depth': [3, 5, 7], 'learning_rate': [0.1, 0.5, 1.0]}, cv=5, scoring='accuracy')
random_search = RandomizedSearchCV(xgb_model, {'max_depth': [3, 5, 7], 'learning_rate': [0.1, 0.5, 1.0]}, cv=5, scoring='accuracy', n_iter=10)
grid_search = GridSearchCV(
xgb_model,
{"max_depth": [3, 5, 7], "learning_rate": [0.1, 0.5, 1.0]},
cv=5,
scoring="accuracy",
)
random_search = RandomizedSearchCV(
xgb_model,
{"max_depth": [3, 5, 7], "learning_rate": [0.1, 0.5, 1.0]},
cv=5,
scoring="accuracy",
n_iter=10,
)

# Train the hyperparameter tuning models
# Only the training split is used; model selection happens via the internal CV.
grid_search.fit(X_train_pca, y_train)
random_search.fit(X_train_pca, y_train)

# Evaluate the hyperparameter tuning models
# best_score_ is the mean cross-validated accuracy of the best setting, not a
# score on X_test_pca.
print('Grid Search Best Parameters:', grid_search.best_params_)
print('Grid Search Best Score:', grid_search.best_score_)
print('Random Search Best Parameters:', random_search.best_params_)
print('Random Search Best Score:', random_search.best_score_)
print("Grid Search Best Parameters:", grid_search.best_params_)
print("Grid Search Best Score:", grid_search.best_score_)
print("Random Search Best Parameters:", random_search.best_params_)
print("Random Search Best Score:", random_search.best_score_)

# Define the feature importance models
# Reads the importances from rf_model, which must already be fitted; the fit
# call is not visible here (presumably in the collapsed part of the diff).
# NOTE(review): if rf_model was fitted on X_train_pca like the other models,
# these importances rank PCA components, not original columns - confirm.
feature_importance = rf_model.feature_importances_
print('Feature Importance:')
print("Feature Importance:")
print(feature_importance)

# Define the partial dependence plots
# NOTE(review): these are not model partial-dependence plots. Each lmplot
# draws a regression of the true label against one of the first two columns
# of X_test_pca.
# NOTE(review): the DataFrame below has only the columns 'Feature 1',
# 'Feature 2' and 'Target', so hue='is_fraud' names a column that does not
# exist and seaborn is expected to raise. 'Target' is the likely intent.
partial_dependence = pd.DataFrame({'Feature 1': X_test_pca[:, 0], 'Feature 2': X_test_pca[:, 1], 'Target': y_test})
sns.lmplot(x='Feature 1', y='Target', data=partial_dependence, hue='is_fraud')
sns.lmplot(x='Feature 2', y='Target', data=partial_dependence, hue='is_fraud')
partial_dependence = pd.DataFrame(
{"Feature 1": X_test_pca[:, 0], "Feature 2": X_test_pca[:, 1], "Target": y_test}
)
sns.lmplot(x="Feature 1", y="Target", data=partial_dependence, hue="is_fraud")
sns.lmplot(x="Feature 2", y="Target", data=partial_dependence, hue="is_fraud")

# Define the learning curves
# NOTE(review): `learning_curve` is not among the imports shown at the top of
# this diff (it lives in sklearn.model_selection), so this call is expected to
# raise NameError unless it is imported elsewhere.
train_sizes, train_scores, test_scores = learning_curve(xgb_model, X_train_pca, y_train, cv=5, scoring='accuracy')
train_sizes, train_scores, test_scores = learning_curve(
xgb_model, X_train_pca, y_train, cv=5, scoring="accuracy"
)
# Average the per-fold scores (axis 1) for each training-set size.
# The *_std arrays are computed but not used in the visible lines.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)
plt.plot(train_sizes, train_scores_mean, label='Training Score')
plt.plot(train_sizes, test_scores_mean, label='Test Score')
plt.plot(train_sizes, train_scores_mean, label="Training Score")
plt.plot(train_sizes, test_scores_mean, label="Test Score")
plt.legend()
plt.show()

# Define the ROC-AUC curve
# Only the scalar ROC-AUC is computed; no curve is drawn. `model` is the Keras
# network, and column 1 of its softmax output is used as the positive-class
# score.
y_pred_proba = model.predict(X_test_pca)
roc_auc = roc_auc_score(y_test, y_pred_proba[:, 1])
print('ROC-AUC Score:', roc_auc)
print("ROC-AUC Score:", roc_auc)

# Define the precision-recall curve
# NOTE(review): `precision_recall_curve` is not among the imports shown at the
# top of this diff (it lives in sklearn.metrics); expected NameError unless it
# is imported elsewhere. The returned arrays are not plotted in the visible
# lines.
precision, recall, _ = precision_recall_curve(y_test, y_pred_proba[:, 1])
average_precision = average_precision_score(y_test, y_pred_proba[:, 1])
print('Average Precision Score:', average_precision)
print("Average Precision Score:", average_precision)

# Define the F1 score
# All metrics in this section score y_pred_class, the hard labels produced by
# the Keras network earlier in the script.
f1 = f1_score(y_test, y_pred_class)
print('F1 Score:', f1)
print("F1 Score:", f1)

# Define the recall score
# NOTE(review): rebinds `recall`, which held the precision-recall curve array
# from the preceding section, to a single scalar.
recall = recall_score(y_test, y_pred_class)
print('Recall Score:', recall)
print("Recall Score:", recall)

# Define the precision score
# NOTE(review): likewise rebinds `precision` from the curve array to a scalar.
precision = precision_score(y_test, y_pred_class)
print('Precision Score:', precision)
print("Precision Score:", precision)

# Define the confusion matrix
# Same inputs as the confusion matrix printed right after training.
cm = confusion_matrix(y_test, y_pred_class)
print('Confusion Matrix:')
print("Confusion Matrix:")
print(cm)

# Define the classification report
# Same inputs as the classification report printed right after training.
cr = classification_report(y_test, y_pred_class)
print('Classification Report:')
print("Classification Report:")
print(cr)

# Define the feature selection pipeline
# Two steps: keep the features a random forest considers important, then
# classify with XGBoost.
# NOTE(review): the final step is the shared xgb_model object, not a copy, so
# fitting this pipeline presumably refits that same object - confirm whether
# later code relies on its earlier fit.
feature_selection_pipeline = Pipeline([
('feature_selection', SelectFromModel(rf_model)),
('classifier', xgb_model)
])
feature_selection_pipeline = Pipeline(
[("feature_selection", SelectFromModel(rf_model)), ("classifier", xgb_model)]
)

# Train the feature selection pipeline
feature_selection_pipeline.fit(X_train_pca, y_train)

# Evaluate the feature selection pipeline
# Accuracy on the held-out split.
y_pred_fs = feature_selection_pipeline.predict(X_test_pca)
print('Feature Selection Accuracy:', accuracy_score(y_test, y_pred_fs))
print("Feature Selection Accuracy:", accuracy_score(y_test, y_pred_fs))

# Define the recursive feature elimination pipeline
# Two steps: recursive feature elimination with 5-fold CV (random forest as
# the ranking estimator, accuracy as the selection score), then XGBoost on the
# surviving features.
# NOTE(review): the inputs are X_train_pca, so the "features" being eliminated
# are presumably PCA components rather than original columns - confirm.
rfe_pipeline = Pipeline([
('feature_selection', RFECV(rf_model, cv=5, scoring='accuracy')),
('classifier', xgb_model)
])
rfe_pipeline = Pipeline(
[
("feature_selection", RFECV(rf_model, cv=5, scoring="accuracy")),
("classifier", xgb_model),
]
)

# Train the recursive feature elimination pipeline
rfe_pipeline.fit(X_train_pca, y_train)

# Evaluate the recursive feature elimination pipeline
# Accuracy on the held-out split.
y_pred_rfe = rfe_pipeline.predict(X_test_pca)
print('Recursive Feature Elimination Accuracy:', accuracy_score(y_test, y_pred_rfe))
print("Recursive Feature Elimination Accuracy:", accuracy_score(y_test, y_pred_rfe))

# Define the hyperparameter tuning pipeline
# Two steps: random-forest based feature selection, then a 5-fold grid search
# over XGBoost's max_depth and learning_rate (9 combinations, scored on
# accuracy).
# NOTE(review): the grid search is the last pipeline step, so the feature
# selector is fitted once on the whole training split and only the classifier
# is cross-validated. Wrapping the whole pipeline in GridSearchCV would
# include selection in each fold.
hyperparameter_tuning_pipeline = Pipeline([
('feature_selection', SelectFromModel(rf_model)),
('classifier', GridSearchCV(xgb_model, {'max_depth': [3, 5, 7], 'learning_rate': [0.1, 0.5, 1.0]}, cv=5, scoring='accuracy'))
])
hyperparameter_tuning_pipeline = Pipeline(
[
("feature_selection", SelectFromModel(rf_model)),
(
"classifier",
GridSearchCV(
xgb_model,
{"max_depth": [3, 5, 7], "learning_rate": [0.1, 0.5, 1.0]},
cv=5,
scoring="accuracy",
),
),
]
)

# Train the hyperparameter tuning pipeline
hyperparameter_tuning_pipeline.fit(X_train_pca, y_train)

# Evaluate the hyperparameter tuning pipeline
# Accuracy on the held-out split.
y_pred_ht = hyperparameter_tuning_pipeline.predict(X_test_pca)
print('Hyperparameter Tuning Accuracy:', accuracy_score(y_test, y_pred_ht))
print("Hyperparameter Tuning Accuracy:", accuracy_score(y_test, y_pred_ht))
Loading