Skip to content

Commit 8baf52b

Browse files
committed
Update examples for KNN model
1 parent a07023c commit 8baf52b

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python
2+
# Created by "Thieu" at 09:09, 10/05/2025 ----------%
3+
# Email: nguyenthieu2102@gmail.com %
4+
# Github: https://github.com/thieu1995 %
5+
# --------------------------------------------------%
6+
7+
from sklearn.neighbors import KNeighborsClassifier
8+
from sklearn.datasets import load_iris
9+
from sklearn.model_selection import train_test_split
10+
from metasklearn import MetaSearchCV, IntegerVar, StringVar
11+
12+
# Load the Iris dataset as (features, labels) arrays
13+
X, y = load_iris(return_X_y=True)
14+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # hold out 20% for testing; fixed random_state makes the split reproducible
15+
16+
# Define the hyper-parameter search space
17+
18+
# param_grid = { ==> Equivalent GridSearchCV grid, kept to show how it maps onto MetaSearchCV's param_bounds
19+
# 'n_neighbors': [2, 3, 5, 7, 9, 11], # Number of nearest neighbors
20+
# 'weights': ['uniform', 'distance'], # Weighting: uniform or by distance
21+
# 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'], # Neighbor search algorithm
22+
# 'leaf_size': [10, 20, 30, 40, 50], # Leaf node size; affects performance
23+
# 'p': [1, 2], # Minkowski distance parameter: 1 (Manhattan), 2 (Euclidean)
24+
# 'metric': ['minkowski'], # Distance metric; 'minkowski' is usually combined with p
25+
# # Other metrics can be added if needed, e.g. 'euclidean', 'manhattan', 'chebyshev', 'mahalanobis'
26+
# }
27+
28+
param_bounds = [
29+
IntegerVar(lb=2, ub=12, name="n_neighbors"),
30+
StringVar(valid_sets=("uniform", "distance"), name="weights"),
31+
StringVar(valid_sets=("auto", "ball_tree", "kd_tree", "brute"), name="algorithm"),
32+
IntegerVar(lb=10, ub=50, name="leaf_size"),
33+
IntegerVar(lb=1, ub=2, name="p"), # 1 (Manhattan), 2 (Euclidean)
34+
StringVar(valid_sets=("minkowski", "manhattan"), name="metric"), # Distance metric; 'minkowski' is usually combined with p (NOTE(review): p presumably only matters for 'minkowski' - confirm)
35+
]
36+
37+
# Configure the MetaSearchCV search over param_bounds for a KNN classifier
38+
searcher = MetaSearchCV(
39+
estimator=KNeighborsClassifier(),
40+
param_bounds=param_bounds,
41+
task_type="classification",
42+
optim="BaseGA",
43+
optim_params={"epoch": 20, "pop_size": 30, "name": "GA"},
44+
cv=3,
45+
scoring="AS", # or any custom scoring like "F1_macro"
46+
seed=42,
47+
n_jobs=2,
48+
verbose=True
49+
)
50+
51+
searcher.fit(X_train, y_train)  # run the search on the training split only
52+
print("Best parameters:", searcher.best_params)
53+
print("Best model: ", searcher.best_estimator)
54+
print("Best score during searching: ", searcher.best_score)
55+
56+
# Make predictions on the held-out test set after re-fit
57+
y_pred = searcher.predict(X_test)  # NOTE(review): y_pred is not used by the lines below
58+
print("Test Accuracy:", searcher.score(X_test, y_test))
59+
print("Test Score: ", searcher.scores(X_test, y_test, list_metrics=("AS", "RS", "PS", "F1S")))

examples/exam_knn_regression.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env python
2+
# Created by "Thieu" at 09:02, 10/05/2025 ----------%
3+
# Email: nguyenthieu2102@gmail.com %
4+
# Github: https://github.com/thieu1995 %
5+
# --------------------------------------------------%
6+
7+
from sklearn.neighbors import KNeighborsRegressor
8+
from sklearn.datasets import load_diabetes
9+
from metasklearn import MetaSearchCV, IntegerVar, StringVar, Data
10+
11+
## Load the diabetes dataset and wrap it in a Data object
12+
X, y = load_diabetes(return_X_y=True)
13+
data = Data(X, y)
14+
15+
## Split train and test (20% test, fixed random_state; inplace=True so the splits are read from data.X_train / data.X_test below)
16+
data.split_train_test(test_size=0.2, random_state=42, inplace=True)
17+
print(data.X_train.shape, data.X_test.shape)
18+
19+
## Scale the features: the scaler returned for the training set is reused to transform the test set
20+
data.X_train, scaler_X = data.scale(data.X_train, scaling_methods=("standard", "minmax"))
21+
data.X_test = scaler_X.transform(data.X_test)
22+
23+
data.y_train, scaler_y = data.scale(data.y_train, scaling_methods=("standard", "minmax"))  # targets get their own scaler
24+
data.y_train = data.y_train.ravel()  # flatten the scaled targets to 1-D
25+
data.y_test = scaler_y.transform(data.y_test.reshape(-1, 1)).ravel()  # reshape to a single column for the scaler, then flatten back to 1-D
26+
27+
# Define the hyper-parameter search space
28+
29+
# param_grid = { ==> Equivalent GridSearchCV grid, kept to show how it maps onto MetaSearchCV's param_bounds
30+
# 'n_neighbors': [2, 3, 5, 7, 9, 11], # Number of nearest neighbors
31+
# 'weights': ['uniform', 'distance'], # Weighting: uniform or by distance
32+
# 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'], # Neighbor search algorithm
33+
# 'leaf_size': [10, 20, 30, 40, 50], # Leaf node size; affects performance
34+
# 'p': [1, 2], # Minkowski distance parameter: 1 (Manhattan), 2 (Euclidean)
35+
# 'metric': ['minkowski'], # Distance metric; 'minkowski' is usually combined with p
36+
# # Other metrics can be added if needed, e.g. 'euclidean', 'manhattan', 'chebyshev', 'mahalanobis'
37+
# }
38+
39+
param_bounds = [
40+
IntegerVar(lb=2, ub=12, name="n_neighbors"),
41+
StringVar(valid_sets=("uniform", "distance"), name="weights"),
42+
StringVar(valid_sets=("auto", "ball_tree", "kd_tree", "brute"), name="algorithm"),
43+
IntegerVar(lb=10, ub=50, name="leaf_size"),
44+
IntegerVar(lb=1, ub=2, name="p"), # 1 (Manhattan), 2 (Euclidean)
45+
StringVar(valid_sets=("minkowski", "manhattan"), name="metric"), # Distance metric; 'minkowski' is usually combined with p (NOTE(review): p presumably only matters for 'minkowski' - confirm)
46+
]
47+
48+
# Configure the MetaSearchCV search over param_bounds for a KNN regressor
49+
searcher = MetaSearchCV(
50+
estimator=KNeighborsRegressor(),
51+
param_bounds=param_bounds,
52+
task_type="regression",
53+
optim="BaseGA",
54+
optim_params={"epoch": 20, "pop_size": 30, "name": "GA"},
55+
cv=3,
56+
scoring="MSE", # or another regression metric name, e.g. "RMSE" (TODO confirm the supported scoring names)
57+
seed=42,
58+
n_jobs=2,
59+
verbose=True
60+
)
61+
62+
searcher.fit(data.X_train, data.y_train)  # run the search on the scaled training split only
63+
print("Best parameters:", searcher.best_params)
64+
print("Best model: ", searcher.best_estimator)
65+
print("Best score during searching: ", searcher.best_score)
66+
67+
# Make predictions on the held-out test set after re-fit
68+
y_pred = searcher.predict(data.X_test)  # NOTE(review): y_pred is not used by the lines below
69+
print("Test R2:", searcher.score(data.X_test, data.y_test))  # targets were scaled above, so scores are computed on the scaled values
70+
print("Test Score: ", searcher.scores(data.X_test, data.y_test, list_metrics=("RMSE", "R", "KGE", "NNSE")))

0 commit comments

Comments
 (0)