A minimalistic implementation of common machine learning algorithms in Python and NumPy.
| Linear Regression | Logistic Regression | SVM |
|---|---|---|
| Naive Bayes | CART | k-NN |
| Expectation-Maximization | k-means | PCA |
class LinearRegression(lr=1e-3, tol=1e-4, verbose=False)
[source]
import numpy as np
from lib.linear_regression import LinearRegression
X = 30 * np.random.random((50, 1))
y = 0.5 * X + 1.0 + np.random.normal(size=X.shape)
model = LinearRegression(verbose=True)
model.fit(X, y)
X_new = np.linspace(0, 30, 100)
y_new = model.predict(X_new[:, np.newaxis])
import numpy as np
from lib.linear_regression import LinearRegression
rng = np.random.RandomState(1)
X = rng.randn(200, 2)
y = np.dot(X, [-2, 1]) + 0.1 * rng.randn(X.shape[0])
model = LinearRegression(verbose=True)
model.fit(X, y)
X_new = rng.randn(100, 2)
y_new = model.predict(X_new)
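The `lr` and `tol` parameters suggest a gradient-descent fit on squared error. As a rough sketch of that idea only (parameter names follow the signature above; the actual implementation in `lib.linear_regression` may differ):

```python
import numpy as np

def fit_linear_gd(X, y, lr=1e-3, tol=1e-4, max_iter=10000):
    """Batch gradient descent on mean squared error; returns (weights, bias)."""
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    w, b = np.zeros(X.shape[1]), 0.0
    prev_loss = np.inf
    for _ in range(max_iter):
        err = X @ w + b - y
        loss = np.mean(err ** 2)
        if abs(prev_loss - loss) < tol:      # stop once the loss plateaus
            break
        prev_loss = loss
        w -= lr * 2 * X.T @ err / len(y)     # d(MSE)/dw
        b -= lr * 2 * err.mean()             # d(MSE)/db
    return w, b
```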
class LogisticRegression(weights=None, lr=1e-3, tol=1e-4, verbose=False)
[source]
from sklearn.datasets import make_blobs
from lib.logistic_regression import LogisticRegression
X, y = make_blobs(n_samples=80, centers=2, random_state=0)
model = LogisticRegression(lr=1e-2, verbose=True)
model.fit(X, y)
y_pred = model.predict(X)
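For reference, a minimal sketch of what a logistic regression with `lr` and `tol` parameters typically does: gradient descent on the binary cross-entropy loss of a sigmoid model. Illustrative only, not the repository's exact code:

```python
import numpy as np

def fit_logistic_gd(X, y, lr=1e-2, tol=1e-4, max_iter=10000):
    """Gradient descent on binary cross-entropy; y must contain 0/1 labels."""
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w, b = np.zeros(X.shape[1]), 0.0
    prev_loss = np.inf
    for _ in range(max_iter):
        p = 1.0 / (1.0 + np.exp(-(X @ w + b)))   # sigmoid probabilities
        loss = -np.mean(y * np.log(p + 1e-12) + (1 - y) * np.log(1 - p + 1e-12))
        if abs(prev_loss - loss) < tol:
            break
        prev_loss = loss
        grad = p - y                              # dL/dz for the cross-entropy loss
        w -= lr * X.T @ grad / len(y)
        b -= lr * grad.mean()
    return w, b
```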
class PegasosSVM(kernel='rbf', degree=3, gamma=1, coef0=0.0, lambda_=1, max_iter=10)
[source]
Note: this class implements PEGASOS (Primal Estimated sub-GrAdient SOlver for SVM) rather than a standard SVM solver.
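For context, the core Pegasos update on a linear kernel looks roughly like the sketch below; `lambda_` and `max_iter` follow the signature above, but the kernelized variant this class supports is more involved:

```python
import numpy as np

def pegasos_linear(X, y, lambda_=1.0, max_iter=100, seed=0):
    """Pegasos stochastic sub-gradient updates for a linear SVM; y must be +/-1."""
    rng = np.random.RandomState(seed)
    n, d = X.shape
    w = np.zeros(d)
    t = 0
    for _ in range(max_iter):
        for i in rng.permutation(n):
            t += 1
            eta = 1.0 / (lambda_ * t)            # step size 1 / (lambda * t)
            if y[i] * (w @ X[i]) < 1:            # margin violated: hinge sub-gradient
                w = (1 - eta * lambda_) * w + eta * y[i] * X[i]
            else:                                # margin satisfied: only shrink w
                w = (1 - eta * lambda_) * w
    return w
```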
import numpy as np
from sklearn.datasets import make_blobs
from lib.pegasos import PegasosSVM
X, y = make_blobs(n_samples=40, centers=2, random_state=6)
y = np.where(y > 0, 1, -1)  # relabel classes as {-1, +1}
model = PegasosSVM(kernel='linear', max_iter=100)
model.fit(X, y)
y_pred = model.predict(X)
import numpy as np
from sklearn.datasets import make_circles
from lib.pegasos import PegasosSVM
X, y = make_circles(noise=0.2, factor=0.5, random_state=1)
y = np.where(y > 0, 1, -1)  # relabel classes as {-1, +1}
model = PegasosSVM(kernel='rbf', max_iter=100)
model.fit(X, y)
y_pred = model.predict(X)
class NaiveBayes()
[source]
from sklearn.datasets import make_blobs
from lib.naive_bayes import NaiveBayes
X, y = make_blobs(n_samples=60, centers=2, random_state=2, cluster_std=1.5)
model = NaiveBayes()
model.fit(X, y)
y_pred = model.predict(X)
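Since the example uses continuous blob features, the class presumably models each feature with a per-class Gaussian. A rough sketch of Gaussian Naive Bayes prediction under that assumption (not necessarily how `lib.naive_bayes` does it):

```python
import numpy as np

def gaussian_nb_fit_predict(X, y, X_new):
    """Fit per-class Gaussians with independent features; predict by max posterior."""
    classes = np.unique(y)
    log_scores = []
    for c in classes:
        Xc = X[y == c]
        prior = len(Xc) / len(X)
        mu, var = Xc.mean(axis=0), Xc.var(axis=0) + 1e-9
        # log p(x | c) under independent Gaussians, plus the log prior
        log_lik = -0.5 * np.sum(np.log(2 * np.pi * var) + (X_new - mu) ** 2 / var, axis=1)
        log_scores.append(np.log(prior) + log_lik)
    return classes[np.argmax(np.column_stack(log_scores), axis=1)]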
class DecisionTreeClassifier(max_depth=None)
[source]
from sklearn.datasets import load_iris
from lib.cart import DecisionTreeClassifier
X, y = load_iris(return_X_y=True)
model = DecisionTreeClassifier()
model.fit(X, y)
y_pred = model.predict(X)
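CART grows the tree greedily: at each node it picks the feature/threshold split that minimizes the weighted Gini impurity of the children. A minimal sketch of that split search (illustrative only):

```python
import numpy as np

def gini(y):
    """Gini impurity of a label vector."""
    _, counts = np.unique(y, return_counts=True)
    p = counts / counts.sum()
    return 1.0 - np.sum(p ** 2)

def best_split(X, y):
    """Exhaustively search (feature, threshold) minimizing weighted child impurity."""
    best = (None, None, np.inf)
    for j in range(X.shape[1]):
        for t in np.unique(X[:, j]):
            left = X[:, j] <= t
            if left.all() or not left.any():
                continue
            score = (left.sum() * gini(y[left]) + (~left).sum() * gini(y[~left])) / len(y)
            if score < best[2]:
                best = (j, t, score)
    return best  # (feature index, threshold, weighted Gini)
```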
class KNeighborsClassifier(k=1)
[source]
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from lib.knn import KNeighborsClassifier
X, y = make_moons(noise=0.3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
model = KNeighborsClassifier(k=3)
y_pred = model.predict(X_train, y_train, X_test)
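The `predict(X_train, y_train, X_test)` signature suggests a lazy learner: there is no training phase, and each test point is classified by a majority vote over its k nearest training points. A compact sketch of that idea (assumed, not the repository's code):

```python
import numpy as np

def knn_predict(X_train, y_train, X_test, k=3):
    """Majority vote over the k nearest neighbors under Euclidean distance."""
    # pairwise distances, shape (n_test, n_train)
    d = np.linalg.norm(X_test[:, None, :] - X_train[None, :, :], axis=2)
    nearest = np.argsort(d, axis=1)[:, :k]       # indices of the k closest training points
    votes = y_train[nearest]                     # their labels, shape (n_test, k)
    return np.array([np.bincount(row).argmax() for row in votes])
```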
class GMM(n_components=1, tol=0.001, max_iter=100, init_params='kmeans', random_state=None)
[source]
from sklearn.datasets import make_blobs
from lib.gmm import GMM
X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.60, random_state=0)
X = X[:, ::-1] # flip axes for better plotting
model = GMM(n_components=4, random_state=2)
model.fit(X)
model.predict(X)
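The mixture is fit by Expectation-Maximization: the E-step computes each component's responsibility for each point, and the M-step re-estimates weights, means, and covariances from those responsibilities. A condensed, didactic sketch of one EM iteration (not necessarily how `lib.gmm.GMM` implements it):

```python
import numpy as np

def em_step(X, weights, means, covs):
    """One EM iteration for a Gaussian mixture with full covariances."""
    n, d = X.shape
    k = len(weights)
    # E-step: responsibilities r[i, j] = p(component j | x_i)
    r = np.empty((n, k))
    for j in range(k):
        diff = X - means[j]
        inv = np.linalg.inv(covs[j])
        norm = np.sqrt(((2 * np.pi) ** d) * np.linalg.det(covs[j]))
        r[:, j] = weights[j] * np.exp(-0.5 * np.sum(diff @ inv * diff, axis=1)) / norm
    r /= r.sum(axis=1, keepdims=True)
    # M-step: re-estimate mixture weights, means, and covariances
    nk = r.sum(axis=0)
    weights = nk / n
    means = (r.T @ X) / nk[:, None]
    covs = [((r[:, j, None] * (X - means[j])).T @ (X - means[j])) / nk[j] + 1e-6 * np.eye(d)
            for j in range(k)]
    return weights, means, covs
```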
class KMeans(n_clusters=8, max_iter=300, tol=1e-4, random_state=None)
[source]
import numpy as np
from sklearn.datasets import make_blobs
from lib.kmeans import KMeans
np.random.seed(0)
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
X, _ = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7)
model = KMeans(n_clusters=3, random_state=0)
model.fit(X)
model.cluster_centers_
np.random.seed(0)
X_new, _ = make_blobs(n_samples=10, centers=centers, cluster_std=0.7)
model.predict(X_new)
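Lloyd's algorithm alternates between assigning each point to its nearest centroid and moving each centroid to the mean of its assigned points, stopping once the centroids move less than `tol`. A short sketch under those assumptions:

```python
import numpy as np

def kmeans(X, n_clusters=3, max_iter=300, tol=1e-4, seed=0):
    """Lloyd's algorithm: alternate nearest-centroid assignment and centroid update."""
    rng = np.random.RandomState(seed)
    centers = X[rng.choice(len(X), n_clusters, replace=False)]  # random initial centroids
    for _ in range(max_iter):
        # assignment step: index of the nearest centroid for every point
        labels = np.argmin(np.linalg.norm(X[:, None] - centers[None], axis=2), axis=1)
        # update step: each centroid becomes the mean of its assigned points
        # (empty clusters are not handled in this sketch)
        new_centers = np.array([X[labels == j].mean(axis=0) for j in range(n_clusters)])
        if np.linalg.norm(new_centers - centers) < tol:
            break
        centers = new_centers
    return centers, labels
```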
class PCA(n_components=None, whiten=False)
[source]
from sklearn import datasets
from lib.pca import PCA
X, y = datasets.load_iris(return_X_y=True)
model = PCA(n_components=3)
model.fit(X)
X = model.transform(X)
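PCA centers the data and projects it onto the top eigenvectors of the covariance matrix, or equivalently the top right singular vectors of the centered data matrix. A brief sketch, with optional whitening matching the `whiten` flag above (illustrative, not the repository's exact code):

```python
import numpy as np

def pca_transform(X, n_components=2, whiten=False):
    """Project X onto its top principal components via SVD of the centered data."""
    Xc = X - X.mean(axis=0)
    U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
    components = Vt[:n_components]                        # principal axes (rows)
    Z = Xc @ components.T                                 # projected coordinates
    if whiten:
        Z /= S[:n_components] / np.sqrt(len(X) - 1)       # scale to unit variance per component
    return Z
```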
- fast.ai Wiki
- Implementing Naive Bayes in 2 minutes with Python
- Decision Tree from Scratch in Python
- Develop k-Nearest Neighbors in Python From Scratch
- Implementing PEGASOS: Primal Estimated sub-GrAdient SOlver for SVM, Logistic Regression and Application in Sentiment Classification (in Python)
- ML From Scratch, Part 5: Gaussian Mixture Models
- K-Means Clustering Implementation in Python
- How to Calculate Principal Component Analysis (PCA) from Scratch in Python