2022-10-19 09:02:34 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# TP1 KMEANS\n",
|
|
|
|
"\n",
|
|
|
|
"On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n",
|
|
|
|
"Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur."
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 379,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
"import numpy as np\n",
|
2022-10-30 14:21:09 +01:00
|
|
|
"import scipy.spatial"
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 380,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-10-30 14:21:09 +01:00
|
|
# Experiment configuration: every value a reader may want to tune lives here.
SEED = 42  # fixed seed so that "Restart Kernel -> Run All" reproduces the same figure
np.random.seed(SEED)

clusters = 2  # number of Gaussian blobs to generate
dim = 2       # dimensionality of every point
nb = 50       # number of points drawn per blob
K = clusters  # number of prototypes requested from k-means

# One centre per blob, drawn from {0, ..., 4} WITHOUT replacement so that two
# blobs can never share the same centre (requires clusters <= 5).
mean = np.random.choice(5, size=clusters, replace=False)
# One standard deviation per blob; trimmed so its length always matches `clusters`.
sd = [0.25, 0.25, 0.3][:clusters]
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
2022-10-30 14:21:09 +01:00
|
|
|
"## Fonctions à utiliser pour le clustering"
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 381,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
def gen_points(mean=1, sd=0.5, nb=100, dim=2, clusters=2):
    """Draw `clusters` isotropic Gaussian blobs and stack them into one array.

    Parameters
    ----------
    mean : scalar or sequence
        Centre of each blob (the same value is used on every axis). A scalar
        is shared by all blobs; a sequence is indexed per blob and only its
        first `clusters` entries are used.
    sd : scalar or sequence
        Standard deviation of each blob, with the same scalar/sequence rules
        as `mean`.
    nb : int
        Number of points drawn for each blob.
    dim : int
        Number of coordinates of each point.
    clusters : int
        Number of blobs to generate (must be >= 1).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(clusters * nb, dim)``; rows ``i*nb`` to
        ``(i+1)*nb - 1`` were drawn from blob ``i``.

    Raises
    ------
    ValueError
        If `clusters` is smaller than 1.
    IndexError
        If `mean` or `sd` is a sequence with fewer than `clusters` entries.
    """
    if clusters < 1:
        raise ValueError("clusters must be >= 1")

    def per_blob(value):
        # Scalars are repeated for every blob; sequences are used as given.
        arr = np.asarray(value, dtype=float)
        return np.full(clusters, float(arr)) if arr.ndim == 0 else arr

    means = per_blob(mean)
    sds = per_blob(sd)

    # Blobs are drawn one after the other (blob 0 first) and joined once at
    # the end instead of re-allocating the array at every iteration.
    blobs = [
        np.random.normal(means[i], sds[i], size=(nb, dim))
        for i in range(clusters)
    ]
    return np.concatenate(blobs, axis=0)
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 382,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-10-30 14:21:09 +01:00
|
|
def distance(points, Pc):
    """Return the pairwise Euclidean distances between `points` and `Pc`.

    Parameters
    ----------
    points : array-like, shape (n, d) or (d,)
        Sample coordinates. A single 1-D point is treated as one row.
    Pc : array-like, shape (k, d) or (d,)
        Prototype coordinates. A single 1-D point is treated as one row.

    Returns
    -------
    numpy.ndarray, shape (n, k)
        Entry ``[i, j]`` is the Euclidean distance between ``points[i]`` and
        ``Pc[j]``.
    """
    # atleast_2d lets callers pass plain lists or a single point, which the
    # previous `[:, :]` slicing rejected.
    samples = np.atleast_2d(np.asarray(points, dtype=float))
    prototypes = np.atleast_2d(np.asarray(Pc, dtype=float))
    return scipy.spatial.distance.cdist(samples, prototypes)
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
2022-10-30 14:21:09 +01:00
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 383,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
2022-10-30 14:21:09 +01:00
|
|
|
"outputs": [],
|
2022-10-19 09:02:34 +02:00
|
|
|
"source": [
|
2022-10-30 14:21:09 +01:00
|
|
def kmeans(points=[0, 0], K=1, nb=1, dim=2, eps=0.1, max_iter=10):
    """Cluster `points` into `K` groups with Lloyd's k-means algorithm.

    Parameters
    ----------
    points : array-like, shape (n, >= dim) or (dim,)
        Samples to cluster. Only the first `dim` columns are used as
        coordinates, so an array that already carries a trailing label column
        can be passed again.
    K : int
        Number of prototypes (clusters); must satisfy ``1 <= K <= n``.
    nb : int
        Unused. Kept for backward compatibility; the number of samples is
        taken from `points` itself.
    dim : int
        Number of coordinate columns in `points`.
    eps : float
        The iteration stops once the mean displacement of the prototypes
        between two consecutive updates is ``<= eps``.
    max_iter : int
        Maximum number of assignment/update iterations.

    Returns
    -------
    Pc : numpy.ndarray, shape (K, dim)
        Final prototypes.
    labelled : numpy.ndarray, shape (n, dim + 1)
        The coordinates with one extra last column holding the index of the
        closest final prototype (stored as float, like the coordinates).

    Raises
    ------
    ValueError
        If `points` is empty or `K` is not in ``[1, n]``.
    """
    data = np.atleast_2d(np.asarray(points, dtype=float))[:, :dim]
    if data.size == 0:
        raise ValueError("points is empty")
    n_points = data.shape[0]
    if not 1 <= K <= n_points:
        raise ValueError(
            f"K must be between 1 and the number of points ({n_points}), got {K}"
        )

    # Initialise the prototypes on K *distinct* samples; the indices are drawn
    # over the real number of rows, not over nb * dim.
    start = np.random.choice(n_points, size=K, replace=False)
    Pc = data[start]

    for _ in range(max_iter):
        # Assignment step: every sample joins its closest prototype.
        labels = np.argmin(scipy.spatial.distance.cdist(data, Pc), axis=1)

        # Update step: each prototype moves to the mean of its members.
        Pc_save = Pc
        Pc = Pc_save.copy()
        for k in range(K):
            members = data[labels == k]
            # An empty cluster keeps its previous prototype instead of
            # becoming NaN through a 0/0 division.
            if members.shape[0] > 0:
                Pc[k] = members.mean(axis=0)

        # Convergence is measured prototype by prototype (row-wise), not on
        # the full K x K cross-distance matrix.
        if np.linalg.norm(Pc - Pc_save, axis=1).mean() <= eps:
            break

    # Final assignment so that the returned labels match the returned prototypes.
    labels = np.argmin(scipy.spatial.distance.cdist(data, Pc), axis=1)
    return Pc, np.column_stack((data, labels))
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 384,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2022-11-10 08:51:50 +01:00
|
|
colors = ['red', 'green', 'yellow', 'blue', 'purple', 'orange']


def visualisation(points, Pc=[0, 0], dim=2, K=1):
    """Scatter-plot labelled 2-D points and their prototypes.

    Parameters
    ----------
    points : array-like, shape (n, >= 2)
        The first two columns are the x/y coordinates and the last column is
        the cluster label of each point.
    Pc : array-like, shape (k, 2) or (2,)
        Prototype coordinates, drawn as red crosses.
    dim : int
        Dimensionality of the data; nothing is drawn unless it equals 2.
    K : int
        Unused. Kept for backward compatibility; the clusters to draw are
        read from the label column of `points`.

    Returns
    -------
    None
        The figure is drawn on the current matplotlib axes.
    """
    if dim != 2:
        return

    labelled = np.asarray(points, dtype=float)
    coords = labelled[:, :2]
    labels = labelled[:, -1].astype(int)
    prototypes = np.atleast_2d(np.asarray(Pc, dtype=float))

    # One plot call per cluster (each point is drawn exactly once); colours
    # cycle when there are more clusters than entries in `colors`.
    for k in np.unique(labels):
        members = coords[labels == k]
        plt.plot(members[:, 0], members[:, 1], 'o', color=colors[k % len(colors)])

    plt.plot(prototypes[:, 0], prototypes[:, 1], 'r+')
    plt.grid(True)

    # Square window derived from the data itself rather than from a notebook
    # global, with a margin of 1 on every side.
    if coords.size:
        lo = coords.min() - 1
        hi = coords.max() + 1
        plt.axis([lo, hi, lo, hi])
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 385,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
2022-11-10 08:51:50 +01:00
|
|
|
"outputs": [],
|
2022-10-19 09:02:34 +02:00
|
|
|
"source": [
|
2022-10-30 14:21:09 +01:00
|
|
# Generate the synthetic dataset. `K` (an int that is never rebound) is passed
# instead of `clusters`, because a later cell rebinds `clusters` to the labelled
# array returned by kmeans, which made this cell fail when re-run.
points = gen_points(mean, sd, nb, dim, clusters=K)
assert points.shape == (K * nb, dim), "unexpected dataset shape"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 386,
|
2022-10-30 14:21:09 +01:00
|
|
|
"metadata": {},
|
2022-11-10 08:51:50 +01:00
|
|
|
"outputs": [],
|
2022-10-30 14:21:09 +01:00
|
|
|
"source": [
|
2022-11-10 08:51:50 +01:00
|
|
# Pairwise Euclidean distances between all generated points
# (quick sanity check of the `distance` helper).
dist = distance(points=points, Pc=points)
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 387,
|
2022-10-19 09:02:34 +02:00
|
|
|
"metadata": {},
|
2022-11-10 08:51:50 +01:00
|
|
|
"outputs": [],
|
2022-10-30 14:21:09 +01:00
|
|
|
"source": [
|
2022-11-10 08:51:50 +01:00
|
|
# Run k-means on the generated dataset.
# NOTE: this rebinds `clusters` (an int in the configuration cell) to the
# labelled point array returned by kmeans; the plotting cell reads it under
# that name.
Pc, clusters = kmeans(points=points, K=K, nb=nb, dim=dim)
|
2022-10-30 14:21:09 +01:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-11-10 08:51:50 +01:00
|
|
|
"execution_count": 388,
|
2022-10-30 14:21:09 +01:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
2022-11-10 08:51:50 +01:00
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"[[ 2.00659379 2.0037594 ]\n",
|
|
|
|
" [-0.05586229 -0.02372516]]\n",
|
|
|
|
"[0 2]\n"
|
|
|
|
]
|
|
|
|
},
|
2022-10-19 09:02:34 +02:00
|
|
|
{
|
|
|
|
"data": {
|
2022-11-10 08:51:50 +01:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGiCAYAAADulWxzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGdklEQVR4nO3dfVhUdcI//vcwMoOkaJgCKj5vPpXPiuheKquE6Tfj/u66rfVdtTVbVzFwVMK92tSyMFRATdf6eif3tnnX1pXUtzWNEPQ2EfOBX+qWJZq0BViakJAzI3N+f9BMDMzDOcM5M2fOvF/XxWVz+JyZz2cGlvd+HnWCIAggIiIi0qCwQFeAiIiISCkMOkRERKRZDDpERESkWQw6REREpFkMOkRERKRZDDpERESkWQw6REREpFkMOkRERKRZDDpERESkWQw6REREpFmKBp2//vWvGDFiBKKiohAVFYXExES8//77Hu958803MWTIEERERODee+/F/v37lawiERERaZiiQad3797YuHEjTp06hZMnT+JXv/oVHnzwQZw/f95l+WPHjmHevHlYtGgRzpw5g9TUVKSmpuLcuXNKVpOIiIg0SufvQz2jo6OxadMmLFq0qM33HnroITQ0NOC9995zXJs4cSJGjRqFXbt2+bOaREREpAEd/PVCTU1NePPNN9HQ0IDExESXZcrKymAymZyupaSkoLCw0O3zms1mmM1mx2ObzYbr16+jW7du0Ol0stSdiIiIlCUIAn744Qf07NkTYWHyDTgpHnTOnj2LxMRE3Lp1C506dcK+ffswbNgwl2VramoQExPjdC0mJgY1NTVunz87Oxvr16+Xtc5EREQUGF999RV69+4t2/MpHnQGDx6MiooK1NXV4a233sKCBQtw+PBht2FHqjVr1jj1AtXV1aFPnz74/PPPER0dLctrBAOr1YqSkhIkJSUhPDw80NXxG7ab7Q4FbDfbHQquX7+Ou+++G507d5b1eRUPOgaDAYMGDQIAjB07Fh9//DG2bt2Kl156qU3Z2NhY1NbWOl2rra1FbGys2+c3Go0wGo1trkdHR6Nbt27trH3wsFqtiIyMRLdu3ULqF4PtZrtDAdvNdocSuaed+H0fHZvN5jSnpqXExEQUFxc7XSsqKnI7p4eIiIjIE0V7dNasWYP7778fffr0wQ8//IC9e/eitLQUBw8eBADMnz8fvXr1QnZ2NgAgPT0dU6dOxZYtWzB79my8/vrrOHnyJF5++WUlq0lEREQapWjQuXr1KubPn4/q6mp06dIFI0aMwMGDB5GcnAwAqKqqcppZPWnSJOzduxdPPfUU/vznP+MXv/gFCgsLcc899yhZTSIiItIoRYPOf/7nf3r8fmlpaZtrc+fOxdy5cxWqEREREYUSnnVFREREmsWgQ0RERJrFoENERESaxaBDREREmsWgQ0RERJrFoENERESaxaBDREREmsWgQ0RERJrFoENERESaxaBDREREmsWgQ0RERJrFoENERESaxaBDREREmsWgQ0RERJrFoENERESa1SHQFSAiIj+zWICdO4HKSmDgQGDpUsBgCHStiBTBoENEFEoyM4HcXKCp6edrq1YBJhOQkxO4ehEphEGHiChUZGYCmza1vd7U9PN1hh3SGM7RISIKBRZLc0+OJ7m5zeWINIRBh4goFOzc6Txc5UpTU3M5Ig1h0CEiCgWVlfKWIwoSDDpERKFg4EB5yxEFCQYdIqJQsHQpoNd7LqPXN5cj0hAGHSKiUGAwNC8h98Rk4n46pDlcXk5EFCrsS8db76Oj13MfHdIsBh0iolCSkwNs2MCdkSlkMOgQEYUagwHIyAh0LYj8gnN0iIiISLMYdIiIiEizGHSIiIhIsxh0iIiISLMYdIiIiEizGHSIiIhIsxh0iIiISLMYdIiIiEizFA062dnZGD9+PDp37owePXogNTUVFy5c8HhPQUEBdDqd01dERISS1SQiIiKNUjToHD58GM
uWLcPx48dRVFQEq9WK++67Dw0NDR7vi4qKQnV1tePrypUrSlaTiIiINErRIyAOHDjg9LigoAA9evTAqVOnMGXKFLf36XQ6xMbGKlk1IiIiCgF+Peuqrq4OABAdHe2x3M2bN9G3b1/YbDaMGTMGzz//PIYPH+6yrNlshtlsdjyur68HAFitVlitVplqrn72toZSmwG2m+0ODWw32x0KlGqvThAEQZFnbsVms2HOnDm4ceMGjh496rZcWVkZvvjiC4wYMQJ1dXXYvHkzjhw5gvPnz6N3795tyq9btw7r169vc33v3r2IjIyUtQ1ERESkjMbGRjz88MOoq6tDVFSUbM/rt6Dzpz/9Ce+//z6OHj3qMrC4Y7VaMXToUMybNw/PPvtsm++76tGJj49HdXU1unXrJkvdg4HVakVRURGSk5MRHh4e6Or4DdvNdocCtpvtDgXXrl1DXFyc7EHHL0NXaWlpeO+993DkyBFJIQcAwsPDMXr0aFy8eNHl941GI4xGo8v7QukHxI7tDi1sd2hhu0NLqLVbqbYquupKEASkpaVh3759OHToEPr37y/5OZqamnD27FnExcUpUEMiIiLSMkV7dJYtW4a9e/finXfeQefOnVFTUwMA6NKlCzp27AgAmD9/Pnr16oXs7GwAwDPPPIOJEydi0KBBuHHjBjZt2oQrV67gscceU7KqREREpEGKBp2//vWvAIBp06Y5Xd+zZw8WLlwIAKiqqkJY2M8dS99//z0WL16Mmpoa3HnnnRg7diyOHTuGYcOGKVlVIiIi0iBFg46Yec6lpaVOj/Py8pCXl6dQjYiISJMsFmDnTqCyEhg4EFi6FDAYAl0rUgG/7qNDREQku8xMIDcXaGr6+dqqVYDJBOTkBK5epAoMOkREFLwyM4FNm9peb2r6+TrDTkjj6eVERBScLJbmnhxPcnOby1HIYtAhIiJ1s1iA/Hxg+fLmf+3BZedO5+EqV5qamstRyOLQFRERqdawggJ0+N//G7DZfr5on3/z44/inqSyUpnKUVBg0CEiIlUKy8rCoMLCtt+wz79JShL3RAMHylovB670CgoMOkREpD4WC8Ly8wEAOndlDh8GwsKce3ta0+ubA0iL55UlnHClV9DgHB0iIlKfnTuhs9nchxygOeBMner5eUymn4NMZiYQGQmsWAG8+GLzv5GRzdelsK/0aj0/yN7TJPX5SFEMOkREwcbd5FwtETuvZvhwYPXq5p6blvT65uv23hW5wglXegUdDl0REQUTLQ+ZtBxW+ve/xd0zcCCQkQFs2OB+SEpsONmwwfUwVut6iV3plZEhrg2kKAYdIqIgEZaV5foPtlo3x5MyH8ZFgLMfIuR2+Krl/BuDwX2wkLIMvfVzuAqWYnCll2pw6IqIKBi0mJzrlpqGTKTMh3EzrGQPOG5PTZwyRVxdxIaO1uXcDXeJodRKL5KMQYeIKAgMOHAAOk+riwD1bI4nZT6MmGEld0pKxE0mFhs6WpZrT71ar/SigGLQISIKAnfU1IgrGOghE6mTdb0MK+ngYegKEDeZeOnStpOVW2sdTsQMd7nTcqUXBRyDDhFREGiIjRVXMNBDJlKPZZArmHkatjMYmsOHJ63DiS/10umcV3qRKjDoEBEFgUszZ0II8/I/2WoYMpE6H0auYOZt2C4nR9wydDtf6qXTNa/cIlVh0CEiCgYGA2zeliurYchE6nyYpUubA4IcvIWsnBygsRHIywPS0pr/bWx03QMjZrirNZtNHXOkyAmDDhFRkLBt3CitVyIQpM6HMRiaV2R54HbVVWtiQpZ9Gfr27c3/uguGYoa7XAn0HClqg/voEBEFk5wcz5vjBZo9INj39XElPd25/tnZwP/8D/Dxx22KOkKOTgcIHiKPEsN29uAoZR+dQM+RojYYdIiIgo2nzfHUwF1A0OuBMWOArVtd7+z8y182H2nRItAIYWEQMjKg1+s9hyelhu3swXLrVu/L2NUwR4raYNAhIiL5uep5qqpqnh
fTmn2J+OrVwK1bjnua+vXDe336YFZqKvTh4c1lXYUnpY6/aL2z84oVrutvp4Y5UtQGgw4RESmjZc+TxdK8uZ8n9vO
|
2022-10-19 09:02:34 +02:00
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
2022-11-10 08:51:50 +01:00
|
|
# Plot the labelled points with their prototypes, then print the estimated
# prototypes followed by the true blob centres for comparison.
visualisation(points=clusters, Pc=Pc, dim=dim, K=K)
print(Pc, mean, sep="\n")
|
2022-10-19 09:02:34 +02:00
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3.8.10 64-bit",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.8.10"
|
|
|
|
},
|
|
|
|
"orig_nbformat": 4,
|
|
|
|
"vscode": {
|
|
|
|
"interpreter": {
|
|
|
|
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
|
|
|
}
|