{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# TP1 KMEANS\n", "\n", "On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n", "Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur." ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import scipy.spatial\n", "from skimage import io" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [], "source": [ "# mean = [1,2,3,4]\n", "# sd = [0.25, 0.25, 0.1, 0.2]\n", "clusters = 5\n", "dim = 2\n", "nb = 50\n", "K= clusters\n", "mean = np.random.randint(5, size=clusters)\n", "mean = mean.T * np.random.random(size=clusters)\n", "sd = np.random.random(size=clusters)\n", "path_image = \"fruits.jpg\"\n", "# print(mean)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fonctions à utiliser pour le clustering" ] }, { "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [], "source": [ "def gen_points(mean=1,sd=0.5, nb=100, dim=2, clusters=2):\n", " size = []\n", " # for i in range(0,dim):\n", " size.append(nb)\n", " size.append(dim)\n", " points = np.random.normal(mean[0],sd[0],size=size)\n", " for i in range(1,clusters):\n", " points = np.concatenate((points,np.random.normal(mean[i],sd[i],size=size)),axis=0)\n", " \n", " return points, mean" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [], "source": [ "def distance(points,Pc): \n", " return scipy.spatial.distance.cdist(points[:,:], Pc[:,:])" ] }, { "cell_type": "code", "execution_count": 192, "metadata": {}, "outputs": [], "source": [ "def kmeans(points = [0,0], K = 1):\n", " # Initialisation K prototypes\n", " dim = points.shape[1]\n", " N = points.shape[0]\n", " iter = 0\n", " eps = 0.1\n", " Pc_index = []\n", " Pc_save = np.zeros([K,dim])\n", " clusters = []\n", "\n", " for i in range(0,K):\n", " Pc_index.append(np.random.randint(0,N))\n", " Pc = points[Pc_index,:]\n", "\n", " while (np.mean(distance(Pc,Pc_save)) > eps and iter < 3):\n", " iter += 1\n", " Pc_save = Pc\n", " # print(Pc)\n", " # print(points[:,:Pc.shape[0]])\n", " dist = distance(points=points[:,:Pc.shape[1]],Pc=Pc)\n", " clust = np.argmin(dist, axis=1)\n", " clust = np.expand_dims(clust, axis=0)\n", " points = np.append(points[:,:Pc.shape[1]], clust.T, axis=1)\n", " # print(points)\n", " Pc = np.zeros([K,dim])\n", " index = np.array([])\n", "\n", " for n in range(0,N):\n", " for k in range(0,K):\n", " index = np.append(index, (clust==k).sum())\n", " if points[n,-1] == k:\n", " # print(points)\n", " # print(Pc)\n", " Pc[k,:] = np.add(Pc[k,:], points[n,:-1])\n", "\n", " for k in range(0,K):\n", " Pc[k,:] = np.divide(Pc[k,:],index[k])\n", "\n", " # print(Pc)\n", " indice = points[:,-1]\n", " points = points[:,:-1]\n", " return Pc, indice, points\n" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [], "source": [ "colors=['red', 'green','yellow','blue','purple', 'orange']\n", "def visualisation(points, index, Pc=[0,0], K=1):\n", " if(points.shape[1]==2):\n", " # for k in range(0,K):\n", " for n in range(0,len(points)):\n", " plt.plot(points[n,0], points[n,1], 'o', color=colors[int(index[n])])\n", " plt.plot(Pc[:,0],Pc[:,1],'r+')\n", " plt.grid(True)\n", " plt.axis([min(mean)-1,max(mean)+1,min(mean)-1,max(mean)+1])" ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [], "source": [ "def img_2_mat(my_img):\n", " mat = my_img.reshape(my_img.shape[0]*my_img.shape[1],my_img.shape[2])\n", " return mat" ] }, { "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [], "source": [ "def mat_2_img(mat,my_img):\n", " img_seg = mat.reshape(my_img.shape[0], my_img.shape[1], my_img.shape[2])\n", " return img_seg" ] }, { "cell_type": "code", "execution_count": 196, "metadata": {}, "outputs": [], "source": [ "def kmeans_image(path_image, K):\n", " my_img = io.imread(path_image)\n", " imgplot = plt.imshow(my_img)\n", " Mat = img_2_mat(my_img)\n", " \n", " Pc, index, clusters = kmeans(Mat, K)\n", "\n", " for k in range(Mat.shape[0]):\n", " Mat[k,:] = np.floor(Pc[index[k],:])\n", "\n", " img_seg = mat_2_img(Mat, my_img)\n", "\n", " io.imsave(path_image.split('.')[0] + \"_%d.jpg\" % K, img_seg)\n", " imgplot = plt.imshow(img_seg)\n", " return Pc, index, img_seg\n" ] }, { "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [], "source": [ "points, mean = gen_points(mean,sd,nb,dim,clusters)\n", "# print(points.shape)\n", "# print(points.mean(axis=0))\n", "# print(points)" ] }, { "cell_type": "code", "execution_count": 198, "metadata": {}, "outputs": [], "source": [ "dist = distance(points,points)\n", "# print(dist)" ] }, { "cell_type": "code", "execution_count": 199, "metadata": {}, "outputs": [], "source": [ "Pc, index, clusters = kmeans(points,K=K)\n", "# print(index)\n", "# print(clusters)\n" ] }, { "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "visualisation(clusters, index, Pc, K=K)\n", "# print(Pc)\n", "# print(mean)" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [], "source": [ "Pc, index, img_seg = kmeans_image(path_image=path_image, K=250)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(103230,)\n", "[[252.69560225 251.67998184 249.86355505]\n", " [ 69.54698482 50.76095445 17.63930586]\n", " [228.4210873 174.61140875 69.5397857 ]\n", " [154.87751731 83.70097198 45.64258942]\n", " [ nan nan nan]]\n", "[[[252 251 249]\n", " [252 251 249]\n", " [252 251 249]\n", " ...\n", " [252 251 249]\n", " [252 251 249]\n", " [252 251 249]]\n", "\n", " [[252 251 249]\n", " [252 251 249]\n", " [252 251 249]\n", " ...\n", " [252 251 249]\n", " [252 251 249]\n", " [252 251 249]]\n", "\n", " [[252 251 249]\n", " [252 251 249]\n", " [252 251 249]\n", " ...\n", " [252 251 249]\n", " [252 251 249]\n", " [252 251 249]]\n", "\n", " ...\n", "\n", " [[252 251 249]\n", " [252 251 249]\n", " [252 251 249]\n", " ...\n", " [252 251 249]\n", " [252 251 249]\n", " [252 251 249]]\n", "\n", " [[252 251 249]\n", " [252 251 249]\n", " [252 251 249]\n", " ...\n", " [252 251 249]\n", " [252 251 249]\n", " [252 251 249]]\n", "\n", " [[252 251 249]\n", " [252 251 249]\n", " [252 251 249]\n", " ...\n", " [252 251 249]\n", " [252 251 249]\n", " [252 251 249]]]\n" ] } ], "source": [ "print(index.shape)\n", "print(Pc)\n", "print(img_seg)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.4 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.4" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "2ef431f6525756fa8a44688585fa332ef3b2e5fcfe8fe75df35bbf7028a8b511" } } }, "nbformat": 4, "nbformat_minor": 2 }