# ---
# jupyter:
#   kernelspec:
#     display_name: Python 3.8.10 64-bit
#     language: python
#     name: python3
# ---

# %% [markdown]
# # TP1 - K-means
#
# The goal is to implement the k-means algorithm and use it to cluster data
# into 2 classes, then into more than 2 classes.
# Later on, the algorithm will be used to segment an image based on its
# colour information.

# %%
import numpy as np
import scipy.spatial

# %%
# Configuration: everything a reader may want to tune lives in this cell.
SEED = 0
rng = np.random.default_rng(SEED)  # seeded so that Restart & Run All is reproducible

# mean = [1,2,3,4]
# sd = [0.25, 0.25, 0.1, 0.2]
clusters = 2
# One integer centre per cluster in [0, 5); drawn without replacement so two
# clusters can never be generated on top of each other.
mean = rng.choice(5, size=clusters, replace=False)
sd = [0.25, 0.25, 0.3]  # only the first `clusters` entries are used
dim = 2
nb = 50
K = clusters

# %% [markdown]
# ## Functions used for the clustering

# %%
def _per_cluster(value, clusters, name):
    """Return ``value`` as a 1-D float array holding one entry per cluster."""
    arr = np.asarray(value, dtype=float)
    if arr.ndim == 0:
        # A scalar applies to every cluster.
        return np.full(clusters, float(arr))
    arr = arr.ravel()
    if arr.size < clusters:
        raise ValueError(
            f"{name} needs at least {clusters} entries, got {arr.size}"
        )
    return arr[:clusters]


def gen_points(mean=1, sd=0.5, nb=100, dim=2, clusters=2, rng=None):
    """Draw ``clusters`` isotropic Gaussian blobs of ``nb`` points each.

    Parameters
    ----------
    mean, sd : scalar or sequence
        Centre and standard deviation of each blob. A scalar is shared by all
        blobs; a sequence must provide at least ``clusters`` entries (extra
        entries are ignored). The same mean is used for every coordinate.
    nb : int
        Number of points per blob.
    dim : int
        Number of coordinates per point.
    clusters : int
        Number of blobs (at least 1).
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` keeps working.

    Returns
    -------
    numpy.ndarray of shape ``(nb * clusters, dim)``; blobs are stacked in order.

    Raises
    ------
    ValueError
        If ``clusters < 1`` or ``mean``/``sd`` have too few entries.
    """
    if clusters < 1:
        raise ValueError("clusters must be at least 1")
    means = _per_cluster(mean, clusters, "mean")
    sds = _per_cluster(sd, clusters, "sd")
    gen = np.random if rng is None else rng
    blobs = [gen.normal(m, s, size=(nb, dim)) for m, s in zip(means, sds)]
    return np.concatenate(blobs, axis=0)


# %%
def distance(points, Pc):
    """Euclidean distance between every row of ``points`` and every row of ``Pc``.

    Both arguments are 2-D arrays with the same number of columns; the result
    has shape ``(len(points), len(Pc))``.
    """
    return scipy.spatial.distance.cdist(points[:, :], Pc[:, :])


# %%
def kmeans(points=(0, 0), K=1, nb=1, dim=2, max_iter=10, eps=0.1, rng=None):
    """Cluster ``points`` into ``K`` groups with Lloyd's k-means algorithm.

    Parameters
    ----------
    points : array-like of shape ``(n_points, >= dim)``
        Data to cluster. Only the first ``dim`` columns are used as
        coordinates; a single 1-D point is accepted as well.
    K : int
        Number of prototypes (clusters).
    nb : int
        Unused; kept so existing calls ``kmeans(points, K=K, nb=nb, dim=dim)``
        keep working. The number of points is read from ``points`` itself.
    dim : int
        Number of leading columns of ``points`` holding the coordinates.
    max_iter : int
        Maximum number of assignment/update rounds (at least one is run).
    eps : float
        Stop as soon as no prototype moved by more than ``eps`` in a round.
    rng : numpy.random.Generator, optional
        Source of randomness for the initial prototypes. When omitted,
        NumPy's global random state is used.

    Returns
    -------
    Pc : numpy.ndarray of shape ``(K, dim)``
        Final prototypes (cluster centroids).
    labelled : numpy.ndarray of shape ``(n_points, dim + 1)``
        The coordinates with the cluster index appended as the last column.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 1 or larger than the number of points.
    """
    data = np.atleast_2d(np.asarray(points, dtype=float))
    coords = data[:, :dim]
    n_points = coords.shape[0]
    if not 1 <= K <= n_points:
        raise ValueError(f"K must be between 1 and {n_points}, got {K}")

    # Initialise the K prototypes on K distinct data points: drawing without
    # replacement avoids starting with two identical prototypes.
    gen = np.random if rng is None else rng
    start = gen.choice(n_points, size=K, replace=False)
    Pc = coords[start].copy()

    for _ in range(max(1, max_iter)):
        # Assignment step: each point joins its nearest prototype.
        labels = np.argmin(distance(coords, Pc), axis=1)

        # Update step: each prototype moves to the centroid of its members.
        updated = Pc.copy()
        for k in range(K):
            members = coords[labels == k]
            if len(members):
                updated[k] = members.mean(axis=0)
            # An empty cluster keeps its previous prototype instead of
            # dividing by zero and turning into NaN.

        # Converged when every prototype stayed (almost) where it was.
        shift = np.linalg.norm(updated - Pc, axis=1).max()
        Pc = updated
        if shift <= eps:
            break

    return Pc, np.column_stack((coords, labels))


# %%
colors = ['red', 'green', 'yellow', 'blue', 'purple', 'orange']


def visualisation(points, Pc=(0, 0), dim=2, K=1, ax=None):
    """Scatter-plot labelled 2-D points and their prototypes.

    Parameters
    ----------
    points : array of shape ``(n_points, 3)``
        x, y and cluster label (last column), as returned by ``kmeans``.
    Pc : array-like of shape ``(n_prototypes, 2)``
        Prototypes, drawn as black ``+`` markers.
    dim : int
        Dimension of the data; nothing is drawn unless it equals 2.
    K : int
        Unused; kept for backward compatibility. The clusters drawn are the
        labels actually present in ``points``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on; defaults to the current axes.
    """
    if dim != 2:
        return

    # Imported here because this is the only function that needs matplotlib,
    # so the clustering helpers stay usable without it.
    import matplotlib.pyplot as plt

    points = np.asarray(points, dtype=float)
    Pc = np.atleast_2d(np.asarray(Pc, dtype=float))
    if ax is None:
        ax = plt.gca()

    labels = points[:, -1].astype(int)
    # One scatter call per cluster: every point is drawn exactly once.
    for k in np.unique(labels):
        members = points[labels == k]
        ax.scatter(
            members[:, 0],
            members[:, 1],
            color=colors[k % len(colors)],  # cycle when there are many clusters
            label=f"cluster {k}",
        )
    # Black markers stay visible on top of every cluster colour.
    ax.scatter(Pc[:, 0], Pc[:, 1], marker='+', s=200, color='black',
               label='prototypes')
    ax.grid(True)
    # Limits follow the plotted data instead of a notebook-level variable.
    ax.margins(0.1)
    ax.set(title='K-means clustering', xlabel='x', ylabel='y')
    ax.legend()


# %% [markdown]
# ## Clustering of synthetic data

# %%
# `__name__` is "__main__" inside a Jupyter kernel, so this cell runs normally
# there while the functions above stay importable without side effects.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    points = gen_points(mean, sd, nb, dim, clusters, rng=rng)

    # Sanity check of the distance helper on the generated data.
    dist = distance(points, points)
    assert dist.shape == (nb * clusters, nb * clusters)

    # The labelled points get their own name so that the `clusters` setting
    # is not overwritten and every cell can be re-run.
    Pc, labelled_points = kmeans(points, K=K, nb=nb, dim=dim, rng=rng)

    visualisation(labelled_points, Pc, dim=dim, K=K)
    print(Pc)
    print(mean)
    plt.show()