M2_SETI/D3/TP/TP_SETI_Kmeans/TP1.ipynb
2023-01-05 20:28:08 +01:00

402 lines
22 KiB
Text

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TP1 KMEANS\n",
"\n",
"On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n",
"Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur."
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import scipy.spatial\n",
"from skimage import io"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {},
"outputs": [],
"source": [
"clusters = 2\n",
"dim = 2\n",
"nb = 50\n",
"K = clusters\n",
"mean = np.random.randint(5, size=clusters)*2\n",
"mean = mean.T * np.random.random(size=clusters)\n",
"sd = np.random.random(size=clusters)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fonctions à utiliser pour le clustering"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
"def gen_points(mean=1,sd=0.5, nb=100, dim=2, clusters=2):\n",
" \"\"\" Generates data\n",
" dim: dimension\n",
" nb: number of points\n",
" clusters: number of clusters\n",
" mean: mean\n",
" sd: standard deviation \n",
" \"\"\"\n",
" size = []\n",
" # for i in range(0,dim):\n",
" size.append(nb)\n",
" size.append(dim)\n",
" points = np.random.normal(mean[0],sd[0],size=size)\n",
" for i in range(1,clusters):\n",
" points = np.concatenate((points,np.random.normal(mean[i],sd[i],size=size)),axis=0)\n",
" \n",
" return points, mean"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {},
"outputs": [],
"source": [
"def distance(points,Pc): \n",
" \"\"\" Returns spatial distance between two matrix\n",
" \"\"\"\n",
" return scipy.spatial.distance.cdist(points[:,:], Pc[:,:])"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [],
"source": [
"def kmeans(points = [0,0], K = 1):\n",
" \"\"\" Create K clusters from points\n",
" \"\"\"\n",
" # Initialisation K prototypes\n",
" dim = points.shape[1]\n",
" N = points.shape[0]\n",
" iter = 0\n",
" eps = 0.1\n",
" Pc_index = []\n",
" Pc_save = np.zeros([K,dim])\n",
" clusters = []\n",
"\n",
" for i in range(0,K):\n",
" Pc_index.append(np.random.randint(0,N))\n",
" Pc = points[Pc_index,:]\n",
"\n",
" while (np.mean(distance(Pc,Pc_save)) > eps and iter < 3):\n",
" iter += 1\n",
" Pc_save = Pc\n",
" # print(Pc)\n",
" # print(points[:,:Pc.shape[0]])\n",
" dist = distance(points=points[:,:Pc.shape[1]],Pc=Pc)\n",
" clust = np.argmin(dist, axis=1)\n",
" clust = np.expand_dims(clust, axis=0)\n",
" points = np.append(points[:,:Pc.shape[1]], clust.T, axis=1)\n",
" # print(points)\n",
" Pc = np.zeros([K,dim])\n",
" index = np.array([])\n",
"\n",
" for n in range(0,N):\n",
" for k in range(0,K):\n",
" index = np.append(index, (clust==k).sum())\n",
" if points[n,-1] == k:\n",
" # print(points)\n",
" # print(Pc)\n",
" Pc[k,:] = np.add(Pc[k,:], points[n,:-1])\n",
"\n",
" for k in range(0,K):\n",
" Pc[k,:] = np.divide(Pc[k,:],index[k])\n",
"\n",
" # print(Pc)\n",
" indice = points[:,-1]\n",
" points = points[:,:-1]\n",
" return Pc, indice, points\n"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [],
"source": [
"colors=['red', 'green','yellow','blue','purple', 'orange']\n",
"def visualisation(points, index, Pc=[0,0], K=1):\n",
" \"\"\"Visualisation function of a dataset and its K clusters\n",
" \"\"\"\n",
" if(points.shape[1]==2):\n",
" # for k in range(0,K):\n",
" for n in range(0,len(points)):\n",
" plt.plot(points[n,0], points[n,1], 'o', color=colors[int(index[n])])\n",
" plt.plot(Pc[:,0],Pc[:,1],'ko')\n",
" plt.grid(True)\n",
" plt.axis([min(mean)-1,max(mean)+1,min(mean)-1,max(mean)+1])"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [],
"source": [
"def img_2_mat(my_img):\n",
" \"\"\" Reshaping 3D img NxMx3 to 2D matrix N*Mx3\n",
" \"\"\"\n",
" mat = my_img.reshape(my_img.shape[0]*my_img.shape[1],my_img.shape[2])\n",
" return mat"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def mat_2_img(mat,my_img):\n",
" \"\"\" Reshaping 2D matrix N*Mx3 to 3D img NxMx3 \n",
" \"\"\"\n",
" img_seg = mat.reshape(my_img.shape[0], my_img.shape[1], my_img.shape[2])\n",
" return img_seg"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [],
"source": [
"def kmeans_image(path_image, K):\n",
" \"\"\" Clustering an image and changing pixels to its closest cluster\n",
" \"\"\"\n",
" my_img = io.imread(path_image)\n",
" imgplot = plt.imshow(my_img)\n",
" Mat = img_2_mat(my_img)\n",
" \n",
" Pc, index, clusters = kmeans(Mat, K)\n",
"\n",
" for k in range(Mat.shape[0]):\n",
" Mat[k,:] = np.floor(Pc[index[k],:])\n",
"\n",
" img_seg = mat_2_img(Mat, my_img)\n",
"\n",
" io.imsave(path_image.split('.')[0] + \"_%d.jpg\" % K, img_seg)\n",
" imgplot = plt.imshow(img_seg)\n",
" return Pc, index, img_seg\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exemple\n",
"### Clusterisation 2D\n",
"On fait la clusterisation d'un exemple simple avec 2 nuages de points éloignés"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD5CAYAAADcDXXiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAgeklEQVR4nO3df4zc9X3n8efbi7PBWWIUgzcI8GxzNCgUJ4S1Uleprl7jSJSmoPao4mpLzAlrj5C7hGtXl0sskQZl1fZulUQcRyLXRDj1XjYnklRgOTplzXIEqdDaOX6YECihtgNFdjCJycZkBd73/THftdfj78x8Z+b7ne/n+53XQxp59jvfmX3Pl+U9n3l/3t/P19wdEREpl2V5ByAiIulTchcRKSEldxGRElJyFxEpISV3EZESUnIXESmhc5LuaGZ9wD7gZXf/aM1j/cA3gGHgGPAxdz/Y6PUuuOACHxoaajXeXPzqV7/iHe94R95htE3x50vx56fIsUN8/Pv373/V3S9s9tzEyR34NPAs8M6Yx24Bfu7ul5nZZuBvgI81erGhoSH27dvXwq/Pz8MPP8yGDRvyDqNtij9fij8/RY4d4uM3s0NJnpuoLGNmlwB/AOyos8sNwM7o/v3ANWZmSV5bRETSZ0nOUDWz+4G/As4DxmPKMgeAa939pejnnwC/7e6v1uw3BowBDA4ODk9PT6fyJrI2NzfHwMBA3mG0TfHnS/Hnp8ixQ3z8IyMj+919XdMnu3vDG/BR4J7o/gZgd8w+B4BLlvz8E+CCRq87PDzsRTE7O5t3CB1R/PlS/Pkpcuzu8fED+7xJ3nb3RGWZDwPXm9lBYBrYaGa7avZ5GbgUwMzOAVZSnVgVEZEcNE3u7v5Zd7/E3YeAzcBD7v5nNbs9AGyJ7t8Y7aMVyUREctJKt8wZzOxOql8PHgDuBf7OzF4AXqP6ISAiIjlpKbm7+8PAw9H9O5Zs/zXwJ2kGJiIi7dMZqiIiJaTkLiJSQkruIgUx9fQUQ18ZYtkXljH0lSGmnp7KOyQJWNsTqiLSPVNPTzH24Bgn3jwBwKHjhxh7cAyA0bWjeYYmgdLIXUqh7KPabXu3nUrsi068eYJte7flFJGETiN3KbxeGNUePn64pe0iGrlL4fXCqHbNyjUtbRdRcpfCK+qotpVS0sQ1E6xYvuKMbSuWr2Dimomsw5SCUnKXwiviqHaxlHTo+CEcP1VKqpfgR9eOsv0Pt1NZWcEwKisrbP/D7aUpO0n6lNyl8Io4qm2nlDS6dpSDtx9k4fMLHLz9oBK7NKTkLoVXxFFtUUtJUhzqlpFSGF07GnQyr7Vm5RoOHT/7amkhl5KkWDRyF8lByKWk2onemSMzeYckbVByF8lBqKWkuIneyecnS3dSWC9QcpeeEdpZrCFOkMZN9M4vzLPlu1uCOW6SjJK79IRWWw/b/R0hfXi0o96E7kk/mdlxk2wouUtPyPos1m58eHRDkgndsp39W1ZK7tITsm49LMsSCHETvXHUshk+JXfpCVmfxVqWvvXaid4+64vdTy2b4VNyl56QdethEZdAqGfpRO/OP9pJ/7L+Mx4PpWVTGlNyl56QdethKH3raU/qjq4dZfy948G1bEpzOkNVekaWZ7Euvu62vds4fPwwa1auYeKaCUbXjvLwww9n8jtrZbWu/abBTXzxY19MJUbpnqbJ3czeDjwC9Ef73+/un6/Z52bgvwMvR5vudvcd6YYqEra8l0BoNKmrkXbvSTJynwc2uvucmS0HHjWz77n7YzX7fcvd/2P6IYpIEmWZ1JV0NK25e9Vc9OPy6OaZRiUiLSvTpK50ztyb52kz6wP2A5cB/9PdP1Pz+M3AXwE/A54H/rO7/zTmdcaAMYDBwcHh6enpTuPvirm5OQYGBvIOo22KP1/din/myAyTz08yvzB/alv/sn7G3zvOpsFNbb9ukY9/kWOH+PhHRkb2u/u6pk9298Q34HxgFriyZvsqoD+6/x+Ah5q91vDwsBfF7Oxs3iF0RPHnq5vx73pql1e+XHH7S/PKlyu+66ldHb9mkY9/kWN3j48f2OcJ8nVL3TLu/gszmwWuBQ4s2X5syW47gP/WyuuKSDryntSVcDStuZvZhWZ2fnT/XOAjwI9r9rloyY/XA8+mGKOIFFgZFlQroiQnMV0EzJrZU8A/Ad93991mdqeZXR/t8ykze8bMngQ+BdycTbgiUiRJF1TTB0D6knTLPOXuH3T397v7le5+Z7T9Dnd/ILr/WXf/LXf/gLuPuPuPG7+qSO/qpUSWZEG1sqyoGRotPyDSRSEmsiw/bJL03pdlRc3QKLmLdFFoiSzrD5skvfc6+SobSu4iXRRaIsv6wybJgmo6+SobSu4iXRRaIsv6wybJapxlXVEzb1oVUqSLJq6ZOGPlRsh3ffQ1K9dw6Pih2O1padZ732hFzW7JakXNPGnkLtJFWa8r36pQRs1LLxBy8PaDDY9HFiPs0OZC0qCRu0iXhXQWaQij5lZkNcIObS4kDRq5S2mVrYaaVKvvu5VRc96yGmHXK0O969x3FfZvSMldSinEfvJuKPP7nnp6KnZ+ADofYceVp97W9zZen3+9sMdSyV1KqYw11CTK+r4XP7Tq6XQCOG4u5Ly3ncebC2+esV+RjqWSu5RSGWuoSYT8vjspk8V9aC1KawK4tjz12huvxe4XwrFMQsldSim0fvJuSeN91ybhmSMzHcfVabmoUULNqtuo6H9DSu5SSqG0+CWR5sRvp+87LglPPj/ZcZ2503JRvYRaWVnJbAK4SH9DcZTcpZRC6yevJ+0J0E7fd1wSnl+Y77jO3Gm5KI9EW5S/oXrU5y6lFVI/eT2NRrTtxt7J+86qZt/pmbB59eMX4W+oHiV3kRyFNgGa1XIEaSy7UOREmweVZURyFNqkXVz5o39Zf8flj6KXOIpIyV0kR6FN2sUl4WsHr2Xb3m0dT/gW6UzYMlBZRiRHIa7tsrT8MfX0FLf8/S3ML8wD5VgtsVdo5C6Ss5BHtNv2bmP+iXn4MvCXwJfhxP7unKUZ2tpAocXTjJK7SEmlkYwO/eAQPAgcjzYcBx6MtmcotDVyQosnCSV3kRJKKxn1zfbBmzUb34y2Zyi0NXJCiyeJpsndzN5uZv9oZk+a2TNm9oWYffrN7Ftm9oKZPW5mQ5lEKyKJpJWMTv7iZEvb0xJai2ho8SSRZOQ+D2x09w8AVwHXmtn6mn1uAX7u7pdRrc79TapRikhL0kpGlTWVlranJbQW0dDiSaJpcvequejH5dHNa3a7AdgZ3b8fuMbMLLUoRaQlaSWjiYkJ+vv7z9i2YsUKJiaybdUMrUU0tHiSSFRzN7M+M3sCOAp8390fr9nlYuCnAO7+FtVpl1UpxikiLUgrGY2OjjI+Pk6lUsHMqFQqbN++ndHR7E/7D+mkp9DiScLcawfhDXY2Ox/4LvCf3P3Aku0HgGvd/aXo558Av+3ur9Y8fwwYAxgcHByenp7u+A10w9zcHAMDA3mH0TbFn6+84p85MsOOf9nB0fmjrO5fzdbf2MqmwU0tv06Rj3+RY4f4+EdGRva7+7qmT3b3lm7AHcB4zbb/A/xOdP8c4FWiD456t+HhYS+K2dnZvEPoiOLPl+LPT5Fjd4+PH9jnCXJ1km6ZC6MRO2Z2LvAR4Mc1uz0AbInu3wg8FAUhIhkq2ok10j1Jau4XAbNm9hTwT1Rr7rvN7E4zuz7a515glZm9APw58F+zCVekdzRL3EU8sUa6p+naMu7+FPDBmO13LLn/a+BP0g1NpHctJu7FXvW4NV2yWAteykNnqIoEKMlJSEU8saYZlZnSo+QuEqAkiTvtE2vyTqwqM6VLyV0kQEkSd5on1oSQWIu4fkvIlNxFApQkcbdzYk290XmeiXUxprjL+0Gxy0x50sU6RAKU9CIerVxXtNEkbb0Eeuj4ITY/tpmj//doJhcSqY0pTsjrt4RMyV0kUGlfELrR6LzehbEN48j8ESCbqzDFxbRU6Ou3hExlGZEe0WiSNq4MZBhes0Zg2qWaRiWXPutjywe2FL6tM6+JaiV3KZW8Oz5C1miSNq5+X5vYF6VZA29UcjnpJ9n55M5C/zfMc6JayV1KI4SOj5A1m6StvZZrZWX8mu1p1sDjYlqq6N0yeU5UK7lLaaiVrrFWu2u6sYb50pjqKXK3TJ4nmim5S2mU8YzNtNWOzhvVsxcT72D/YKZrmC/G1Mo3hXbLb90u2+V5BScldymNIl4KLXSja0eZXj+d6MOgU0m/KbRbfsujbJfnFZyU3KU0ingpNDktadmo3fJbHmW7PK/gpD53KY2kJ/6EaOrpqULGnbYkvf3tlt/yKtulfb5CUkruUip5/Y/UiSTL+8pp9U64alZ+a/d5RaWyjEiOpp6eYst3t6jLpwXtlt96rWyn5C6Sk8UR+0k/Gft4vXJBr5+o1W4dO8/6dx5UlhHJSbN1Veq1AKqE0375rYhlu3Zp5C6Sk0YTefXKBSGfqNXr3yhCo+QukpN6E3l91le3XBDqiVpa+iE8Su4iOak3wbfzj3aekdiXjoiXWfz/snl3fLTyjUIj/O5QzV0kJ0n68mtr7HGTryF0fCT9RqE5g+5RchfJUbMJvnqTrn3Wx4IvBHPCU9Ie8kYj/LzfQ9k0LcuY2aVmNmtmPzKzZ8zs0zH7bDCz42b2RHS7I5twRXpLvRHxgi90Zb2XpJL2kIc6Z1BGSWrubwF/4e5XAOuBT5rZFTH7/cDdr4pud6YapUiPKspiaEl7yIvyfsqgaXJ391fc/YfR/V8CzwIXZx2YiBTrrMokywkX6f0UnbnHX0ordmezIeAR4Ep3f33J9g3At4GXgH8Fxt39mZjnjwFjAIODg8PT09MdhN49c3NzDAwM5B1G2xR/vjqNf+bIDDv+ZQdH54+yun81W39jK5sGN6UYYWNpH/9uvp8y/u2MjIzsd/d1TZ/s7oluwACwH/jjmMfeCQxE968D/rnZ6w0PD3tRzM7O5h1CRxR/vhR/foocu3t8/MA+T5CzE/W5m9lyqiPzKXf/TswHxOvuPhfd3wMsN7MLkry2iGRD/eS9rWkrpJkZcC/wrLt/qc4+7waOuLub2Yeo1vKPpRqpiCSmfnJJMnL/MHATsHFJq+N1Znarmd0a7XMjcMDMngTuAjZHXx9EJAchr0HTCX0bSa7pyN3dHwWsyT53A3enFZSIdKaM/eT6NtIarS0jUkJl7Ccv67eRrCi5i5RQGfvJy/htJEtK7iIlVMarDpXx20iWtHCYSEmV7apDE9dMnFFzh+J/G8mSRu4iUghl/DaSJY3cRaQwyvZtJEsauYuIlJCSu4hICSm5i4iUkJK7iEgJKbmLFJzWW5E46pYRKTCttyL1aOQunZmagqEhWLas+u+URo3dpPVWpB6N3KV9U1MwNgYnouRy6FD1Z4BRjRq7QeutSD0auUv7tm07ndgXnThR3S5dofVWpB4ld2nf4Tqjw3rbJXVlXP1R0qHkLu1bU2d0WG+7pE7rrUg9Su69IouJz4kJWHHmqJEVK6rbpWtG145y8PaDLHx+gYO3H1RiF0DJvSesnpmpTnQeOgTupyc+O03wo6OwfTtUKmBW/Xf7dk2migRAyb0HvGfHjuwmPkdH4eBBWFio/qvELhIEJfeyWlKG6T9yJH4fTXyKlJb63Muopv/c6u2niU+R0mo6cjezS81s1sx+ZGbPmNmnY/YxM7vLzF4ws6fM7OpswpVE4vrPa2niU6TUkpRl3gL+wt2vANYDnzSzK2r2+X3gN6PbGPDVVKOU1jQqtxRw4nP1zIyWOBBpUdOyjLu/ArwS3f+lmT0LXAz8aMluNwDfcHcHHjOz883soui50m1r1lQ7YmpVKtVJzyKZmuLyyUmYn6/+rCUORBKxaj5OuLPZEPAIcKW7v75k+27gr9390ejnvcBn3H1fzfPHqI7sGRwcHJ6enu74DXTD3NwcAwMDufzu1TMzvGfHDvqPHmV+9Wpe3LqVo5s2NX3O5ZOT9C0mROBkfz/PjY83fW5o1m/ezNtjJoR/PTjIY/r76Yoix1/k2CE+/pGRkf3uvq7pk9090Q0YAPYDfxzz2G7gd5f8vBdY1+j1hoeHPVi7drlXKu5m7pWKP7NtW35xrFjhXu1Or95WrKhuT/Lc6D28MTiY7DkhMjvz/S/ezPKOLLHZ2dm8Q+hIkeMvcuzu8fED+zxBzk7UCmlmy4FvA1Pu/p2YXV4GLl3y8yXRtuJZ7DRZcsLP5ZOT+dR5O1mYa0n/+WPT08UtYWiJg9zNHJnRxUAKKEm3jAH3As+6+5fq7PYA8PGoa2Y9cNyLWm+PSah98/P5rHSohblgYoKT/f1nblOnT9dMPT3F5POTHDp+CMdPXQxECT58SUbuHwZuAjaa2RPR7Tozu9XMbo322QO8CLwA/C1wWzbhdkFICVWjVhgd5bnxcS1xkJNte7cxvzB/xjZdDKQYmiZ3d3/U3c3d3+/uV0W3Pe7+NXf/WrSPu/sn3f3fuPtar5lILZSQEmqvLcxVZ3Gzo5s2aYmDnOhiIMWl5QdqxSTUk/39+STUXlqYK2auI5XFzaQjuhhIcSm514pJqM+Nj+eXUHtlYS5d1SlIE9dM0L/szDkPXQykGJTc49Qk1KL1hgejlTXkQ5rrkFNG144y/t5xXQykgLRwmGSj1Ytn1zurtpcmjwO1aXATX/zYF/MOQ1qkkbtko9UyS69NHotkTMldstFqmaWXJo9FukDJPQ9ZXM80NO20lPbK5LFIFyi5d1u7LX9F+0BoVmYp2vsRKRgl925rp+Uv1B7wRgm6UZkl1PcjUiJK7t3WTstfiD3gSRJ0vTJLiO9HpGSU3LutnVp0iD3gnSToEN+PSMkouXdbOy1/Wa53c9ttcM451dLJOedUf06ikwQd0vo9IiWl5N5t7bT8ZdUDfttt8NWvwsmT1Z9Pnqz+fN55Z9fRa+vr73pX/Gs2S9BTUzA3d/Z29bSLpErJPY+ujVZb/pp9ILT7HrZvj98+N3dmHf22286ur//852c/r1mCXqzTHzt25vZVq9TTLpKy3l5+oNVT5PM0OhofUyfvYXHE3siJE9XEW7vvwsKZP5vBli2Nf2dcnR5gYCC84y1ScL09ci9D10Yn76GvL9nvSPIh4A579jTeRxOpIl3T28m9DMmmk/ewOMLPOpZFmkgV6ZreTu5lSDadvId77oFPfOL0CH6xYybtWBZpcTCRrunt5F6GZNPpe7jnHnjrrWpZZWEB7rvv9MRt0rJN0t+ZtFOoZoJ49cxM8jhEBOj15N5OW2Joa6KkvZri0k6e2knTelr5nc06hWLOfL18cjL/4yxSML3dLQP1u1DihNpd08p7aEW9C2gsValUk3RaYiaI++bnq9vVUSOSWG+P3FtVhu6aVsSVfJbKooRVhklukQA0Te5m9nUzO2pmB+o8vsHMjpvZE9HtjvTDDESvJZ7aks+qVdVblhfTKMMkt0gAkozc7wOubbLPD9z9quh2Z+dhBaoXE8/SGvmrr1ZvWV5MI+bbwsn+/mJNcosEoGlyd/dHgNe6EEv4StBdMzU1xdDQEMuWLWNoaIip0CYqYyaInxsfz6feHtrkuUgLzN2b72Q2BOx29ytjHtsAfBt4CfhXYNzdn6nzOmPAGMDg4ODw9PR0u3F31dzcHAMDAwCsnpnhPTt20H/0KPOrV/Pi1q0c3bQp5wgbW4x/ZmaGyclJ5ufnTz3W39/P+Pg4m7rwHto9dkuPf7esnpnh8snJ6mRu5GR/P8+Nj7f83zuP+NNU5PiLHDvExz8yMrLf3dc1fbK7N70BQ8CBOo+9ExiI7l8H/HOS1xweHvaimJ2dzTuEjizGX6lUHDjrVqlU0vtlu3a5VyruZtV/d+06vX3FCvdqg2P1tmLF6ccTxN9VlcqZsS7e2jhWZfn7KaIix+4eHz+wzxPk2I67Zdz9dXefi+7vAZab2QWdvm7hBfiV/nCdid9621vW6OpMRes06rXJcymdjpO7mb3bzCy6/6HoNY81flbJBXqN0DV1Jn7rbW9ZowRetGTZi5PnUipJWiG/CfwDcLmZvWRmt5jZrWZ2a7TLjcABM3sSuAvYHH116F2BjlInrruO2q71FcDEZZel8y2jUQIvWrIsweS59LamZ6i6+582efxu4O7UIiqDQEepo9GSvNuAw8AaYAIYfeih6jcM6Oys23pntK5ZU02KS8/uhbCT5dKLeS9+OE1M6CxZKQydoZqFUEephw8zChwEFqJ/R+F0Yl/U7reMRqPdtNfA6YZWr5glEhAl9yzk/ZW+3qqKrXy4tPMto1kCT7JoWGCT0CJFpYXDspDnV/qYxc0un5yE970vvjRidvbIHdr/ltHuImaNFmW7+OL2YhHpYUruWclqpcZmGq2quLh647Zt1eTZ11e9hF5tgs+jFt5oEvq++7obi0gJKLmXTbPJ3MUPnKWjZPfTCb5SyWfiMNBJaJGiUnIvm0YdK4viRsmLiT3NtdlbkSRuEUlME6qd6mQSMMlzW339JKsqhjhKznsSWqRklNw70cmZqEme287rJ1lVMcRWzSK2SooETMm9E52ciZrkue2+fk3L4VmrGIY6SlZfuUhqei+5p9lL3Ul5I8lzsyqfaJQsUnq9ldzTXtCrk/JGkudmWT7RKFmk1Horuae9oFcn5Y0kzw21fCIiweut5J52maOT8kaS58bts2VL9cNIp+iLSAO91eeeRS91J2eiJnnu0n0anaKvsoqILNFbI/eilzkCXSe+KS0IJtJ1vZXci94lEuLJR80EelUqkbLrreQO4XaJJBndtto9E73m723cmN+IuajfNkQKrveSe4iSjm7jykpmcN11DV/T8hwxF/HbhkgJKLmHIOnodnS02i1TvR55lTvs3Hl20s5yxNxKDT3EpQ5EeoCSewhaGd3u2ZPssnhZjZhbraEXfRJbpKCU3EPQyug2adLOasTc6jeCok9iixSUknsIWhndJk3aWY2Y2/lGEOoktkiJNU3uZvZ1MztqZgfqPG5mdpeZvWBmT5nZ1emHWXKtjG6bJe3FevhNN8G558KqVXiaI2bV0EUKIcnI/T7g2gaP/z7wm9FtDPhq52H1oKSj20YfBLX18GPH4I03ePZzn0tvxKwaukghNE3u7v4I8FqDXW4AvuFVjwHnm9lFaQUoMep9ENSph79nx470zhJVDV2kEMxrOy/idjIbAna7+5Uxj+0G/trdH41+3gt8xt33xew7RnV0z+Dg4PD09HRn0XfJ3NwcAwMDeYfR1O9t3Fjtaa/hwEJ/P33z86e2nezv57nx8bMv5BGgohz/ehR/foocO8THPzIyst/d1zV9srs3vQFDwIE6j+0GfnfJz3uBdc1ec3h42ItidnY23wB27XKvVNzNqv/u2hW/X6XiXi3InHE7uWxZ7HavVLr3HjqQ+/HvkOLPT5Fjd4+PH9jnCfJ2Gt0yLwOXLvn5kmibNJP0AtlJ+8rr1MNtYSH+9+ssUZHSSiO5PwB8POqaWQ8cd/dXUnjdckuatFvpK69TD58fHIyPIesOF60GKZKbJK2Q3wT+AbjczF4ys1vM7FYzuzXaZQ/wIvAC8LfAbZlFWyZJk3arfeUxk60vbt3a/Q4XrQYpkqumF+tw9z9t8rgDn0wtol7RypmmHV5g5OimTVzxvvdVPzgOH64+d2Ii2w6XRh9e6qwRyZzOUM1Lt8807fZZoloNUiRXSu55SZq0i9pXrjNZRXKl5J6XVpJ2Eddm0ZmsIrnqrQtkh6aTi2uHbulZs92q84vIKUrukp0yf3iJBE5lmV6lHnSRUtPIvRct9qAvtiou9qCDRtoiJaGRey/K8vqqIhIEJfdepB50kdJTcu9F6kEXKT0l916kHnSR0lNy70VFPetVRBJTt0yvUg+6SKlp5C4iUkJK7iIiJaTkLiJSQkruIiIlpOQuIlJCSu4iIiWk5C4iUkJK7iIiJaTkLiJSQubu+fxis58Bh3L55a27AHg17yA6oPjzpfjzU+TYIT7+irtf2OyJuSX3IjGzfe6+Lu842qX486X481Pk2KGz+FWWEREpISV3EZESUnJPZnveAXRI8edL8eenyLFDB/Gr5i4iUkIauYuIlJCSu4hICSm5R8zs62Z21MwO1HnczOwuM3vBzJ4ys6u7HWMjCeLfYGbHzeyJ6HZHt2Osx8wuNbNZM/uRmT1jZp+O2SfY458w/pCP/9vN7B/N7Mko/i/E7NNvZt+Kjv/jZjaUQ6ixEsZ/s5n9bMnx35pHrI2YWZ+Z/T8z2x3zWOvH3911q847/FvgauBAncevA74HGLAeeDzvmFuMfwOwO+8468R2EXB1dP884HngiqIc/4Txh3z8DRiI7i8HHgfW1+xzG/C16P5m4Ft5x91i/DcDd+cda5P38efA/4r7O2nn+GvkHnH3R4DXGuxyA/ANr3oMON/MLupOdM0liD9Y7v6Ku/8wuv9L4Fng4prdgj3+CeMPVnRM56Ifl0e32k6LG4Cd0f37gWvMzLoUYkMJ4w+amV0C/AGwo84uLR9/JffkLgZ+uuTnlyjQ/8CR34m+un7PzH4r72DiRF83P0h19LVUIY5/g/gh4OMflQSeAI4C33f3usff3d8CjgOruhpkAwniB/h3UUnvfjO7tLsRNvUV4L8AC3Ueb/n4K7n3jh9SXZPiA8D/AP4+33DOZmYDwLeB29399bzjaVWT+IM+/u5+0t2vAi4BPmRmV+YcUksSxP8gMOTu7we+z+lRcO7M7KPAUXffn+brKrkn9zKw9NP+kmhbIbj764tfXd19D7DczC7IOaxTzGw51cQ45e7fidkl6OPfLP7Qj/8id/8FMAtcW/PQqeNvZucAK4FjXQ0ugXrxu/sxd5+PftwBDHc5tEY+DFxvZgeBaWCjme2q2afl46/kntwDwMejro31wHF3fyXvoJIys3cv1ujM7ENU/9sH8T9nFNe9wLPu/qU6uwV7/JPEH/jxv9DMzo/unwt8BPhxzW4PAFui+zcCD3k0u5e3JPHXzM9cT3VeJAju/ll3v8Tdh6hOlj7k7n9Ws1vLx/+c1CMtKDP7JtWOhgvM7CXg81QnZnD3rwF7qHZsvACcAP59PpHGSxD/jcAnzOwt4A1gcyj/c1IdudwEPB3VTQE+B6yBQhz/JPGHfPwvAnaaWR/VD53/7e67zexOYJ+7P0D1w+vvzOwFqhP3m/ML9yxJ4v+UmV0PvEU1/ptzizahTo+/lh8QESkhlWVEREpIyV1EpISU3EVESkjJXUSkhJTcRURKSMldRKSElNxFREro/wPnsG2DK2GxMgAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"points1, mean1 = gen_points(mean,sd,nb,dim,clusters)\n",
"Pc1, index1, clusters1 = kmeans(points1,K=K)\n",
"visualisation(clusters1, index1, Pc1, K=K)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Avec un K ne correspondant pas au nombre réel de groupes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"K = clusters+1\n",
"points, mean = gen_points(mean,sd,nb,dim,clusters)\n",
"Pc, index, clusters_ex = kmeans(points,K=K)\n",
"visualisation(clusters_ex, index, Pc, K=K)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Et un exemple un peu plus complexe avec des intensités différentes et relativement proche"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clusters = 5\n",
"dim = 2\n",
"nb = 50\n",
"K = clusters\n",
"mean = np.random.randint(5, size=clusters)\n",
"mean = mean.T * np.random.random(size=clusters)\n",
"sd = np.random.random(size=clusters)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"points, mean = gen_points(mean,sd,nb,dim,clusters)\n",
"Pc, index, clusters_ex = kmeans(points,K=K)\n",
"visualisation(clusters_ex, index, Pc, K=K)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exemple de clusterisation sur une image\n",
"On souhaite pouvoir changer les pixels vers les le centre du cluster le plus proche.\n",
"\n",
"On observe ainsi pour un nombre différent de clusters :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"path_image = \"images/fruits.jpg\"\n",
"for i in range(1,13):\n",
" Pc, index, img_seg = kmeans_image(path_image=path_image, K=i)\n",
"plt.figure()\n",
"plt.imshow(io.imread(\"images/fruits_2.jpg\"))\n",
"plt.figure()\n",
"plt.imshow(io.imread(\"images/fruits_4.jpg\"))\n",
"plt.figure()\n",
"plt.imshow(io.imread(\"images/fruits_10.jpg\"))\n",
"plt.figure()\n",
"plt.imshow(io.imread(\"images/fruits_255_cuda.jpg\"))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"La clusterisation avec 255 couleurs à été réalisée à l'aide du script `Kmeans_skcuda.py` afin d'accélérer le temps d'exécution. \n",
"\n",
"Nous avons vu en A4 la puissance de CUDA pour le traitement de données importantes, ce qui est notre cas avec le nombre de pixels de l'image et le nombre d'itérations qui augmentent en fonction du nombre de clusters recherchés. J'ai ainsi décidé - pour voir à partir de combien de niveaux de couleurs peut-on apercevoir une image nette - d'observer le traitement Kmeans jusqu'à 255 clusters.\n",
"\n",
"On observe ainsi qu'au dessus de 40 clusters on arrive à bien distinguer les fruits et légumes présents sur l'image."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cpu = []\n",
"cuda = []\n",
"i = 0\n",
"with open(\"timing.txt\",'r') as data_file:\n",
" for line in data_file:\n",
" data = line.split()\n",
" if(len(data) < 4):\n",
" i=i+1\n",
" if(len(data) > 3):\n",
" if(i == 1):\n",
" cpu.append(float(data[2]))\n",
" if(i == 2):\n",
" cuda.append(float(data[2]))\n",
"\n",
"fig, ax1 = plt.subplots()\n",
"ax1.set_xlabel(\"K\")\n",
"ax1.set_ylabel(\"temps (s)\")\n",
"ax1.set_yscale('log')\n",
"ax1.plot(cpu,'b')\n",
"ax1.tick_params(axis ='y', labelcolor = 'blue') \n",
"plt.legend(['CPU'],loc='lower left')\n",
"\n",
"ax2 = ax1.twinx()\n",
"ax2.set_ylabel(\"temps (s)\")\n",
"ax2.plot(cuda,'r')\n",
"ax2.tick_params(axis ='y', labelcolor = 'red') \n",
"\n",
"plt.legend(['CUDA'],loc='lower right')\n",
"plt.title(\"Temps d'exécution Kmeans image\")\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.4 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "2ef431f6525756fa8a44688585fa332ef3b2e5fcfe8fe75df35bbf7028a8b511"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}