M2_SETI/D3/TP/TP_SETI_Kmeans/TP1.ipynb

359 lines
24 KiB
Text
Raw Normal View History

2022-11-28 11:40:47 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TP1 KMEANS\n",
"\n",
"On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n",
"Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur."
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 188,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import scipy.spatial\n",
"from skimage import io"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 189,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"# mean = [1,2,3,4]\n",
"# sd = [0.25, 0.25, 0.1, 0.2]\n",
"clusters = 5\n",
"dim = 2\n",
2022-12-08 22:12:07 +01:00
"nb = 50\n",
2022-11-28 11:40:47 +01:00
"K= clusters\n",
2022-11-29 12:15:07 +01:00
"mean = np.random.randint(5, size=clusters)\n",
"mean = mean.T * np.random.random(size=clusters)\n",
2022-12-08 22:12:07 +01:00
"sd = np.random.random(size=clusters)\n",
2022-11-28 11:40:47 +01:00
"path_image = \"fruits.jpg\"\n",
"# print(mean)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fonctions à utiliser pour le clustering"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 190,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"def gen_points(mean=1,sd=0.5, nb=100, dim=2, clusters=2):\n",
" size = []\n",
" # for i in range(0,dim):\n",
" size.append(nb)\n",
" size.append(dim)\n",
" points = np.random.normal(mean[0],sd[0],size=size)\n",
" for i in range(1,clusters):\n",
" points = np.concatenate((points,np.random.normal(mean[i],sd[i],size=size)),axis=0)\n",
" \n",
2022-11-29 12:15:07 +01:00
" return points, mean"
2022-11-28 11:40:47 +01:00
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 191,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"def distance(points,Pc): \n",
" return scipy.spatial.distance.cdist(points[:,:], Pc[:,:])"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 192,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"def kmeans(points = [0,0], K = 1):\n",
" # Initialisation K prototypes\n",
" dim = points.shape[1]\n",
" N = points.shape[0]\n",
" iter = 0\n",
" eps = 0.1\n",
" Pc_index = []\n",
" Pc_save = np.zeros([K,dim])\n",
" clusters = []\n",
"\n",
" for i in range(0,K):\n",
" Pc_index.append(np.random.randint(0,N))\n",
" Pc = points[Pc_index,:]\n",
"\n",
2022-12-05 18:14:46 +01:00
" while (np.mean(distance(Pc,Pc_save)) > eps and iter < 3):\n",
2022-11-28 11:40:47 +01:00
" iter += 1\n",
" Pc_save = Pc\n",
" # print(Pc)\n",
" # print(points[:,:Pc.shape[0]])\n",
" dist = distance(points=points[:,:Pc.shape[1]],Pc=Pc)\n",
" clust = np.argmin(dist, axis=1)\n",
" clust = np.expand_dims(clust, axis=0)\n",
" points = np.append(points[:,:Pc.shape[1]], clust.T, axis=1)\n",
" # print(points)\n",
" Pc = np.zeros([K,dim])\n",
" index = np.array([])\n",
"\n",
" for n in range(0,N):\n",
" for k in range(0,K):\n",
" index = np.append(index, (clust==k).sum())\n",
" if points[n,-1] == k:\n",
" # print(points)\n",
" # print(Pc)\n",
" Pc[k,:] = np.add(Pc[k,:], points[n,:-1])\n",
"\n",
" for k in range(0,K):\n",
" Pc[k,:] = np.divide(Pc[k,:],index[k])\n",
"\n",
" # print(Pc)\n",
2022-11-29 12:15:07 +01:00
" indice = points[:,-1]\n",
2022-11-28 11:40:47 +01:00
" points = points[:,:-1]\n",
2022-11-29 12:15:07 +01:00
" return Pc, indice, points\n"
2022-11-28 11:40:47 +01:00
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 193,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"colors=['red', 'green','yellow','blue','purple', 'orange']\n",
"def visualisation(points, index, Pc=[0,0], K=1):\n",
" if(points.shape[1]==2):\n",
" # for k in range(0,K):\n",
" for n in range(0,len(points)):\n",
" plt.plot(points[n,0], points[n,1], 'o', color=colors[int(index[n])])\n",
" plt.plot(Pc[:,0],Pc[:,1],'r+')\n",
" plt.grid(True)\n",
" plt.axis([min(mean)-1,max(mean)+1,min(mean)-1,max(mean)+1])"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 194,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"def img_2_mat(my_img):\n",
" mat = my_img.reshape(my_img.shape[0]*my_img.shape[1],my_img.shape[2])\n",
" return mat"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 195,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"def mat_2_img(mat,my_img):\n",
" img_seg = mat.reshape(my_img.shape[0], my_img.shape[1], my_img.shape[2])\n",
" return img_seg"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 196,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"def kmeans_image(path_image, K):\n",
" my_img = io.imread(path_image)\n",
" imgplot = plt.imshow(my_img)\n",
" Mat = img_2_mat(my_img)\n",
" \n",
" Pc, index, clusters = kmeans(Mat, K)\n",
"\n",
2022-11-29 12:15:07 +01:00
" for k in range(Mat.shape[0]):\n",
" Mat[k,:] = np.floor(Pc[index[k],:])\n",
2022-11-28 11:40:47 +01:00
"\n",
" img_seg = mat_2_img(Mat, my_img)\n",
"\n",
" io.imsave(path_image.split('.')[0] + \"_%d.jpg\" % K, img_seg)\n",
" imgplot = plt.imshow(img_seg)\n",
" return Pc, index, img_seg\n"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 197,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
2022-11-29 12:15:07 +01:00
"points, mean = gen_points(mean,sd,nb,dim,clusters)\n",
2022-11-28 11:40:47 +01:00
"# print(points.shape)\n",
"# print(points.mean(axis=0))\n",
"# print(points)"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 198,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"dist = distance(points,points)\n",
"# print(dist)"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 199,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"Pc, index, clusters = kmeans(points,K=K)\n",
"# print(index)\n",
"# print(clusters)\n"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 200,
2022-11-28 11:40:47 +01:00
"metadata": {},
"outputs": [
{
"data": {
2022-12-08 22:12:07 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtwklEQVR4nO2de2wd133nvz/SJEWaDg3RIteIw8t4Ywk1wjSFjawWAVbPArZibxGjBWRcCUpSgRurLqSFhWwDAhUF9GaxhVBIwMYOVKWwIBEVBNRtaltFEOoVLFBtY7WOadeRYyikNkUhQhQimxZNSeTZP+YOOZx7zjzPmZkz9/cBLsg7d2bOGT6+85vf65AQAgzDMIy9tOQ9AYZhGCYdLOQMwzCWw0LOMAxjOSzkDMMwlsNCzjAMYzn35THogw8+KL7whS/kMXSmfPLJJ7j//vvznoZxmuU6gea5Vr7OYnL58uUbQog1/u25CHl/fz/eeuutPIbOlAsXLmDjxo15T8M4zXKdQPNcK19nMSGiKdl2dq0wDJOasYkxDB4eRMvBFgweHsTYxFjeU2oqWMgZhknF2MQYhl8fxtStKQgITN2awvDrw7mKebPdWFjIGYZJxcjZEdy+e3vFttt3b2Pk7Egu8ynijcU0LOQMw6Ti2q1rsbabpmg3lixgIWcYJhUDPQOh27N0dRTtxpIFLOQMw6SitqWGrrauFdu62rpQ21IDkL2rI8qNpWywkDMMk4rqUBVHnz2KSk8FBEKlp4Kjzx5FdagKIHtXR9iNpYzkkkfOMEy5qA5Vl4TbT9auDu8N5NqtaxjoGUBtS005vzLAQs4wjFEGegYwdauxjsWkqyPoxlJG2LXCMIxRmtHVkTUs5AzDGCXMh86kh10rDMMYJ6mrY2xirKl83UlhIWcYppC4aYtuxoubtgiAxdwHu1YYhikkzVihmRQWcoZhCkkzVmgmhYWcYZhC0owVmklhIWcYppDknbZoUytcFnKGYRJhWujyTFu0rRUuZ60wDBObrDJK0lRopkldDAq0FjFjhi1yhmFiU/SMkj1v7sHO13YmtqhtC7SykDMME5siC93YxBh+8NYPICBWbI9zo7Et0MpCzjBMIDJfuA6h0+Vj959n7z/sbRBxl6g3mrwDrXFhIWcYRokq6LftsW2phE5XMFF2npm5GeX+AiLSTcO2/jDahJyIWonoX4joDV3nZBgmX1S+8DO/PJNK6HT52GXnCSPqTaM6VMXkvkksHljE5L5J7SKuM+tHZ9bKXgDvA/iMxnMyDJMjQb7wNBklunzsSX3y7k3j1S+/muj4tOjO+tFikRPRIwC+BuCYjvMxDFMMTAX9dJ1XtX9vZ+/S04KKPAOzurN+dLlWDgP4DoBFTedjGKYAmAr6JTmvzBWhOs+Rp48suUUqPRXp+fLMQNGd9UNCyKO7kU9A9AyAbUKIPUS0EcB+IcQzkv2GAQwDwJo1a544ffp0qnFtYHZ2Ft3d3XlPwzjNcp1A81yr9zrHr4/j2K+OYXp+Gn0dfdj9+d3Y2r819Rj+865fvR6Xbl6SjjN+fRyHPjiE+cX5peM7Wjqwf+1+AAicX9Cx6+9fn8vvc/ul7bg+f71he39HP06tP6U8btOmTZeFEE/6t+sQ8v8JYCeAewBWwfGRvyaE2KE6Zt26deLKlSupxrWBCxcuYOPGjXlPwzjNcp1A81xr1tfp9xkDjmXtBlAHDw9K1/2s9FQwuW8y0vllVZ55/T7DrlcFEUmFPHWwUwjxXQDfrQ+yEY5FrhRxhmEYP2El8XFdEXve3IOjl49iQSyglVox/MRwJMHPClesda1+xL1WGIbJHZUgu1b4QM+A1CKX+bn3vLkHr7z1ytL7BbGAV956BR/MfIAPb36Ia7euYXXnagDAzbmbGHg7nyXk0mT9+NFaECSEuCDzjzMMU1505EOrAo8ECgxqyoKjRy8flZ7r7K/OLhUOzczNYGZuJnIxUtFb2nJlJ8MwiZFVVu54bQce+vOHpGKnEsTalpo0VVBALLlXohYgLYiF2NcRlPpnQ0tbFnKGaWLSWpqqysqZuZkGsQsSxOpQNbQ/StRKy1ZqjXUN/nH8FL3TI8BCzjQJE2MTODx4GAdbDuLw4GFMjE3kPaXc0WFpBuU9+8UuTBBV+d5R+6O4DD8xHGk/P67f3E+ROz26sJAzpWdibAKvD7+OW1O3AAHcmrqF14dfjy3mE2MTuLT9UmluBjoszbCiGq/YhQmizA/uEucm8/LXXsYLT76wZJkntdBdbGhpy0LOlJ6zI2dx9/bdFdvu3r6LsyNnI5/DvRnMX59PdTMoEjoszSDxBVaKXZggev3gMuLcZF7+2su496f3IA4I3PvTe8pzerk5d1O63YaWtizkTOm5de1WrO0ydNwMioYOS9MV397O3obP/GIXJojeoh0Vys9GRwPnGXbDAdTXbUNLWxZypvT0DPTE2i5Dx82gaOiyNKtDVdz4zg2cfO5koNgFCaLfX69CeZM5eDB0jkHWPgBse2xb4PEmW9qmhYWcKT1balvQ1tW2YltbVxu21LZEPoeOm0HR0G1pphG7KH3Fo95kVJk47vxUYn7ml2ciz7docGUnU1omxiZwduQsbl27hc7Vnbiv8z7M3ZxDz0APttS2YKg6FPlcW2pb8Prw6yvcK3FvBkVEZ3VhGEE9uIPcKQSSl7CPjq60xMnJQ5/cfB+m/su9hjHcY23IQokLCzlTStzgpCu8czNzaOtqw3Mnnosl4C7uMW++9Cbmp+cT3QzKgqoBVRhBWTKqEvzAplijo8u+cSJACGlzLW+QdOTsiNJ100ItaDnYsnRN7v46eqGYhoWcKSVBwcmk4jtUHcLMZ2eaovuhijQr2wRZwieeO4Fv/t03cXfR88TT0tbgSgm7iQT1bPF3G/TjVoRO3ZrCt370LQghluaTdgUf07CPnCklZQxOFoE0uedhWTJEK0v0/e8DC5gOHAgco5ValSIuyzO/s3BnxU0FKF41pxcWcqaUlDE4WQTS+JeDsmRGzo7gzsKdFZ/dWbgTvTK07mJRjaHqv0KgWL1ZiupHZyFnSomOTJWyoLNzX5rc86AsmSg3iCj7qMZQZar0dfTFqvwsUjWnF/aRM6XE9YO7WSvNGpzUvVp7bUtNurJN1NxzVZaMKtjp7X8StSe5agzZvHd/fjdqv4g296JVc3phi5wpLUPVIeyb3IcDiwewb3IfhqpDTdc8S3fnPlNVjrUtNbS1tDVs//jOxyta3SYtYFLNe2v/VqW13t3eHfs68+pbzhY50zT4UxLdfikASmupm8iZNpF7Xh2qYu8/7MXM3MyK7a6f3Dtm0pRA2bwvXLiA2pYavvWjbzX46Ofvzcc6v+6nnziwRc40DWXslxKGDZ37XFRNq/w+cLd61A2SprV+q0NVPND+QMP2u4t3Yz25qJ5+vv3Gt41b6SzkTNPQjCmJNnTuc1HdXFqopUH8dK3aM359HIOHBxueBFziPLmo9p29M2t8dSEWcqZpaNaUxM77Ope+7+3sTe3TNuUHVnUoXBALK8RvbGIMu/52V2rf/9jEGA59cEgaQHWJ8+QSdV8T+egs5EzT0Gwpia7V6rU25+7NaTmnCQvTDUjK0gFv372NHa/tQPf3uvHNv/umMvc7jgU9cnYE84vzys/jPrnE2Vd3PjoLOdM0DFWH8OzRZ9FT6QEI6Kn04Nmjz5Y20GlirUnT61dWh6pYFIvKzz+5+0lDxaWXOBZ0mJh6n2SiUB2qSvuyy9Ado+CsFaapGKoOlVa4/ZjIWMmic6AqXzyMKBa0t1dLC7UEVnW6C0gD0bNOjjx9JLSnS3tru/YYBVvkDFNSTGSsZJEFE2U1Hz+t1Brq+/e7haKU5sd92oiygMUD7Q9oT0dkIWeYkmIiYyWLLJggX7mMrrYuHP/68VBxVC1e0UqtIJDkCIe4TwduiqTqnKo0yzSwkDNMSTFRhZnV+pXVoSqOf/042lvbGz5rpVb0dvbGHl/l/lkUi1g8sKi0ogmUKJibZQ4/+8gZpsSYqsLMoie3O4a34rO3sxdHnj6SaPywXi21LTXsfG1nw8ITAmKpujQ
2022-11-28 11:40:47 +01:00
"text/plain": [
2022-12-08 22:12:07 +01:00
"<Figure size 432x288 with 1 Axes>"
2022-11-28 11:40:47 +01:00
]
},
2022-12-08 22:12:07 +01:00
"metadata": {
"needs_background": "light"
},
2022-11-28 11:40:47 +01:00
"output_type": "display_data"
}
],
"source": [
"visualisation(clusters, index, Pc, K=K)\n",
"# print(Pc)\n",
"# print(mean)"
]
},
{
"cell_type": "code",
2022-12-08 22:12:07 +01:00
"execution_count": 201,
2022-11-28 11:40:47 +01:00
"metadata": {},
2022-12-05 09:01:01 +01:00
"outputs": [],
2022-11-28 11:40:47 +01:00
"source": [
2022-12-08 22:12:07 +01:00
"Pc, index, img_seg = kmeans_image(path_image=path_image, K=250)"
2022-11-28 11:40:47 +01:00
]
},
{
"cell_type": "code",
2022-12-05 18:14:46 +01:00
"execution_count": null,
2022-11-28 11:40:47 +01:00
"metadata": {},
2022-12-08 22:12:07 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(103230,)\n",
"[[252.69560225 251.67998184 249.86355505]\n",
" [ 69.54698482 50.76095445 17.63930586]\n",
" [228.4210873 174.61140875 69.5397857 ]\n",
" [154.87751731 83.70097198 45.64258942]\n",
" [ nan nan nan]]\n",
"[[[252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]\n",
" ...\n",
" [252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]]\n",
"\n",
" [[252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]\n",
" ...\n",
" [252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]]\n",
"\n",
" [[252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]\n",
" ...\n",
" [252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]]\n",
"\n",
" ...\n",
"\n",
" [[252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]\n",
" ...\n",
" [252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]]\n",
"\n",
" [[252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]\n",
" ...\n",
" [252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]]\n",
"\n",
" [[252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]\n",
" ...\n",
" [252 251 249]\n",
" [252 251 249]\n",
" [252 251 249]]]\n"
]
}
],
2022-11-28 11:40:47 +01:00
"source": [
2022-12-08 22:12:07 +01:00
"print(index.shape)\n",
"print(Pc)\n",
"print(img_seg)"
2022-11-28 11:40:47 +01:00
]
}
],
"metadata": {
"kernelspec": {
2022-12-08 22:12:07 +01:00
"display_name": "Python 3.9.4 64-bit",
2022-11-28 11:40:47 +01:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2022-12-08 22:12:07 +01:00
"version": "3.9.4"
2022-11-28 11:40:47 +01:00
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
2022-12-08 22:12:07 +01:00
"hash": "2ef431f6525756fa8a44688585fa332ef3b2e5fcfe8fe75df35bbf7028a8b511"
2022-11-28 11:40:47 +01:00
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}