M2_SETI/D3/TP/TP_SETI_Kmeans/TP1.ipynb

302 lines
34 KiB
Text
Raw Normal View History

2022-11-28 11:40:47 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TP1 KMEANS\n",
"\n",
"On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n",
"Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur."
]
},
{
"cell_type": "code",
"execution_count": 605,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import scipy.spatial\n",
"from skimage import io"
]
},
{
"cell_type": "code",
"execution_count": 606,
"metadata": {},
"outputs": [],
"source": [
"# mean = [1,2,3,4]\n",
"# sd = [0.25, 0.25, 0.1, 0.2]\n",
"clusters = 5\n",
"dim = 2\n",
"nb = 50\n",
"K= clusters\n",
"\n",
"path_image = \"fruits.jpg\"\n",
"# print(mean)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fonctions à utiliser pour le clustering"
]
},
{
"cell_type": "code",
"execution_count": 607,
"metadata": {},
"outputs": [],
"source": [
"def gen_points(mean=1,sd=0.5, nb=100, dim=2, clusters=2):\n",
" size = []\n",
" mean = np.random.randint(5, size=clusters)\n",
" mean = mean.T * np.random.random(size=clusters)\n",
" sd = np.random.random(size=clusters)\n",
" # for i in range(0,dim):\n",
" size.append(nb)\n",
" size.append(dim)\n",
" points = np.random.normal(mean[0],sd[0],size=size)\n",
" for i in range(1,clusters):\n",
" points = np.concatenate((points,np.random.normal(mean[i],sd[i],size=size)),axis=0)\n",
" \n",
" return points"
]
},
{
"cell_type": "code",
"execution_count": 608,
"metadata": {},
"outputs": [],
"source": [
"def distance(points,Pc): \n",
" return scipy.spatial.distance.cdist(points[:,:], Pc[:,:])"
]
},
{
"cell_type": "code",
"execution_count": 609,
"metadata": {},
"outputs": [],
"source": [
"def kmeans(points = [0,0], K = 1):\n",
" # Initialisation K prototypes\n",
" dim = points.shape[1]\n",
" N = points.shape[0]\n",
" iter = 0\n",
" eps = 0.1\n",
" Pc_index = []\n",
" Pc_save = np.zeros([K,dim])\n",
" clusters = []\n",
"\n",
" for i in range(0,K):\n",
" Pc_index.append(np.random.randint(0,N))\n",
" Pc = points[Pc_index,:]\n",
"\n",
" while (np.mean(distance(Pc,Pc_save)) > eps and iter < 5):\n",
" iter += 1\n",
" Pc_save = Pc\n",
" # print(Pc)\n",
" # print(points[:,:Pc.shape[0]])\n",
" dist = distance(points=points[:,:Pc.shape[1]],Pc=Pc)\n",
" clust = np.argmin(dist, axis=1)\n",
" clust = np.expand_dims(clust, axis=0)\n",
" points = np.append(points[:,:Pc.shape[1]], clust.T, axis=1)\n",
" # print(points)\n",
" Pc = np.zeros([K,dim])\n",
" index = np.array([])\n",
"\n",
" for n in range(0,N):\n",
" for k in range(0,K):\n",
" index = np.append(index, (clust==k).sum())\n",
" if points[n,-1] == k:\n",
" # print(points)\n",
" # print(Pc)\n",
" Pc[k,:] = np.add(Pc[k,:], points[n,:-1])\n",
"\n",
" for k in range(0,K):\n",
" Pc[k,:] = np.divide(Pc[k,:],index[k])\n",
"\n",
" # print(Pc)\n",
" index = points[:,-1]\n",
" points = points[:,:-1]\n",
" return Pc, index, points\n"
]
},
{
"cell_type": "code",
"execution_count": 610,
"metadata": {},
"outputs": [],
"source": [
"colors=['red', 'green','yellow','blue','purple', 'orange']\n",
"def visualisation(points, index, Pc=[0,0], K=1):\n",
" if(points.shape[1]==2):\n",
" # for k in range(0,K):\n",
" for n in range(0,len(points)):\n",
" plt.plot(points[n,0], points[n,1], 'o', color=colors[int(index[n])])\n",
" plt.plot(Pc[:,0],Pc[:,1],'r+')\n",
" plt.grid(True)\n",
" plt.axis([min(mean)-1,max(mean)+1,min(mean)-1,max(mean)+1])"
]
},
{
"cell_type": "code",
"execution_count": 611,
"metadata": {},
"outputs": [],
"source": [
"def img_2_mat(my_img):\n",
" mat = my_img.reshape(my_img.shape[0]*my_img.shape[1],my_img.shape[2])\n",
" return mat"
]
},
{
"cell_type": "code",
"execution_count": 612,
"metadata": {},
"outputs": [],
"source": [
"def mat_2_img(mat,my_img):\n",
" img_seg = mat.reshape(my_img.shape[0], my_img.shape[1], my_img.shape[2])\n",
" return img_seg"
]
},
{
"cell_type": "code",
"execution_count": 613,
"metadata": {},
"outputs": [],
"source": [
"def kmeans_image(path_image, K):\n",
" my_img = io.imread(path_image)\n",
" imgplot = plt.imshow(my_img)\n",
" Mat = img_2_mat(my_img)\n",
" \n",
" Pc, index, clusters = kmeans(Mat, K)\n",
"\n",
" for k in range(K):\n",
" Mat[k,:] = Pc[index[k],:]\n",
"\n",
" img_seg = mat_2_img(Mat, my_img)\n",
"\n",
" io.imsave(path_image.split('.')[0] + \"_%d.jpg\" % K, img_seg)\n",
" imgplot = plt.imshow(img_seg)\n",
" return Pc, index, img_seg\n"
]
},
{
"cell_type": "code",
"execution_count": 614,
"metadata": {},
"outputs": [],
"source": [
"points = gen_points(nb,dim,clusters)\n",
"# print(points.shape)\n",
"# print(points.mean(axis=0))\n",
"# print(points)"
]
},
{
"cell_type": "code",
"execution_count": 615,
"metadata": {},
"outputs": [],
"source": [
"dist = distance(points,points)\n",
"# print(dist)"
]
},
{
"cell_type": "code",
"execution_count": 616,
"metadata": {},
"outputs": [],
"source": [
"Pc, index, clusters = kmeans(points,K=K)\n",
"# print(index)\n",
"# print(clusters)\n"
]
},
{
"cell_type": "code",
"execution_count": 617,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiIAAAGdCAYAAAAvwBgXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAA9hAAAPYQGoP6dpAABOoElEQVR4nO3df3gU5b03/vdkE35pFkuIGrOJqT+qWKkt/ipqJGkFkefwpK4R0fY51LbWVrRE7OnR+nwLnKOP1h+Y1GNbj/2h5zoFxLjUpz0azfFKIFZsKz70ohq9qiUGYrQQLBsBIezO94/JJJvdmZ17Zuf3vl/XlStkdnbn3p1l57P3/bk/tyTLsgwiIiIiD5R43QAiIiIqXgxEiIiIyDMMRIiIiMgzDESIiIjIMwxEiIiIyDMMRIiIiMgzDESIiIjIMwxEiIiIyDOlXjcgn3Q6jffeew/l5eWQJMnr5hAREZEAWZYxPDyMk046CSUl+fs8fB2IvPfee6ipqfG6GURERGTBrl27EIvF8u7j60CkvLwcALBz507MmDHD49aQlpGREbzwwgtYsGABysrKvG4OaeA5CgaeJzulAJwN4L08+1QD2AEgIvyoPEfikskkampqxq7j+fg6EFGHY8rLyxGNRj1uDWkZGRnBtGnTEI1G+R/Tp3iOgoHnyU7dyB+EAMAAgD8BaBB+VJ4j80TSKpisSkREITNo837kJAYiREQUMlU270dOYiBCREQhUw8gBkBvWEACUDO6H3mNgQgREYVMBEDb6L+zgxH171aYSVQl5zAQISKiEIoDaIcyOyZTbHR73PUWkTZfz5ohIiKyLg6gCUAPlMTUKijDMewJ8RMGIkREFGIRmJmiS+7j0AwRERF5hoEIEREReYZDM0RUxFJg/gCRtxiIEFGRSgBYAWB3xrYYlGmfnFFB5BYOzRBREUoAaMbEIARQ1h9pHr2diNzAQISIikwKSk+IrHGbuq1ldD8ichoDESIqMj3I7QnJJAPYNbofETmNgQgRFRmuzErkJwxEiKjIcGVWIj/hrBkiKjLqyqwD0M4TkUZv58qslA+nftuFPSJEVGS4MisVRpI2AagD0AjgutHfdeBsK2sYiBBREeLKrGRNVdVWRCJLwanf9mEgQkRFKg6gD0AXgHWjv98GMAPAegDd4BRemiiF2bN/Bk79thdzRIioiGWuzJoAcCpYaZX0SNJLmDp1KM8emVO/G1xpUxiwR4SIiJVWSQinfjuBgQgRFTlWWnVOCsoQV1iGujj12wkMRIioyLHSqjMSCNvMElm+BIcOVUCWs2dbqSQANeDUb3MYiBBRkWN3u/3COtQVwY4d3xj9N6d+24WBCBEVOXa32yvcQ12Dg3ORSm0Ap37bh7NmiKjIsdKqvcwMdTW40SDbyfKVAK4CK6vag4EIERU5tdJqM5SgIzMYYXe7ecUy1JU59ZsKwaEZIiJWWrURh7rIHPaIEBEBUIKNJrC7vVAc6iJzGIgQEY1hd3vhONTlX/5cMZhDM0REZDMOdfmPf+u6sEeEiIgcwKEu/1DrumQPlal1XbwNDhmIEBGRQzjU5T2jui4SlLouTfAqSOTQDBERUWj5fwkDBiJERESh5f+6LgxEiIiIQsv/dV2YI0JERD7nz2mnweD/ui7sESEiIhukAHQDWD/6265F7fw77TQY1LougF9XDGYgQkREBXIqWFCnnWYnW6rTThmMiPF3XRcOzRARUQGcqlFxBMCNGo8LeDftNIWKih2QpCSAGgRriMi/dV0YiBARkUVO1ahIAPgWgL159smcdtpg4rGtSqC09Du45JKBjG0xKMMeQakU68+6LgxEiIjIhMzE0Q8gXqOiQfDx9XpY9Lgx7dTflUmDjoEIEREJSkDpAckXfGgRDRby9bDoKXTaqdGMnPE2Sdm5nj6pTBp0TFYlIiIBeomjIkSDBaMqoJkkjOdpWCWSZOv/yqRBx0CEiIgMWOmpAMwHC2aHWVphvRdCdEaO/yuTBh0DESIiMmCmp0JlpUaFaM9JJQrLyzBKsgWU4ZaUiTZ5V5k06JgjQkREBqx8249BCULUYEGkOqpRFVBACUJ2A5ikc7vIccwMt/i/MmnQsUeEiIgMiH7bfwjAOgBdAHZiPAgRLXhmVAVUAvBT6AchoscxM9wy3iZZ9mdl0qBzNBD5yU9+gs985jOIRqOIRqOYO3cunnvuOScPSUREtlN7BXKmjYxSc0FuAXAtlKm66oXZbHVUq1VAzRzH7HCL2qaTTLaJRDgaiMRiMdx7773Ytm0bXn31VXzhC19AU1MTXn/9dScPS0REluitF2N1vRIzuRiZ4gD6oPSsaPWwaLXbzHFEA6vM4ZY4jh59Gy+99K84evQ/BNpEohwNRBYvXoxFixbh9NNPx6c+9SncfffdOPbYY/HKK684eVgiIjLNaFjDSk9FIVNf1Sqg2T0sWswex2pgFcHQ0GzI8lKBNtnFqcUE/cO1ZNVUKoWnnnoKBw4cwNy5czX3OXz4MA4fPjz2dzKZBACMjIxgZGTElXaSOep54fnxL56jYPDyPEnSJkQiS5FdtEuWlWGNVGoDZPlKAIsBLIIkvQQ1GVSWL4FyQc5ttyTtQqnAVebo0V2QZevPW/Q4qdQLSKc/D6W9iyFJ6xCJ3AJJGi8lL8vVSKUehCwvRvZzcvscKedlJSRpvKy80r61o+fDv8y8RpIsy2YnhpuyY8cOzJ07Fx9//DGOPfZYrFu3DosWLdLcd/Xq1VizZk3O9nXr1mHatGlONpOIqEilsGDBNzFlypBG5VBAloFDh2ais/NRmO0BqKjYgUsu+f8M93vppX/F0NBszbZVVLyBKVM+xMcffwJDQ2dptkH0OABw6FAFduz4BgBg9uyfYerUobHbDh+O4k9/uhGDgxcLPZbZdppRVbUV55//QwDICg6V33/84z9jcFD7S70fHDx4ENdddx3279+PaDSad1/HA5EjR46gv78f+/fvR3t7O372s59h8+bNOOuss3L21eoRqampweDgICoqKpxsJlk0MjKCzs5OzJ8/H2VlZV43hzTwHAWDV+dJkjajtHS+4X5Hj3ZClueZfPQUSktPA/AeJCn3UqPMQqnG0aN/QfaF21xvQP7j5B5zfJ+JF3nlj/EeoIm0zpEzvRYplJZWA9inExzqv25+kUwmMXPmTKFAxPGhmUmTJuG0004DAJx77rn44x//iLa2Njz66KM5+06ePBmTJ0/O2V5WVsYPUJ/jOfI/nqNgcP887RHaq7R0DwCz7SoD8CMos1YmBgCANHqRbUNZ2RRMrP/xFwCrkZ18KknvobR0KXJzUvIdZ6J8gYpym4TS0u8CuAp6F/nxc5QAsNREO0XdA2CfQTt3o6zsFfhxNV0Apt7DrtcRSafTE3o9iIjIS05XDhVJcs1OlF0FazNttI5jlujaMVZnBBlJYTyR1kg4yso72iNyxx134IorrkBtbS2Gh4exbt06dHd34/nnn3fysEREJMyNyqFxKKvTalU8Vet/iGYJZAYKDTrHWQ3grgLaCxhf5M3M1Gkwcdwe5OsNmSgcZeUdDUT+9re/4R//8R8xODiI6dOn4zOf+Qyef/55zJ9vPB5JRERuUKeyag+fKFqhn4sgUlJdPU6Dxn2tLKYH6AcKEQBfROGBiNFF3qnF8ET3r0BYyso7Goj8/Oc/d/LhiYjIFuqwxgpM/JafvV5MtoTOfdry3CeTlcX0VPkCBZE1a4zsNbjdqSEt0f2/A78mqprFtWaIiAjmq5maLd2uxUqOg1bV02wiBcuMrET+/A4r1VlFGD0uoPSG3Gnycf2LgQgREY0SrWZqJVFTq0Ko2d4CM4vM5UuSza1XlcsoYdVqdVYj+R5X9VMLj+tfDESIqEiEv1R2Ycy8PmZLquuVj98D42//mcwuMqfXy3O64P2NemysLtBnxGgG0K0Q63EKBtdKvBMReafQXIawM/v6iA6pDAD4FyjTcbVuuwbAdwE8AO1EWRlK78XpyJ8Im49Wkqyd+R35ZgQVIg4lGFyicZs6/BWOlX/ZI0JEIWdHLkOYWXl9RC/kK6AdhADjQccGABuh3avwNJRciCo
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"visualisation(clusters, index, Pc, K=K)\n",
"# print(Pc)\n",
"# print(mean)"
]
},
{
"cell_type": "code",
"execution_count": 618,
"metadata": {},
"outputs": [],
"source": [
"Pc, index, img_seg = kmeans_image(path_image=path_image, K=K)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(103230,)\n"
]
}
],
"source": [
"print(index.shape)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}