M2_SETI/D3/TP/TP_SETI_Kmeans/TP1.ipynb
2022-12-08 22:12:07 +01:00

358 lines
24 KiB
Text

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TP1 KMEANS\n",
"\n",
"On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n",
"Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur."
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import scipy.spatial\n",
"from skimage import io"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {},
"outputs": [],
"source": [
"# Experiment configuration: every tunable value of the notebook lives in this cell.\n",
"SEED = 0  # fixed seed so that Restart & Run All reproduces the same data and clustering\n",
"np.random.seed(SEED)\n",
"\n",
"clusters = 5  # number of Gaussian blobs to generate\n",
"dim = 2       # number of coordinates per generated point\n",
"nb = 50       # number of points per blob\n",
"K = clusters  # number of prototypes requested from k-means\n",
"\n",
"# One centre and one standard deviation per blob. Each centre is an integer in [0, 5)\n",
"# scaled by a uniform factor in [0, 1), so every centre falls in [0, 4).\n",
"mean = np.random.randint(5, size=clusters) * np.random.random(size=clusters)\n",
"sd = np.random.random(size=clusters)\n",
"\n",
"path_image = \"fruits.jpg\"  # image segmented by kmeans_image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fonctions à utiliser pour le clustering"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [],
"source": [
"def gen_points(mean=1, sd=0.5, nb=100, dim=2, clusters=2):\n",
"    \"\"\"Draw `clusters` isotropic Gaussian blobs of `nb` points each.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    mean, sd : scalar or sequence\n",
"        Centre and standard deviation of each blob; blob i uses mean[i] and\n",
"        sd[i] for every coordinate. A scalar is reused for all blobs.\n",
"    nb : int\n",
"        Number of points per blob.\n",
"    dim : int\n",
"        Number of coordinates per point.\n",
"    clusters : int\n",
"        Number of blobs, at least 1.\n",
"\n",
"    Returns\n",
"    -------\n",
"    points : ndarray of shape (clusters * nb, dim)\n",
"        Blobs stacked in order: rows i*nb to (i+1)*nb - 1 belong to blob i.\n",
"    mean : sequence\n",
"        The per-blob centres that were used (the argument itself when a\n",
"        sequence was passed).\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `clusters` is smaller than 1.\n",
"    \"\"\"\n",
"    if clusters < 1:\n",
"        raise ValueError(\"clusters must be at least 1\")\n",
"    # The scalar defaults used to crash on mean[0] / sd[0]: expand them to one value per blob.\n",
"    if np.ndim(mean) == 0:\n",
"        mean = np.full(clusters, mean, dtype=float)\n",
"    if np.ndim(sd) == 0:\n",
"        sd = np.full(clusters, sd, dtype=float)\n",
"    # Draw the blobs in index order and stack them once instead of\n",
"    # re-concatenating the growing array on every iteration.\n",
"    blobs = [np.random.normal(mean[i], sd[i], size=(nb, dim)) for i in range(clusters)]\n",
"    return np.concatenate(blobs, axis=0), mean"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [],
"source": [
"def distance(points, Pc):\n",
"    \"\"\"Return the matrix of Euclidean distances between samples and prototypes.\n",
"\n",
"    Both arguments are 2-D arrays with one vector per row; entry [i, j] of the\n",
"    result is the distance from points[i] to Pc[j].\n",
"    \"\"\"\n",
"    samples = points[:, :]\n",
"    prototypes = Pc[:, :]\n",
"    return scipy.spatial.distance.cdist(samples, prototypes)"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {},
"outputs": [],
"source": [
"def kmeans(points = [0,0], K = 1, max_iter = 100, eps = 0.1):\n",
"    \"\"\"Cluster `points` into `K` groups with Lloyd's k-means algorithm.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    points : array-like of shape (N, dim)\n",
"        One sample per row.\n",
"    K : int\n",
"        Number of prototypes, between 1 and N.\n",
"    max_iter : int\n",
"        Maximum number of assignment/update rounds.\n",
"    eps : float\n",
"        Stop as soon as no prototype moves by more than `eps` in one round.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Pc : ndarray of shape (K, dim)\n",
"        Final prototypes (cluster centres).\n",
"    indice : ndarray of shape (N,), integer dtype\n",
"        Index of the closest prototype for every sample.\n",
"    points : ndarray of shape (N, dim)\n",
"        The samples as a float array, in their original order.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If K is not between 1 and N.\n",
"    \"\"\"\n",
"    data = np.atleast_2d(np.asarray(points, dtype=float))\n",
"    N, dim = data.shape\n",
"    if not 1 <= K <= N:\n",
"        raise ValueError(\"K must be between 1 and the number of points\")\n",
"\n",
"    # Initialise with K distinct samples: drawing indices with replacement could pick\n",
"    # the same sample twice, which left a cluster empty and its prototype at NaN.\n",
"    Pc = data[np.random.choice(N, size=K, replace=False)]\n",
"\n",
"    for _ in range(max_iter):\n",
"        # Assignment step: every sample joins its closest prototype.\n",
"        indice = np.argmin(scipy.spatial.distance.cdist(data, Pc), axis=1)\n",
"\n",
"        # Update step: each prototype becomes the mean of its members.\n",
"        counts = np.bincount(indice, minlength=K)\n",
"        sums = np.stack(\n",
"            [np.bincount(indice, weights=data[:, d], minlength=K) for d in range(dim)],\n",
"            axis=1,\n",
"        )\n",
"        new_Pc = Pc.copy()\n",
"        filled = counts > 0  # an empty cluster keeps its previous prototype (no 0/0)\n",
"        new_Pc[filled] = sums[filled] / counts[filled, None]\n",
"\n",
"        # Convergence is measured prototype by prototype, old against new position.\n",
"        shift = np.linalg.norm(new_Pc - Pc, axis=1).max()\n",
"        Pc = new_Pc\n",
"        if shift <= eps:\n",
"            break\n",
"\n",
"    # Final assignment so that the labels match the returned prototypes.\n",
"    indice = np.argmin(scipy.spatial.distance.cdist(data, Pc), axis=1)\n",
"    return Pc, indice, data\n"
]
},
{
"cell_type": "code",
"execution_count": 193,
"metadata": {},
"outputs": [],
"source": [
"colors=['red', 'green','yellow','blue','purple', 'orange']\n",
"def visualisation(points, index, Pc=[0,0], K=1):\n",
"    \"\"\"Scatter-plot 2-D samples coloured by cluster, with the prototypes on top.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    points : array-like of shape (N, 2)\n",
"        Samples to draw; nothing is drawn unless they are non-empty and 2-D.\n",
"    index : array-like of shape (N,)\n",
"        Cluster label of every sample.\n",
"    Pc : array-like of shape (K, 2)\n",
"        Prototypes, drawn as red '+' markers.\n",
"    K : int\n",
"        Unused; kept so that existing calls remain valid.\n",
"    \"\"\"\n",
"    points = np.asarray(points)\n",
"    if points.ndim != 2 or points.shape[1] != 2 or points.shape[0] == 0:\n",
"        return\n",
"    labels = np.asarray(index).astype(int)\n",
"    # One plot call per cluster instead of one per point; colours wrap around\n",
"    # past six clusters instead of raising IndexError.\n",
"    for k in np.unique(labels):\n",
"        members = points[labels == k]\n",
"        plt.plot(members[:,0], members[:,1], 'o', color=colors[k % len(colors)])\n",
"    centres = np.atleast_2d(np.asarray(Pc, dtype=float))\n",
"    plt.plot(centres[:,0], centres[:,1], 'r+')\n",
"    plt.grid(True)\n",
"    # Square window computed from the plotted samples, so the function no longer\n",
"    # depends on the notebook-level `mean` variable.\n",
"    lo = points.min() - 1\n",
"    hi = points.max() + 1\n",
"    plt.axis([lo, hi, lo, hi])"
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {},
"outputs": [],
"source": [
"def img_2_mat(my_img):\n",
"    \"\"\"Flatten an image of shape (rows, cols, channels) to a (rows*cols, channels) matrix.\"\"\"\n",
"    n_pixels = my_img.shape[0] * my_img.shape[1]\n",
"    n_channels = my_img.shape[2]\n",
"    return my_img.reshape(n_pixels, n_channels)"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [],
"source": [
"def mat_2_img(mat,my_img):\n",
"    \"\"\"Fold a pixel matrix back into the 3-D shape of the image `my_img`.\"\"\"\n",
"    target_shape = (my_img.shape[0], my_img.shape[1], my_img.shape[2])\n",
"    return mat.reshape(target_shape)"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [],
"source": [
"def kmeans_image(path_image, K):\n",
"    \"\"\"Quantise the colours of an image to `K` values with k-means.\n",
"\n",
"    Every pixel is replaced by the prototype of its cluster (rounded down). The\n",
"    original and the result are shown side by side, and the result is saved next\n",
"    to the input file, with `_K.jpg` replacing the extension.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    path_image : str\n",
"        Path of the image to read.\n",
"    K : int\n",
"        Number of colours to keep.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Pc : ndarray of shape (K, channels)\n",
"        Colour prototypes.\n",
"    index : ndarray of shape (rows*cols,), integer dtype\n",
"        Prototype index of every pixel, in row-major order.\n",
"    img_seg : ndarray with the shape and dtype of the input image\n",
"        The quantised image.\n",
"    \"\"\"\n",
"    my_img = io.imread(path_image)\n",
"    pixels = img_2_mat(my_img)\n",
"\n",
"    Pc, index, _ = kmeans(pixels, K)\n",
"    index = np.asarray(index).astype(int)  # the labels index the rows of Pc below\n",
"\n",
"    # Fancy indexing paints all pixels at once and builds a new array; the former\n",
"    # per-pixel loop wrote into the reshaped pixel matrix, which overwrote my_img.\n",
"    quantised = np.floor(Pc[index, :]).astype(my_img.dtype)\n",
"    img_seg = mat_2_img(quantised, my_img)\n",
"\n",
"    # splitext strips only the extension; split('.')[0] cut paths such as\n",
"    # \"./fruits.jpg\" at the first dot and produced an empty name.\n",
"    stem = os.path.splitext(path_image)[0]\n",
"    io.imsave(stem + \"_%d.jpg\" % K, img_seg)\n",
"\n",
"    # Two axes, so the original is no longer painted over by the result.\n",
"    fig, (ax_in, ax_out) = plt.subplots(1, 2)\n",
"    ax_in.imshow(my_img)\n",
"    ax_in.set_title(\"original\")\n",
"    ax_out.imshow(img_seg)\n",
"    ax_out.set_title(\"K = %d\" % K)\n",
"    return Pc, index, img_seg\n"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {},
"outputs": [],
"source": [
"# Synthetic data set: `clusters` Gaussian blobs of `nb` points in `dim` dimensions.\n",
"points, mean = gen_points(mean=mean, sd=sd, nb=nb, dim=dim, clusters=clusters)"
]
},
{
"cell_type": "code",
"execution_count": 198,
"metadata": {},
"outputs": [],
"source": [
"# Pairwise distance matrix of the generated points.\n",
"dist = distance(points=points, Pc=points)"
]
},
{
"cell_type": "code",
"execution_count": 199,
"metadata": {},
"outputs": [],
"source": [
"# Keep the returned samples under their own name: unpacking them into `clusters`\n",
"# replaced the blob count set in the configuration cell, so re-running the data\n",
"# generation cell afterwards failed.\n",
"Pc, index, clustered_points = kmeans(points, K=K)"
]
},
{
"cell_type": "code",
"execution_count": 200,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"visualisation(clustered_points, index, Pc, K=K)"
]
},
{
"cell_type": "code",
"execution_count": 201,
"metadata": {},
"outputs": [],
"source": [
"# Colour-quantise the image with 250 prototypes; kmeans_image also saves the result to disk.\n",
"Pc, index, img_seg = kmeans_image(path_image, K=250)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Summarise the segmentation instead of dumping whole arrays.\n",
"print(\"labels:\", index.shape)\n",
"print(\"prototypes:\", Pc.shape, \"- any NaN:\", bool(np.isnan(Pc).any()))\n",
"print(\"segmented image:\", img_seg.shape, img_seg.dtype)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.4 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "2ef431f6525756fa8a44688585fa332ef3b2e5fcfe8fe75df35bbf7028a8b511"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}