301 lines
34 KiB
Text
301 lines
34 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# TP1 KMEANS\n",
|
|
"\n",
|
|
"On nous propose de coder l'algorithme des kmeans afin de faire du clustering sur 2 classes puis plus de 2 classes.\n",
|
|
"Plus tard, on utilisera notre algorithme pour segmenter une image sur l'information de couleur."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 605,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import scipy.spatial\n",
"from skimage import io"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 606,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Configuration: every value a reader might want to tune lives in this cell.\n",
"SEED = 0  # fixed seed so that Restart Kernel / Run All reproduces the same figures\n",
"np.random.seed(SEED)\n",
"\n",
"clusters = 5  # number of Gaussian blobs to generate\n",
"dim = 2  # dimension of the generated points\n",
"nb = 50  # intended number of points per blob (the `nb` argument of gen_points)\n",
"K = clusters  # number of centroids searched by k-means\n",
"\n",
"path_image = \"fruits.jpg\"  # image segmented at the end of the notebook"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Fonctions à utiliser pour le clustering"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 607,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def gen_points(mean=1, sd=0.5, nb=100, dim=2, clusters=2):\n",
"    \"\"\"Draw `clusters` Gaussian blobs of `nb` points each in `dim` dimensions.\n",
"\n",
"    The `mean` and `sd` arguments are accepted but ignored: each blob gets a random\n",
"    centre (an integer drawn from [0, 5) scaled by a uniform factor in [0, 1)) and a\n",
"    random standard deviation in [0, 1). The same centre value is used for every\n",
"    coordinate of a blob.\n",
"\n",
"    Returns an array of shape (clusters * nb, dim) with the blobs stacked in order.\n",
"    \"\"\"\n",
"    centres = np.random.randint(5, size=clusters)\n",
"    centres = centres * np.random.random(size=clusters)\n",
"    spreads = np.random.random(size=clusters)\n",
"    blob_shape = [nb, dim]\n",
"\n",
"    # The random draws are made in the same order as before, so a seeded run is unchanged.\n",
"    first_blob = np.random.normal(centres[0], spreads[0], size=blob_shape)\n",
"    other_blobs = [\n",
"        np.random.normal(centre, spread, size=blob_shape)\n",
"        for centre, spread in zip(centres[1:], spreads[1:])\n",
"    ]\n",
"    return np.concatenate([first_blob, *other_blobs], axis=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 608,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def distance(points, Pc):\n",
"    \"\"\"Return the pairwise distance matrix between two sets of row vectors.\n",
"\n",
"    Entry [i, j] is the distance (cdist's default metric, Euclidean) between row i\n",
"    of `points` and row j of `Pc`. Both arguments must be 2-D arrays.\n",
"    \"\"\"\n",
"    observations = points[:, :]\n",
"    prototypes = Pc[:, :]\n",
"    return scipy.spatial.distance.cdist(observations, prototypes)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 609,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def kmeans(points=[0, 0], K=1, max_iter=5, eps=0.1):\n",
"    \"\"\"Cluster the rows of `points` into `K` groups with Lloyd's k-means.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    points : array-like, shape (N, dim)\n",
"        One observation per row. A 1-D input is treated as a single row.\n",
"    K : int\n",
"        Number of prototypes (centroids).\n",
"    max_iter : int\n",
"        Maximum number of assignment/update rounds. The default, 5, is the cap\n",
"        that used to be hard-coded.\n",
"    eps : float\n",
"        Stop as soon as the mean displacement of the centroids between two\n",
"        consecutive rounds is at most `eps`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Pc : ndarray of float, shape (K, dim)\n",
"        Final centroids.\n",
"    index : ndarray of int, shape (N,)\n",
"        Index of the nearest centroid for every row of `points`.\n",
"    points : ndarray, shape (N, dim)\n",
"        The input observations, as an array.\n",
"    \"\"\"\n",
"    data = np.atleast_2d(np.asarray(points))\n",
"    coords = data.astype(float)\n",
"    N = coords.shape[0]\n",
"\n",
"    # Start from K distinct observations whenever there are enough of them, so two\n",
"    # prototypes do not begin on the same row.\n",
"    seeds = np.random.choice(N, size=K, replace=K > N)\n",
"    Pc = coords[seeds]\n",
"\n",
"    for _ in range(max_iter):\n",
"        labels = np.argmin(distance(coords, Pc), axis=1)\n",
"\n",
"        Pc_save = Pc\n",
"        Pc = Pc_save.copy()\n",
"        for k in range(K):\n",
"            members = coords[labels == k]\n",
"            # An empty cluster keeps its previous centroid instead of becoming 0/0 = NaN.\n",
"            if len(members):\n",
"                Pc[k] = members.mean(axis=0)\n",
"\n",
"        # Displacement of each centroid with respect to its own previous position.\n",
"        # (The mean of all K x K cross distances stays positive at a fixed point.)\n",
"        shift = np.linalg.norm(Pc - Pc_save, axis=1).mean()\n",
"        if eps >= shift:\n",
"            break\n",
"\n",
"    # Final assignment, so the returned labels match the returned centroids.\n",
"    index = np.argmin(distance(coords, Pc), axis=1)\n",
"    return Pc, index, data\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 610,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"colors = ['red', 'green', 'yellow', 'blue', 'purple', 'orange']\n",
"\n",
"def visualisation(points, index, Pc=[0, 0], K=1):\n",
"    \"\"\"Plot 2-D points coloured by cluster, with the centroids as red crosses.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    points : array-like, shape (N, 2)\n",
"        Points to draw. Nothing is drawn when the second dimension is not 2.\n",
"    index : array-like, shape (N,)\n",
"        Cluster label of every point (integers, possibly stored as floats).\n",
"    Pc : array-like, shape (K, 2)\n",
"        Centroids.\n",
"    K : int\n",
"        Unused; the clusters to draw are taken from the labels in `index`.\n",
"    \"\"\"\n",
"    points = np.asarray(points)\n",
"    if points.shape[1] != 2:\n",
"        return\n",
"\n",
"    labels = np.asarray(index).astype(int)\n",
"    centroids = np.atleast_2d(np.asarray(Pc))\n",
"\n",
"    # One plot call per cluster; the palette is cycled when there are more\n",
"    # clusters than colours.\n",
"    for k in np.unique(labels):\n",
"        members = points[labels == k]\n",
"        plt.plot(members[:, 0], members[:, 1], 'o', color=colors[k % len(colors)])\n",
"    plt.plot(centroids[:, 0], centroids[:, 1], 'r+')\n",
"    plt.grid(True)\n",
"\n",
"    # Axis limits come from the plotted data, not from a global `mean`\n",
"    # that no cell defines.\n",
"    if len(points):\n",
"        x_min, y_min = points.min(axis=0) - 1\n",
"        x_max, y_max = points.max(axis=0) + 1\n",
"        plt.axis([x_min, x_max, y_min, y_max])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 611,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def img_2_mat(my_img):\n",
"    \"\"\"Flatten an image array of shape (H, W, C) into an (H * W, C) matrix, one pixel per row.\"\"\"\n",
"    height, width, channels = (my_img.shape[axis] for axis in range(3))\n",
"    return my_img.reshape((height * width, channels))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 612,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def mat_2_img(mat, my_img):\n",
"    \"\"\"Reshape a pixel matrix back to the (H, W, C) layout of `my_img`.\n",
"\n",
"    Only the shape of `my_img` is used; its contents are not read.\n",
"    \"\"\"\n",
"    target_shape = tuple(my_img.shape[axis] for axis in range(3))\n",
"    return mat.reshape(target_shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 613,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def kmeans_image(path_image, K):\n",
"    \"\"\"Segment an image by clustering its pixel colours with k-means.\n",
"\n",
"    The image is read from `path_image`, every pixel is replaced by the centroid\n",
"    of its cluster, and the result is saved as the input path with its extension\n",
"    replaced by ``_K.jpg`` (e.g. ``fruits_5.jpg``), then shown beside the original.\n",
"\n",
"    Returns the centroids `Pc`, the per-pixel labels `index` (as returned by\n",
"    `kmeans`) and the segmented image `img_seg`, which has the dtype of the input.\n",
"    \"\"\"\n",
"    my_img = io.imread(path_image)\n",
"    Mat = img_2_mat(my_img)\n",
"\n",
"    Pc, index, _ = kmeans(Mat, K)\n",
"\n",
"    # Recolour *every* pixel with the centroid of its cluster. A new array is built\n",
"    # instead of writing into Mat, which may share memory with my_img.\n",
"    labels = np.asarray(index).astype(int)\n",
"    Mat_seg = np.asarray(Pc)[labels]\n",
"    if np.issubdtype(my_img.dtype, np.integer):\n",
"        Mat_seg = np.rint(Mat_seg)  # round instead of truncating when casting back\n",
"    img_seg = mat_2_img(Mat_seg.astype(my_img.dtype), my_img)\n",
"\n",
"    # splitext strips only the final extension, so dots in directory names are safe.\n",
"    root, _ = os.path.splitext(path_image)\n",
"    io.imsave(root + \"_%d.jpg\" % K, img_seg)\n",
"\n",
"    # Show both images side by side; drawing them on the same axes hides the first.\n",
"    _, (ax_src, ax_seg) = plt.subplots(1, 2)\n",
"    ax_src.imshow(my_img)\n",
"    ax_src.set_title(\"Original\")\n",
"    ax_seg.imshow(img_seg)\n",
"    ax_seg.set_title(\"K-means, K=%d\" % K)\n",
"    return Pc, index, img_seg\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 614,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Keyword arguments are required: the first two positional parameters of gen_points\n",
"# are `mean` and `sd`, so passing nb, dim, clusters positionally misassigns all three.\n",
"# K equals `clusters` in the configuration cell and is not rebound by any later cell.\n",
"points = gen_points(nb=nb, dim=dim, clusters=K)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 615,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Check of the helper: distances between every pair of generated points.\n",
"dist = distance(points=points, Pc=points)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 616,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# NOTE(review): the third return value rebinds `clusters`, which the configuration\n",
"# cell defined as the number of blobs; code that needs that count afterwards should use K.\n",
"Pc, index, clusters = kmeans(points=points, K=K)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 617,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# `clusters` is the point array returned by kmeans in the previous cell, not the cluster count.\n",
"visualisation(points=clusters, index=index, Pc=Pc, K=K)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 618,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Segment the image by colour; this reuses (and overwrites) the names Pc and index.\n",
"Pc, index, img_seg = kmeans_image(path_image, K)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(103230,)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# One label per pixel: index has H * W entries.\n",
"print(np.shape(index))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3.8.10 64-bit",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.10"
|
|
},
|
|
"orig_nbformat": 4,
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|