Commit 3a0c54db authored by Ryan Randles Jones's avatar Ryan Randles Jones
Browse files

added normalization to datasets

parent 74a01d8a
......@@ -359,6 +359,19 @@
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Normalize each column by its own maximum so that no single feature dominates\n",
"# the Euclidean distances KMeans relies on. Dividing the whole frame by one\n",
"# global scalar would leave the relative feature scales, and therefore the\n",
"# clustering itself, unchanged.\n",
"column_maxes_runtime = df_runtime_cluster.max()\n",
"\n",
"# A column whose maximum is 0 is divided by 1 instead, to avoid dividing by zero.\n",
"# The scale factors are kept as a plain NumPy array (one entry per column) so the\n",
"# same name broadcasts column-wise both here and when cluster_centers_ is\n",
"# multiplied by it to bring the centers back to the original units.\n",
"df_runtime_cluster_max = column_maxes_runtime.replace(0, 1).to_numpy()\n",
"normalized_runtime_df = df_runtime_cluster / df_runtime_cluster_max\n",
"\n",
"print(normalized_runtime_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -384,8 +397,8 @@
"\n",
"# sets to clusters and returns the cluster points\n",
"kmeans_elapsed_reqmem = KMeans(n_clusters=3, random_state=111)\n",
"kmeans_elapsed_reqmem.fit(df_runtime_cluster)\n",
"print(kmeans_elapsed_reqmem.cluster_centers_)"
"kmeans_elapsed_reqmem.fit(normalized_runtime_df)\n",
"clusterpoints_elapsed_reqmem = kmeans_elapsed_reqmem.cluster_centers_ * df_runtime_cluster_max"
]
},
{
......@@ -489,7 +502,7 @@
"\n",
"elapsed_rqmem_clustergraph = figure.add_subplot(121)\n",
"elapsed_rqmem_clustergraph.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_reqmem.labels_, cmap='rainbow')\n",
"elapsed_rqmem_clustergraph.scatter(kmeans_elapsed_reqmem.cluster_centers_[:,0] ,kmeans_elapsed_reqmem.cluster_centers_[:,1], color='black')\n",
"elapsed_rqmem_clustergraph.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('Elapsed(hours)')\n",
"\n",
......@@ -497,7 +510,7 @@
"elapsed_rqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
"elapsed_rqmem_clustergraph_3d.scatter(df_runtime_cluster['ReqMemCPU'], df_runtime_cluster['Elapsed'], df_runtime_cluster['AllocCPUS'], \n",
" c=kmeans_elapsed_reqmem.labels_ ,cmap='rainbow')\n",
"elapsed_rqmem_clustergraph_3d.scatter(kmeans_elapsed_reqmem.cluster_centers_[:,0] ,kmeans_elapsed_reqmem.cluster_centers_[:,1], color='black')\n",
"elapsed_rqmem_clustergraph_3d.scatter(clusterpoints_elapsed_reqmem[:,0] ,clusterpoints_elapsed_reqmem[:,1], color='black')\n",
"\n",
"\n",
"elapsed_rqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs)')\n",
......@@ -509,7 +522,7 @@
"elapsed_rqmem_clustergraph_3d.yaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"elapsed_rqmem_clustergraph_3d.zaxis._axinfo[\"grid\"].update({\"linewidth\":.5, \"color\" : \"black\"})\n",
"\n",
"plt.show()\n"
"plt.show()"
]
},
{
......@@ -609,8 +622,8 @@
"\n",
"# sets to clusters and returns the cluster points\n",
"kmeans_elapsed_alloc = KMeans(n_clusters=3, random_state=111)\n",
"kmeans_elapsed_alloc.fit(df_runtime_cluster)\n",
"print(kmeans_elapsed_alloc.cluster_centers_)"
"kmeans_elapsed_alloc.fit(normalized_runtime_df)\n",
"clusterpoints_elapsed_alloc = kmeans_elapsed_alloc.cluster_centers_ * df_runtime_cluster_max"
]
},
{
......@@ -714,14 +727,14 @@
"\n",
"elapsed_alloc_clustergraph = figure.add_subplot(121)\n",
"elapsed_alloc_clustergraph.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans_elapsed_alloc.labels_, cmap='rainbow')\n",
"elapsed_alloc_clustergraph.scatter(kmeans_elapsed_alloc.cluster_centers_[:,0] ,kmeans_elapsed_alloc.cluster_centers_[:,1], color='black')\n",
"elapsed_alloc_clustergraph.scatter(clusterpoints_elapsed_alloc[:,0] ,clusterpoints_elapsed_alloc[:,1], color='black')\n",
"plt.xlabel('AllocCPUS')\n",
"plt.ylabel('Elapsed(hours)')\n",
"\n",
"# 3d veiw of the scatterplot for better understanding of the data\n",
"elapsed_alloc_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
"elapsed_alloc_clustergraph_3d.scatter(df_runtime_cluster['AllocCPUS'], df_runtime_cluster['Elapsed'], df_runtime_cluster['ReqMemCPU'], c=kmeans_elapsed_alloc.labels_ ,cmap='rainbow')\n",
"elapsed_alloc_clustergraph_3d.scatter(kmeans_elapsed_alloc.cluster_centers_[:,0] ,kmeans_elapsed_alloc.cluster_centers_[:,1], color='black')\n",
"elapsed_alloc_clustergraph_3d.scatter(clusterpoints_elapsed_alloc[:,0] ,clusterpoints_elapsed_alloc[:,1], color='black')\n",
"elapsed_alloc_clustergraph_3d.set_xlabel('AllocCPUS')\n",
"elapsed_alloc_clustergraph_3d.set_ylabel('Elapsed(hours)')\n",
"elapsed_alloc_clustergraph_3d.set_zlabel('ReqMemCPU(gigs)')\n",
......@@ -881,11 +894,16 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n",
"All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
"column_maxes_alloc = df_alloc_cluster.max()\n",
"df_alloc_cluster_max = column_maxes_alloc.max()\n",
"normalized_alloc_df = df_alloc_cluster / df_alloc_cluster_max\n",
"\n",
"print(normalized_alloc_df)"
]
},
{
......@@ -898,8 +916,16 @@
"\n",
"# sets to clusters and returns the cluster points\n",
"kmeans_alloc_reqmem = KMeans(n_clusters=3, random_state=111)\n",
"kmeans_alloc_reqmem.fit(df_alloc_cluster)\n",
"print(kmeans_alloc_reqmem.cluster_centers_)"
"kmeans_alloc_reqmem.fit(normalized_alloc_df)\n",
"clusterpoints_alloc_reqmem = kmeans_alloc_reqmem.cluster_centers_ * df_alloc_cluster_max"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The next 5 cells create the clusters, find each cluster label, and create datasets of data in each cluster.\n",
"All the datasets are created for both the cluster graphs and plots of each cluster before those graphs are made."
]
},
{
......@@ -915,7 +941,7 @@
"# 0 = purple cluster\n",
"# 1 = green cluster\n",
"# 2 = red cluster\n",
"np.unique(kmeans_elapsed_alloc.labels_)"
"np.unique(kmeans_alloc_reqmem.labels_)"
]
},
{
......@@ -1003,14 +1029,14 @@
"\n",
"alloc_reqmem_cluster_graph = figure.add_subplot(121)\n",
"alloc_reqmem_cluster_graph.scatter(df_alloc_cluster['ReqMemCPU'],df_alloc_cluster['AllocCPUS'], c=kmeans_alloc_reqmem.labels_, cmap='rainbow')\n",
"alloc_reqmem_cluster_graph.scatter(kmeans_alloc_reqmem.cluster_centers_[:,0] ,kmeans_alloc_reqmem.cluster_centers_[:,1], color='black')\n",
"alloc_reqmem_cluster_graph.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,1], color='black')\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('AllocCPUS')\n",
"\n",
"# 3d veiw of the scatterplot for better understanding of the data\n",
"alloc_reqmem_clustergraph_3d = figure.add_subplot(122, projection='3d')\n",
"alloc_reqmem_clustergraph_3d.scatter(df_alloc_cluster['ReqMemCPU'], df_alloc_cluster['AllocCPUS'], df_alloc_cluster['Elapsed'], c=kmeans_alloc_reqmem.labels_ ,cmap='rainbow')\n",
"alloc_reqmem_clustergraph_3d.scatter(kmeans_alloc_reqmem.cluster_centers_[:,0] ,kmeans_alloc_reqmem.cluster_centers_[:,1], color='black')\n",
"alloc_reqmem_clustergraph_3d.scatter(clusterpoints_alloc_reqmem[:,0] ,clusterpoints_alloc_reqmem[:,1], color='black')\n",
"alloc_reqmem_clustergraph_3d.set_xlabel('ReqMemCPU(gigs')\n",
"alloc_reqmem_clustergraph_3d.set_ylabel('AllocCPUS')\n",
"alloc_reqmem_clustergraph_3d.set_zlabel('Elapsed(hours)')\n",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment