Commit eb84066c authored by Ryan Randles Jones's avatar Ryan Randles Jones
Browse files

Merge branch 'Cores-and-Runtime-Clustering' into 'master'

Cores and runtime clustering

See merge request rrand11/createandparsesacct!2
parents 3146b60f ebf6a277
......@@ -127,7 +127,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# ReqMemCPU,Corecount,Runtime"
"# ReqMemCPU,Corecount,Runtime Clustering"
]
},
{
......@@ -138,9 +138,9 @@
"source": [
"# must run\n",
"\n",
"# sets min and max parameters for ReqMemCPU\n",
"UpperlimitGB = 50\n",
"LowerlimitGB = 0"
"# dataset of needed columns for all graphs below\n",
"df_1 = df_completed.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
"df_1.head(5)"
]
},
{
......@@ -151,9 +151,8 @@
"source": [
"# must run\n",
"\n",
"# sets min and max parameters for AllocCPUS\n",
"UpperlimitAllocCPU = 20\n",
"LowerlimitAllocCPU = 0"
"# rounds ReqMemCPU up to nearest whole number\n",
"df_1['ReqMemCPU'] = df_1['ReqMemCPU'].apply(np.ceil)"
]
},
{
......@@ -164,9 +163,8 @@
"source": [
"# must run\n",
"\n",
"# dataset of needed columns for all graphs below\n",
"df_1 = df_completed.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
"df_1.head(5)"
"# rounds Elapsed up to nearest 2 decimal places\n",
"df_1['Elapsed'] = df_1['Elapsed'].round(2)"
]
},
{
......@@ -177,8 +175,9 @@
"source": [
"# must run\n",
"\n",
"# rounds ReqMemCPU up to nearest whole number\n",
"df_1['ReqMemCPU'] = df_1['ReqMemCPU'].apply(np.ceil)"
"# sorts dataset by AllocCPUS for easy visualization\n",
"df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n",
"df_1_sorted.head(5)"
]
},
{
......@@ -189,8 +188,9 @@
"source": [
"# must run\n",
"\n",
"# rounds Elapsed up to nearest 2 decimal places\n",
"df_1['Elapsed'] = df_1['Elapsed'].round(2)"
"# sets min and max parameters for ReqMemCPU\n",
"UpperlimitGB = 50\n",
"LowerlimitGB = 0"
]
},
{
......@@ -201,9 +201,9 @@
"source": [
"# must run\n",
"\n",
"# sorts dataset by AllocCPUS for easy visualization\n",
"df_1_sorted = df_1.sort_values(by='AllocCPUS', ascending=True)\n",
"df_1_sorted.head(5)"
"# sets min and max parameters for AllocCPUS\n",
"UpperlimitAllocCPU = 20\n",
"LowerlimitAllocCPU = 0"
]
},
{
......@@ -215,8 +215,8 @@
"# must run\n",
"\n",
"# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
"df_runtime = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
"df_runtime.head(5)"
"df_facet = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
"df_facet.head(5)"
]
},
{
......@@ -232,30 +232,42 @@
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"full_facet = sb.PairGrid(df_runtime)\n",
"full_facet = sb.PairGrid(df_facet)\n",
"full_facet.map(plt.scatter);\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Detailed Look at Elapsed Time - In terms of Requested RAM and Cores"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"runtime_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\",data=df_runtime)\n",
"\n",
"plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB)\n",
"\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('AllocCPUS')\n",
"#plt.yscale(\"log\")\n",
"# sets min and max parameters for ReqMemCPU for clustered Elapsed Time Graphs\n",
"UpperlimitGB_elapsed = 50\n",
"LowerlimitGB_elapsed = 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"plt.show()"
"# sets min and max parameters for AllocCPUS for clustered Elapsed Time Graphs\n",
"UpperlimitAllocCPU_elapsed = 20\n",
"LowerlimitAllocCPU_elapsed = 0"
]
},
{
......@@ -266,9 +278,9 @@
"source": [
"# must run\n",
"\n",
"# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above for clustering\n",
"df_runtime_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU)]\n",
"df_runtime_cluster.tail(5)"
"# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the min and max parameters created above\n",
"df_runtime_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_elapsed) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_elapsed) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_elapsed) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_elapsed)]\n",
"df_runtime_cluster.head(5)"
]
},
{
......@@ -330,8 +342,8 @@
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
"reqmem_alloc.map(sns.regplot, color=\"blue\")"
"elapsed_reqmem_alloc = sns.PairGrid(df_runtime_cluster, y_vars=[\"Elapsed\"], x_vars=[\"ReqMemCPU\", \"AllocCPUS\"], height=4)\n",
"elapsed_reqmem_alloc.map(sns.regplot, color=\"blue\")"
]
},
{
......@@ -346,12 +358,12 @@
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
"elapsed_runtime_cluster_graph = plt.scatter(df_runtime_cluster['ReqMemCPU'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
"plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
"\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('Elapsed(hours)')\n",
"plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB)\n",
"plt.title('Runtime per Requested gigs of RAM %i gigs or less'%UpperlimitGB_elapsed)\n",
"plt.show()"
]
},
......@@ -367,12 +379,146 @@
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
"elapsed_alloc_cluster_graph = plt.scatter(df_runtime_cluster['AllocCPUS'],df_runtime_cluster['Elapsed'], c=kmeans.labels_, cmap='rainbow')\n",
"plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,1], color='black')\n",
"\n",
"plt.xlabel('AllocCPUS')\n",
"plt.ylabel('Elapsed(hours)')\n",
"plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU)\n",
"plt.title('Runtime per Core %i cores or less'%UpperlimitAllocCPU_elapsed)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Detailed Look at Allocated Cores - In terms of Requested RAM"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# second set of min and max parameters for ReqMemCPU to use for AllocCPU/ReqMemCPU cluster graph \n",
"UpperlimitGB_alloc = 50\n",
"LowerlimitGB_alloc = 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# sets min and max parameters for AllocCPUS\n",
"UpperlimitAllocCPU_alloc = 60\n",
"LowerlimitAllocCPU_alloc = 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS using the _alloc min and max parameters created above\n",
"df_allocCPUS_cluster = df_1_sorted[(df_1_sorted['ReqMemCPU'] <= UpperlimitGB_alloc) & (df_1_sorted['ReqMemCPU'] >= LowerlimitGB_alloc) & (df_1_sorted['AllocCPUS'] <= UpperlimitAllocCPU_alloc) & (df_1_sorted['AllocCPUS'] >= LowerlimitAllocCPU_alloc)]\n",
"# previews the first rows of the filtered dataset\n",
"df_allocCPUS_cluster.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# sets up info for plotting the optimal number of clusters - uses df_allocCPUS_cluster dataset\n",
"Sum_of_squared_distances = []\n",
"K = range(1,10)\n",
"for k in K:\n",
" # fixed random_state (same seed as the final KMeans fit) keeps the elbow curve reproducible between runs\n",
" km = KMeans(n_clusters=k, random_state=111)\n",
" km = km.fit(df_allocCPUS_cluster)\n",
" Sum_of_squared_distances.append(km.inertia_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# the bend in the graph is the optimal number of clusters for graphs using the df_allocCPUS_cluster dataset\n",
"plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
"plt.xlabel('k')\n",
"plt.ylabel('Sum_of_squared_distances')\n",
"plt.title('Elbow Method For Optimal k')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# fits KMeans with 3 clusters on the df_allocCPUS_cluster dataset and prints the cluster centers\n",
"kmeans = KMeans(n_clusters=3, random_state=111)\n",
"kmeans.fit(df_allocCPUS_cluster)\n",
"print(kmeans.cluster_centers_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"alloc_reqmem_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\",data=df_allocCPUS_cluster)\n",
"\n",
"plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n",
"\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('AllocCPUS')\n",
"#plt.yscale(\"log\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# clustered graph\n",
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"alloc_reqmem_cluster_graph = plt.scatter(df_allocCPUS_cluster['ReqMemCPU'],df_allocCPUS_cluster['AllocCPUS'], c=kmeans.labels_, cmap='rainbow')\n",
"# kmeans was fit on the columns [ReqMemCPU, Elapsed, AllocCPUS], so the centers' AllocCPUS coordinate is column 2 (column 1 is Elapsed)\n",
"plt.scatter(kmeans.cluster_centers_[:,0] ,kmeans.cluster_centers_[:,2], color='black')\n",
"\n",
"plt.xlabel('ReqMemCPU(gigs)')\n",
"plt.ylabel('AllocCPUS')\n",
"plt.title('Number of Cores used by Requested RAM %i gigs or less'%UpperlimitGB_alloc)\n",
"plt.show()"
]
},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment