Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
createAndParseSACCT
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Eesaan Atluri
createAndParseSACCT
Commits
af17830d
Commit
af17830d
authored
4 years ago
by
Ryan Randles Jones
Browse files
Options
Downloads
Patches
Plain Diff
added cluster analysis graphs
parent
657abaaa
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Cluster_Analysis.ipynb
+436
-0
436 additions, 0 deletions
Cluster_Analysis.ipynb
with
436 additions
and
0 deletions
Cluster_Analysis.ipynb
0 → 100644
+
436
−
0
View file @
af17830d
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Setup Options"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# year-date-month\n",
"#start_date = '2020-10-09'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# sets min and max parameters for ReqMemCPU\n",
"LowerlimitGB = 0\n",
"UpperlimitGB = 50"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# sets min and max parameters for AllocCPUS\n",
"LowerlimitAllocCPU = 0\n",
"UpperlimitAllocCPU = 50"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# sets min and max parameters for Elapsed\n",
"LowerlimitElapsed = 0\n",
"UpperlimitElapsed = 150.02"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Enter 'none', '0-1', or 'log' as a choice for data normalization\n",
"Data_Normalization_Choice = 'none'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"import sqlite3\n",
"import slurm2sql\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"import seaborn as sb\n",
"import plotly.express as px\n",
"import matplotlib.ticker as ticker\n",
"import numpy as np\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"import os\n",
"from RC_styles import rc_styles as style\n",
"from sklearn.cluster import KMeans"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Database Creation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# creates database of info from March 2020 using sqlite 3\n",
"db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# df is starting database\n",
"df = pd.read_sql('SELECT * FROM slurm', db)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# df_1 is dataframe of all completed jobs\n",
"df_1 = df[df.State.str.contains('COMPLETED')]\n",
"#df_completed.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# dataset of needed columns for all graphs below\n",
"df_completed = df_1.loc[:,['ReqMemCPU', 'Elapsed', 'AllocCPUS']]\n",
"#df_1.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# converts units in ReqMemCPU column from bytes to gigs and rounds up to nearest whole number\n",
"df_completed['ReqMemCPU'] = df_completed['ReqMemCPU'].div(1024**3).apply(np.ceil).apply(int)\n",
"#df_completed.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# converts Elapsed time to hours (from seconds) and rounds to the nearest 2 decimal places\n",
"df_completed['Elapsed'] = df_completed['Elapsed'].div(3600).round(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# creates dataset of ReqMemCPU, Elapsed, and AllocCPUS for completed jobs using the min and max parameters created above\n",
"df_clustering = df_completed[(df_completed['ReqMemCPU'] <= UpperlimitGB) & \n",
" (df_completed['ReqMemCPU'] >= LowerlimitGB) & \n",
" (df_completed['AllocCPUS'] <= UpperlimitAllocCPU) & \n",
" (df_completed['AllocCPUS'] >= LowerlimitAllocCPU)\n",
" & \n",
" (df_completed['Elapsed'] <= UpperlimitElapsed) & \n",
" (df_completed['Elapsed'] >= LowerlimitElapsed)]\n",
"df_clustering.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Normalizing the Data for ReqMem/Elapsed"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if Data_Normalization_Choice == '0-1':\n",
" column_max = df_clustering.max()\n",
" df_clustering_max = column_max.max()\n",
" fit = df_clustering / df_clustering_max\n",
" print(\"0-1\")\n",
" \n",
"elif Data_Normalization_Choice == 'log':\n",
" fit = np.log10(df_clustering+1)\n",
" print(\"log\")\n",
" \n",
"else:\n",
" fit = df_clustering\n",
" print(\"none\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# kmeans Clustering"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# sets to clusters and returns the cluster points\n",
"kmeans_cluster = KMeans(n_clusters=3, random_state=111)\n",
"kmeans_cluster.fit(fit)\n",
"print(kmeans_cluster.cluster_centers_)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reverting Cluster Points Back to align with UnNormalized data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if Data_Normalization_Choice == '0-1':\n",
" clusterpoints = kmeans_cluster.cluster_centers_ * df_clustering_max\n",
" print(\"0-1\")\n",
" \n",
"elif Data_Normalization_Choice == 'log':\n",
" clusterpoints = 10 ** (kmeans_cluster.cluster_centers_) - 1\n",
" print(\"log\")\n",
" \n",
"else:\n",
" clusterpoints = kmeans_cluster.cluster_centers_\n",
" print(\"none\")\n",
" print(clusterpoints[:,0],clusterpoints[:,1])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# Text shown on an axis for each clustered column.\n",
"_AXIS_LABELS = {\n",
"    'ReqMemCPU': 'ReqMemCPU(gigs)',\n",
"    'Elapsed': 'Elapsed(hours)',\n",
"    'AllocCPUS': 'AllocCPUS',\n",
"}\n",
"\n",
"# k-means was fitted on a frame with df_clustering's columns, so column i of\n",
"# clusterpoints is the centre coordinate for df_clustering.columns[i].\n",
"_CENTER_COLUMN = {name: i for i, name in enumerate(df_clustering.columns)}\n",
"\n",
"\n",
"def _cluster_panel(fig, position, columns, alpha=None):\n",
"    \"\"\"Draw one panel of the 3x3 grid and return its axes.\n",
"\n",
"    fig      -- figure that receives the subplot\n",
"    position -- 1-based slot in the 3x3 grid\n",
"    columns  -- two (2-D panel) or three (3-D panel) df_clustering column\n",
"                names, in x, y[, z] order\n",
"    alpha    -- opacity of the job points; None keeps matplotlib's default\n",
"    \"\"\"\n",
"    is_3d = len(columns) == 3\n",
"    ax = fig.add_subplot(3, 3, position, projection='3d' if is_3d else None)\n",
"\n",
"    # Jobs, coloured by the cluster k-means assigned them to.\n",
"    ax.scatter(*[df_clustering[col] for col in columns],\n",
"               c=kmeans_cluster.labels_, cmap='rainbow', alpha=alpha)\n",
"    # Cluster centres in black, on THIS panel's axes and with one coordinate per\n",
"    # axis. A 3-D scatter given only x and y puts every centre at z=0.\n",
"    ax.scatter(*[clusterpoints[:, _CENTER_COLUMN[col]] for col in columns],\n",
"               color='black')\n",
"\n",
"    ax.set_xlabel(_AXIS_LABELS[columns[0]])\n",
"    ax.set_ylabel(_AXIS_LABELS[columns[1]])\n",
"    if is_3d:\n",
"        ax.set_zlabel(_AXIS_LABELS[columns[2]])\n",
"        # sets size and color for gridlines by axis\n",
"        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):\n",
"            axis._axinfo[\"grid\"].update({\"linewidth\": .5, \"color\": \"black\"})\n",
"    return ax\n",
"\n",
"\n",
"figure = plt.figure()\n",
"figure.set_size_inches(20, 20)\n",
"\n",
"# Row 1: the three 2-D views.\n",
"elapsed_rqmem_clustergraph = _cluster_panel(figure, 1, ('ReqMemCPU', 'Elapsed'))\n",
"elapsed_alloc_clustergraph = _cluster_panel(figure, 2, ('AllocCPUS', 'Elapsed'))\n",
"alloc_rqmem_clustergraph = _cluster_panel(figure, 3, ('ReqMemCPU', 'AllocCPUS'))\n",
"\n",
"# Rows 2 and 3: the same three 3-D views, first opaque, then with alpha=.08\n",
"# so dense regions stay readable.\n",
"for row_start, point_alpha in ((4, None), (7, .08)):\n",
"    alloc_reqmem_clustergraph_3d = _cluster_panel(\n",
"        figure, row_start, ('ReqMemCPU', 'AllocCPUS', 'Elapsed'), point_alpha)\n",
"    elapsed_alloc_clustergraph_3d = _cluster_panel(\n",
"        figure, row_start + 1, ('AllocCPUS', 'ReqMemCPU', 'Elapsed'), point_alpha)\n",
"    elapsed_rqmem_clustergraph_3d = _cluster_panel(\n",
"        figure, row_start + 2, ('ReqMemCPU', 'Elapsed', 'AllocCPUS'), point_alpha)\n",
"\n",
"# sets the spacing\n",
"# top = space between title and graphs - increase number to bring title down and decrease to bring title up\n",
"# left = space to the left\n",
"# wspace = padding on both sides of graphs\n",
"# hspace = padding on top and bottom of graphs\n",
"figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)\n",
"figure.suptitle('Clusters', fontsize=20)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:markdown id: tags:
# Data Setup Options
%% Cell type:code id: tags:
```
# year-date-month
#start_date = '2020-10-09'
```
%% Cell type:code id: tags:
```
# must run
# Inclusive lower/upper bounds on ReqMemCPU used when selecting jobs to cluster.
LowerlimitGB, UpperlimitGB = 0, 50
```
%% Cell type:code id: tags:
```
# must run
# Inclusive lower/upper bounds on AllocCPUS used when selecting jobs to cluster.
LowerlimitAllocCPU, UpperlimitAllocCPU = 0, 50
```
%% Cell type:code id: tags:
```
# must run
# Inclusive lower/upper bounds on Elapsed used when selecting jobs to cluster.
LowerlimitElapsed, UpperlimitElapsed = 0, 150.02
```
%% Cell type:code id: tags:
```
# Data normalization applied before clustering: enter 'none', '0-1', or 'log'.
# Any other value is handled like 'none' by the normalization cells below.
Data_Normalization_Choice = 'none'
```
%% Cell type:markdown id: tags:
# Imports
%% Cell type:code id: tags:
```
# must run
import sqlite3
import slurm2sql
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import seaborn as sb
import plotly.express as px
import matplotlib.ticker as ticker
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import os
from RC_styles import rc_styles as style
from sklearn.cluster import KMeans
```
%% Cell type:markdown id: tags:
# Database Creation
%% Cell type:code id: tags:
```
# must run
# Opens a sqlite3 connection to the Slurm job database (info from March 2020).
db = sqlite3.connect(
    database='/data/rc/rc-team/slurm-since-March.sqlite3',
)
```
%% Cell type:code id: tags:
```
# must run
# df is the starting dataframe: every row and column of the `slurm` table.
df = pd.read_sql(sql='SELECT * FROM slurm', con=db)
```
%% Cell type:code id: tags:
```
# must run
# df_1 is the dataframe of all jobs whose State text contains 'COMPLETED'.
# na=False counts a missing State as "not completed"; without it a NaN in the
# column ends up in the mask and the boolean indexing raises.
df_1 = df[df.State.str.contains('COMPLETED', na=False)]
```
%% Cell type:code id: tags:
```
# must run
# Dataset of the three columns needed by all graphs below. The explicit .copy()
# makes df_completed an independent frame, so the in-place unit conversions in
# the following cells do not trigger SettingWithCopyWarning.
df_completed = df_1.loc[:, ['ReqMemCPU', 'Elapsed', 'AllocCPUS']].copy()
```
%% Cell type:code id: tags:
```
# must run
# Converts the ReqMemCPU column from bytes to gigs (1024**3 bytes each) and
# rounds up to the nearest whole number. np.ceil and astype work on the whole
# column at once instead of two element-by-element .apply passes.
# NOTE: the column is overwritten in place, so running this cell a second time
# without rebuilding df_completed converts the values again.
df_completed['ReqMemCPU'] = np.ceil(df_completed['ReqMemCPU'].div(1024**3)).astype(int)
```
%% Cell type:code id: tags:
```
# must run
# Converts Elapsed from seconds to hours, rounded to the nearest 2 decimal places.
# NOTE: the column is overwritten in place, so re-running this cell divides again.
df_completed['Elapsed'] = (df_completed['Elapsed'] / 3600).round(2)
```
%% Cell type:code id: tags:
```
# must run
# Keeps the completed jobs whose ReqMemCPU, AllocCPUS and Elapsed all lie inside
# the inclusive [lower, upper] limits set in the setup cells.
# Series.between is inclusive on both ends, matching the >= / <= comparisons.
df_clustering = df_completed[
    df_completed['ReqMemCPU'].between(LowerlimitGB, UpperlimitGB)
    & df_completed['AllocCPUS'].between(LowerlimitAllocCPU, UpperlimitAllocCPU)
    & df_completed['Elapsed'].between(LowerlimitElapsed, UpperlimitElapsed)
]
df_clustering.head(5)
```
%% Cell type:markdown id: tags:
# Normalizing the Data for ReqMem/Elapsed
%% Cell type:code id: tags:
```
# Builds `fit`, the frame handed to k-means, according to Data_Normalization_Choice.
if Data_Normalization_Choice == 'log':
    # log10(x + 1) so that zero values stay finite
    fit = np.log10(df_clustering + 1)
    print("log")

elif Data_Normalization_Choice == '0-1':
    # NOTE(review): every column is divided by ONE number, the largest value in
    # the whole frame, not by its own column maximum. Presumably per-column
    # scaling was intended - confirm before relying on this mode.
    column_max = df_clustering.max()
    df_clustering_max = column_max.max()
    fit = df_clustering.div(df_clustering_max)
    print("0-1")

else:
    # any other choice: cluster on the raw values
    fit = df_clustering
    print("none")
```
%% Cell type:markdown id: tags:
# kmeans Clustering
%% Cell type:code id: tags:
```
# must run
# Fits k-means with 3 clusters (random_state fixed at 111) on `fit` and prints
# the cluster points. KMeans.fit returns the fitted estimator itself.
kmeans_cluster = KMeans(n_clusters=3, random_state=111).fit(fit)
print(kmeans_cluster.cluster_centers_)
```
%% Cell type:markdown id: tags:
# Reverting Cluster Points Back to align with UnNormalized data
%% Cell type:code id: tags:
```
# Maps the fitted cluster centres back to the units of the un-normalized data
# by undoing whichever normalization was chosen.
if Data_Normalization_Choice == 'log':
    # inverse of log10(x + 1)
    clusterpoints = np.power(10, kmeans_cluster.cluster_centers_) - 1
    print("log")

elif Data_Normalization_Choice == '0-1':
    # inverse of dividing by the single frame-wide maximum
    clusterpoints = df_clustering_max * kmeans_cluster.cluster_centers_
    print("0-1")

else:
    clusterpoints = kmeans_cluster.cluster_centers_
    print("none")
    # the first two centre columns are printed only in this branch
    print(clusterpoints[:,0],clusterpoints[:,1])
```
%% Cell type:code id: tags:
```
# must run

# Text shown on an axis for each clustered column.
_AXIS_LABELS = {
    'ReqMemCPU': 'ReqMemCPU(gigs)',
    'Elapsed': 'Elapsed(hours)',
    'AllocCPUS': 'AllocCPUS',
}

# k-means was fitted on a frame with df_clustering's columns, so column i of
# clusterpoints is the centre coordinate for df_clustering.columns[i].
_CENTER_COLUMN = {name: i for i, name in enumerate(df_clustering.columns)}


def _cluster_panel(fig, position, columns, alpha=None):
    """Draw one panel of the 3x3 grid and return its axes.

    fig      -- figure that receives the subplot
    position -- 1-based slot in the 3x3 grid
    columns  -- two (2-D panel) or three (3-D panel) df_clustering column
                names, in x, y[, z] order
    alpha    -- opacity of the job points; None keeps matplotlib's default
    """
    is_3d = len(columns) == 3
    ax = fig.add_subplot(3, 3, position, projection='3d' if is_3d else None)

    # Jobs, coloured by the cluster k-means assigned them to.
    ax.scatter(*[df_clustering[col] for col in columns],
               c=kmeans_cluster.labels_, cmap='rainbow', alpha=alpha)
    # Cluster centres in black, on THIS panel's axes and with one coordinate per
    # axis. A 3-D scatter given only x and y puts every centre at z=0.
    ax.scatter(*[clusterpoints[:, _CENTER_COLUMN[col]] for col in columns],
               color='black')

    ax.set_xlabel(_AXIS_LABELS[columns[0]])
    ax.set_ylabel(_AXIS_LABELS[columns[1]])
    if is_3d:
        ax.set_zlabel(_AXIS_LABELS[columns[2]])
        # sets size and color for gridlines by axis
        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
            axis._axinfo["grid"].update({"linewidth": .5, "color": "black"})
    return ax


figure = plt.figure()
figure.set_size_inches(20, 20)

# Row 1: the three 2-D views.
elapsed_rqmem_clustergraph = _cluster_panel(figure, 1, ('ReqMemCPU', 'Elapsed'))
elapsed_alloc_clustergraph = _cluster_panel(figure, 2, ('AllocCPUS', 'Elapsed'))
alloc_rqmem_clustergraph = _cluster_panel(figure, 3, ('ReqMemCPU', 'AllocCPUS'))

# Rows 2 and 3: the same three 3-D views, first opaque, then with alpha=.08
# so dense regions stay readable.
for row_start, point_alpha in ((4, None), (7, .08)):
    alloc_reqmem_clustergraph_3d = _cluster_panel(
        figure, row_start, ('ReqMemCPU', 'AllocCPUS', 'Elapsed'), point_alpha)
    elapsed_alloc_clustergraph_3d = _cluster_panel(
        figure, row_start + 1, ('AllocCPUS', 'ReqMemCPU', 'Elapsed'), point_alpha)
    elapsed_rqmem_clustergraph_3d = _cluster_panel(
        figure, row_start + 2, ('ReqMemCPU', 'Elapsed', 'AllocCPUS'), point_alpha)

# sets the spacing
# top = space between title and graphs - increase number to bring title down and decrease to bring title up
# left = space to the left
# wspace = padding on both sides of graphs
# hspace = padding on top and bottom of graphs
figure.subplots_adjust(left=0.0, wspace=0.2, top=.92, hspace=0.3)
figure.suptitle('Clusters', fontsize=20)

plt.show()
```
%% Cell type:code id: tags:
```
```
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment