Commit 384ff64c authored by Ryan Randles Jones's avatar Ryan Randles Jones
Browse files

Delete Runtime-and-CoreCount-ReqMemCPU.ipynb

parent 6720ce25
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"import sqlite3\n",
"import slurm2sql\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"import seaborn as sb\n",
"import plotly.express as px\n",
"import matplotlib.ticker as ticker\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from RC_styles import rc_styles as style"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# opens a connection to the SQLite file holding Slurm job data since March 2020\n",
"# NOTE(review): absolute cluster path, and the connection is not closed in any visible cell\n",
"db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# df is the starting dataframe: every row and column of the `slurm` table\n",
"df = pd.read_sql('SELECT * FROM slurm', db)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# optional\n",
"\n",
"# show every column (no truncation) when dataframes are displayed, then preview the first 5 rows\n",
"# note: set_option changes the display setting for the rest of the session\n",
"pd.set_option('display.max_columns', None)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run -- once per kernel only: re-running this cell divides the column again\n",
"\n",
"# converts units in ReqMemCPU column from bytes to GiB (divides by 1024**3), overwriting the column in place\n",
"df['ReqMemCPU'] = df['ReqMemCPU'].div(1024**3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run -- once per kernel only: re-running this cell divides the column again\n",
"\n",
"# converts Elapsed time from seconds to hours (divides by 3600), overwriting the column in place\n",
"df['Elapsed'] = df['Elapsed'].div(3600)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# must run\n",
"\n",
"# df_completed is the dataframe of all jobs whose State contains 'COMPLETED'\n",
"# na=False: rows with a missing State are excluded instead of producing a NaN in the\n",
"# boolean mask, which would make the row selection raise\n",
"df_completed = df[df.State.str.contains('COMPLETED', na=False)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ReqMemCPU, Core Count, and Runtime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# inclusive upper bound on ReqMemCPU (GB per CPU after the unit conversion above)\n",
"# for the jobs kept in the plots and clustering below\n",
"UpperlimitGB = 50"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# keep only the three columns analysed in this section\n",
"# .copy() makes df_1 independent of df_completed, so the column assignments in the\n",
"# following cells modify df_1 itself and do not trigger SettingWithCopyWarning\n",
"df_1 = df_completed.loc[:, ['ReqMemCPU', 'Elapsed', 'AllocCPUS']].copy()\n",
"df_1.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# round requested memory per CPU up to the next whole number\n",
"df_1['ReqMemCPU'] = np.ceil(df_1['ReqMemCPU'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# round elapsed time up to the next whole number\n",
"df_1['Elapsed'] = np.ceil(df_1['Elapsed'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# order the jobs by allocated CPU count, smallest first (ascending is the default)\n",
"df_1_sorted = df_1.sort_values('AllocCPUS')\n",
"df_1_sorted.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# keep only jobs whose ReqMemCPU is at or below the UpperlimitGB cutoff\n",
"within_limit = df_1_sorted['ReqMemCPU'].le(UpperlimitGB)\n",
"df_runtime = df_1_sorted[within_limit]\n",
"df_runtime.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"# scatter of allocated CPU count against requested memory per CPU\n",
"# for the completed jobs at or below the UpperlimitGB cutoff\n",
"runtime_graph = sns.scatterplot(x=\"ReqMemCPU\", y=\"AllocCPUS\", data=df_runtime)\n",
"\n",
"plt.title('Allocated CPUs vs Requested RAM per CPU for Completed Jobs Requesting %i GB per CPU or Less' % UpperlimitGB)\n",
"plt.xlabel('ReqMemCPU')\n",
"plt.ylabel('AllocCPUS')\n",
"#plt.yscale(\"log\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"# one regression panel per predictor: Elapsed against ReqMemCPU and against AllocCPUS\n",
"g = sns.PairGrid(\n",
"    data=df_runtime,\n",
"    x_vars=[\"ReqMemCPU\", \"AllocCPUS\"],\n",
"    y_vars=[\"Elapsed\"],\n",
"    height=4,\n",
")\n",
"g.map(sns.regplot, color=\"blue\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"style.default_axes_and_ticks()\n",
"style.figsize()\n",
"\n",
"# scatter-plot matrix over every pair of columns in df_runtime\n",
"# uses the `sns` alias like the other plotting cells (`sb` is the same seaborn module)\n",
"g = sns.PairGrid(df_runtime)\n",
"g.map(plt.scatter);\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# clustering input: the sorted jobs whose ReqMemCPU is at or below the UpperlimitGB cutoff\n",
"df_runtime_cluster = df_1_sorted.loc[df_1_sorted['ReqMemCPU'].le(UpperlimitGB)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# elbow method: fit k-means for k = 1..9 and record the inertia of each fit\n",
"# random_state is fixed (same seed as the final 3-cluster model) so the curve is\n",
"# reproducible from one run to the next\n",
"Sum_of_squared_distances = []\n",
"K = range(1, 10)\n",
"for k in K:\n",
"    km = KMeans(n_clusters=k, random_state=111).fit(df_runtime_cluster)\n",
"    Sum_of_squared_distances.append(km.inertia_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# inertia against k; the bend in the curve suggests a suitable number of clusters\n",
"plt.plot(K, Sum_of_squared_distances, 'bx-')\n",
"plt.gca().set(\n",
"    xlabel='k',\n",
"    ylabel='Sum_of_squared_distances',\n",
"    title='Elbow Method For Optimal k',\n",
")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-Means Clustering (k = 3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# fit the 3-cluster model with a fixed seed (fit returns the estimator itself)\n",
"# and print the coordinates of the cluster centres\n",
"kmeans = KMeans(n_clusters=3, random_state=111).fit(df_runtime_cluster)\n",
"print(kmeans.cluster_centers_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# jobs coloured by cluster label, with the cluster centres overlaid in grey\n",
"# cluster_centers_ columns follow the column order of df_runtime_cluster,\n",
"# so index 0 is ReqMemCPU and index 1 is Elapsed\n",
"runtime_cluster_graph = plt.scatter(\n",
"    df_runtime_cluster['ReqMemCPU'],\n",
"    df_runtime_cluster['Elapsed'],\n",
"    c=kmeans.labels_,\n",
"    cmap='rainbow',\n",
")\n",
"plt.scatter(\n",
"    kmeans.cluster_centers_[:, 0],\n",
"    kmeans.cluster_centers_[:, 1],\n",
"    color='grey',\n",
")\n",
"#plt.yscale(\"log\")\n",
"plt.xlabel('ReqMemCPU')\n",
"plt.ylabel('Runtime')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment