Commit 68721f64 authored by KOMAL BADI's avatar KOMAL BADI
Browse files

Converted the ReqMemCPU to GB

parent 75154fbf
......@@ -17,7 +17,8 @@
"metadata": {},
"outputs": [],
"source": [
"db = sqlite3.connect('/data/rc/rc-team/slurm-since-March-allocation.sqlite3')"
"db = sqlite3.connect('/data/rc/rc-team/slurm-since-March-allocation.sqlite3')\n",
"#db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')"
]
},
{
......@@ -82,7 +83,7 @@
"metadata": {},
"outputs": [],
"source": [
"df1['Waiting'] = df1['Waiting']/60"
"df1['Waiting'] = df1['Waiting']/60 #computing waiting time"
]
},
{
......@@ -91,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"df1['ReqMemCPU']=df1['ReqMemCPU']/(1024*1024*1024)"
"df1['ReqMemCPU']=df1['ReqMemCPU']/(1024*1024*1024) #converting ReqMemCPU to GB"
]
},
{
......@@ -100,7 +101,7 @@
"metadata": {},
"outputs": [],
"source": [
"df1['TotalRAM']=df1['NCPUS']*df1['ReqMemCPU']"
"df1['ReqTotalRAM']=df1['NCPUS']*df1['ReqMemCPU'] #computing ReqTotalRAM"
]
},
{
......@@ -118,7 +119,7 @@
"metadata": {},
"outputs": [],
"source": [
"df1['TotalRAM'].describe()"
"df1['ReqTotalRAM'].describe()"
]
},
{
......@@ -137,7 +138,7 @@
"metadata": {},
"outputs": [],
"source": [
"df1.insert(64,'wait_period_cat',category)"
"df1.insert(64,'wait_period_cat',category)\n"
]
},
{
......@@ -157,7 +158,9 @@
"metadata": {},
"outputs": [],
"source": [
"df_slected_bin = get_row_as_bin(\">2000min\")"
"\n",
"df_slected_bin = get_row_as_bin(\">2000min\")\n",
"df1.head(10)"
]
},
{
......@@ -166,7 +169,7 @@
"metadata": {},
"outputs": [],
"source": [
"t = df_slected_bin[['NCPUS','wait_period_cat','TotalRAM','ReqMemCPU','User']]"
"t = df_slected_bin[['NCPUS','wait_period_cat','ReqTotalRAM','ReqMemCPU','User','Partition']]"
]
},
{
......@@ -184,8 +187,8 @@
"metadata": {},
"outputs": [],
"source": [
"gp = t['TotalRAM'].value_counts(normalize=False,sort=False).plot(kind='bar')\n",
"gp.set_xlabel('Total RAM for the selected bin')\n",
"gp = t['ReqTotalRAM'].value_counts(normalize=False,sort=False).plot(kind='bar')\n",
"gp.set_xlabel('Req Total RAM for the selected bin')\n",
"gp.set_ylabel('Frequency')"
]
},
......@@ -195,7 +198,7 @@
"metadata": {},
"outputs": [],
"source": [
"t['TotalRAM'].describe()"
"t['ReqTotalRAM'].describe()"
]
},
{
......@@ -206,10 +209,9 @@
"source": [
"import matplotlib.pyplot as plt\n",
"fig= plt.figure(figsize=(10,10))\n",
"plt.scatter(t['ReqTotalRAM'],t['NCPUS'])\n",
"\n",
"plt.scatter(df1['TotalRAM'],df1['NCPUS'])\n",
"\n",
"plt.xlabel('Total RAM')\n",
"plt.xlabel('Req Total RAM')\n",
"plt.ylabel('N CPUS')\n",
"\n",
"\n",
......@@ -224,21 +226,592 @@
"source": [
"fig= plt.figure(figsize=(10,10))\n",
"\n",
"plt.scatter(df1['TotalRAM'],df1['ReqMemCPU'])\n",
"plt.scatter(t['ReqTotalRAM'],t['ReqMemCPU'])\n",
"\n",
"plt.xlabel('Total RAM')\n",
"plt.xlabel('Req Total RAM')\n",
"plt.ylabel('ReqMemCPU')\n",
"\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scatterplot for TotalRAM vs NCPUS\n",
"The threshold line here is to represent Number of CPUS between [2,50] for RAM [0,3500]\n",
"The slope of this line is 375/48 which is 7.8125"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sam=375/48\n",
"print(sam)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"fig = px.scatter(t, x=\"NCPUS\", y=\"ReqTotalRAM\", color=\"Partition\")\n",
"fig.add_trace(\n",
" go.Scatter(\n",
" x=[2,50],\n",
" y=[0,375],\n",
" mode=\"lines\",\n",
" line=go.scatter.Line(color=\"black\"),\n",
" showlegend=False))\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scatterplot for TotalRAM vs NCPUS\n",
"The threshold line here is a linear regression line using ordinary least square method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = px.scatter(t, x=\"ReqTotalRAM\", y=\"NCPUS\", trendline=\"ols\")\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filter dataset to pullout Failed jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_jobs(state):\n",
" w = df1.loc[df1['State'] ==state ]\n",
" return w"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_failed = get_jobs(\"FAILED\")\n",
"\n",
"df_failed_jobs=df_failed[['Elapsed','State','Partition','ReqTotalRAM','Waiting','CPUTime']]\n",
"df_failed_jobs['CPUTime']=df_failed_jobs['CPUTime']/60\n",
"df_failed_jobs['Elapsed']=df_failed_jobs['Elapsed']/60"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_failed_jobs.head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import plotly.graph_objects as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"labels = df_failed_jobs['Partition']\n",
"values_1=df_failed_jobs['ReqTotalRAM']\n",
"values_2=df_failed_jobs['Waiting']\n",
"fig = make_subplots(1, 2, specs=[[{'type':'domain'}, {'type':'domain'}]],\n",
" subplot_titles=['ReqTotalRAM for Failed Jobs', 'Waiting time for Failed jobs'])\n",
"fig.add_trace(go.Pie(labels=labels, values=values_1, scalegroup='one'), 1, 1)\n",
"fig.add_trace(go.Pie(labels=labels, values=values_2, scalegroup='one'), 1, 2)\n",
"\n",
"fig.update_layout(title_text='Failed Jobs')\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Count of the Requested RAM for the failed jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = px.histogram(df_failed_jobs,x='ReqTotalRAM',color=\"Partition\")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs Waiting time for the failed jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,4))\n",
"sns.scatterplot(x='ReqTotalRAM',y='Waiting',hue='Partition',data=df_failed_jobs)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs CPU time for the failed jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,4))\n",
"sns.scatterplot(x='ReqTotalRAM',y='CPUTime',hue='Partition',data=df_failed_jobs)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"px.scatter_matrix(df_failed_jobs,dimensions=['Elapsed','ReqTotalRAM','Waiting','CPUTime'],color=\"Partition\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filter dataset to pull out the Successful jobs "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Successful= get_jobs(\"COMPLETED\")\n",
"\n",
"df_Successful_jobs=df_Successful[['Elapsed','State','Partition','ReqTotalRAM','Waiting','CPUTime','NGPU']]\n",
"df_Successful_jobs['CPUTime']=df_Successful_jobs['CPUTime']/60\n",
"df_Successful_jobs['Elapsed']=df_Successful_jobs['Elapsed']/60"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs waiting time for the Successful jobs "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,4))\n",
"sns.scatterplot(x='ReqTotalRAM',y='Waiting',hue='Partition',data=df_Successful_jobs)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,4))\n",
"sns.scatterplot(x='Waiting',y='ReqTotalRAM',hue='Partition',data=df_Successful_jobs)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Successful Array Jobs "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Successful_Array_jobs=df_Successful.dropna(subset=['ArrayTaskID'])\n",
"df_Successful_Array_jobs.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs waiting time for the Successful Array jobs "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,10))\n",
"sns.scatterplot(x='ReqTotalRAM',y='Waiting',hue='Partition',data=df_Successful_Array_jobs,linewidth=0)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_user(user):\n",
" w = df_Successful_Array_jobs.loc[df_Successful_Array_jobs['User'] ==user ]\n",
" return w"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Successful_Array_jobs_user=get_user('gpekmezi')\n",
"df_Successful_Array_jobs_user.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"\n",
"#Waiting time= Start time - submit time\n",
"#waiting time = waiting/ 60 ; waiting time in minutes\n",
"#Requested Memory CPU =Requested Memory CPU /(1024*1024*1024)\n",
"#Total Requested RAM per cPU = NCPUS* ReqMemCPU\n",
"\n",
"fig = px.scatter(df_Successful_Array_jobs, x='ReqTotalRAM',y='Waiting',color=\"Partition\",title=('Requested Total RAM vs Waiting Time for Successful Array Jobs '),hover_data=['User','Elapsed','ReqMem','NCPUS','AllocCPUS','NGPU'])\n",
"\n",
"fig.update_layout(\n",
" title=\"Requested Total RAM vs Waiting Time for Successful Array Jobs \",\n",
" xaxis_title=\"Requested Total RAM per CPU in GB\",\n",
" yaxis_title=\"Waiting Time in minutes\",\n",
" font=dict(\n",
" family=\"Courier New, monospace\",\n",
" size=18,\n",
" color=\"#7f7f7f\"\n",
" )\n",
")\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# for waiting in days wating/1440\n",
"#df_Successful_Array_jobs_user['Waiting']=df_Successful_Array_jobs_user['Waiting']/1440\n",
"fig = px.histogram(df_Successful_Array_jobs_user,x='Waiting',color=\"Partition\")\n",
"\n",
"#Waiting time= Start time - submit time\n",
"#waiting time = waiting/ 60 waiting time in minutes\n",
"#Requested Memory CPU =Requested Memory CPU /(1024*1024*1024)\n",
"#Total Requested RAM per cPU = NCPUS* ReqMemCPU\n",
"fig.update_layout(\n",
" title=\"'User-gpekmezi' Waiting Time for Successful Array Jobs \",\n",
" xaxis_title=\"Waiting Time in days\",\n",
" yaxis_title=\"Count\",\n",
" font=dict(\n",
" family=\"Courier New, monospace\",\n",
" size=18,\n",
" color=\"#7f7f7f\"\n",
" )\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Successful non_array_jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Successful_non_Array_jobs=df_Successful.loc[df['ArrayTaskID'].isnull()]\n",
"#df_Successful_non_Array_jobs.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs waiting time for the Successful non_array_jobs "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,10))\n",
"sns.scatterplot(x='ReqTotalRAM',y='Waiting',hue='Partition',data=df_Successful_non_Array_jobs,linewidth=0)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Successful_non_Array_jobs.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"fig = px.scatter(df_Successful_non_Array_jobs, x=\"ReqTotalRAM\", y=\"Waiting\", color=\"Partition\",hover_data=['User','Elapsed','ReqMem','NCPUS','AllocCPUS'])\n",
"#computing waiting time= waiting/60 \n",
"#Requested Memory CPU =Requested Memory CPU /(1024*1024*1024)\n",
"#Requested Total RAM = Requested Memory CPU * NCPUS\n",
"\n",
"fig.update_layout(\n",
" title=\"Requested Total RAM vs Waiting Time for Successful non Array Jobs \",\n",
" xaxis_title=\"Requested Total RAM per CPU in GB\",\n",
" yaxis_title=\"Waiting Time in minutes\",\n",
" font=dict(\n",
" family=\"Courier New, monospace\",\n",
" size=18,\n",
" color=\"#7f7f7f\"\n",
" )\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_user(user):\n",
" w = df_Successful_non_Array_jobs.loc[df_Successful_non_Array_jobs['User'] ==user ]\n",
" return w"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"df_Successful_non_Array_jobs_user=get_user('mmootz')\n",
"df_Successful_non_Array_jobs_user.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# for waiting in days wating/1440\n",
"#df_Successful_non_Array_jobs_user['Waiting']=df_Successful_non_Array_jobs_user['Waiting']/1440\n",
"fig = px.histogram(df_Successful_non_Array_jobs_user,x='Waiting',color=\"Partition\")\n",
"#computing waiting time= waiting/60 \n",
"#Requested Memory CPU =Requested Memory CPU /(1024*1024*1024)\n",
"#Requested Total RAM = Requested Memory CPU * NCPUS\n",
"\n",
"fig.update_layout(\n",
" title=\"'User-mmootz' Waiting Time for Successful non Array Jobs \",\n",
" xaxis_title=\"Waiting Time in days\",\n",
" yaxis_title=\"Count\",\n",
" font=dict(\n",
" family=\"Courier New, monospace\",\n",
" size=18,\n",
" color=\"#7f7f7f\"\n",
" )\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"px.scatter_matrix(df_Successful_non_Array_jobs,dimensions=['Elapsed','ReqTotalRAM','Waiting','CPUTime'],color=\"Partition\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Successful['Array_jobs']=df_Successful_Array_jobs['ArrayTaskID']\n",
"df_Successful['non_Array_jobs']=df_Successful_non_Array_jobs['ArrayTaskID']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filter dataset to pullout Timeout jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Timeout= get_jobs(\"TIMEOUT\")\n",
"\n",
"df_Timeout_jobs=df_Timeout[['Elapsed','State','Partition','ReqTotalRAM','Waiting','CPUTime']]\n",
"df_Timeout_jobs['CPUTime']=df_Timeout_jobs['CPUTime']/60\n",
"df_Timeout_jobs['Elapsed']=df_Timeout_jobs['Elapsed']/60"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs Waiting time for theTimeout jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,4))\n",
"sns.scatterplot(x='ReqTotalRAM',y='Waiting',hue='Partition',data=df_Timeout_jobs)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What is the Requested Total RAM vs CPU time for the Timeout jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig= plt.figure(figsize=(10,4))\n",
"sns.scatterplot(x='ReqTotalRAM',y='CPUTime',hue='Partition',data=df_Timeout_jobs)\n",
"plt.legend(bbox_to_anchor=(1.05,1),loc=2,borderaxespad=0.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],