Gitlab has been upgraded from version 14.2 to 14.4. Release notes can be found here (we run the Core release): https://about.gitlab.com/releases/2021/10/22/gitlab-14-4-released/

Commit 75154fbf authored by KOMAL BADI
Browse files

Analysis on pascalnode and pascalnode-medium partition

parent 087f7f95
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sqlite3\n",
"import slurm2sql\n",
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import seaborn as sb\n",
"from matplotlib import pyplot as plt\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#db = sqlite3.connect('/data/rc/rc-team/slurm-since-March-allocation.sqlite3')\n",
"db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_sql('SELECT * FROM slurm', db)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['Partition'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scale the memory columns by 1024**3 (the original notes describe this as a conversion to GB).\n",
"BYTES_PER_GB = 1024 ** 3\n",
"for mem_col in ('ReqMemNode', 'AveRSS', 'ReqMemCPU'):\n",
"    df[mem_col] = df[mem_col] / BYTES_PER_GB\n",
"# NOTE(review): MaxRSS is not rescaled here, so it keeps its original units -- confirm this is intended.\n",
"\n",
"# Collapse every state containing 'CANCELLED' into the single label 'CANCELLED'.\n",
"# na=False makes rows with a missing State evaluate to False instead of NaN, which .loc rejects as a mask.\n",
"df.loc[df['State'].str.contains('CANCELLED', na=False), 'State'] = 'CANCELLED'\n",
"\n",
"# Waiting time is the gap between submission and start.\n",
"df['Waiting'] = df['Start'] - df['Submit']\n",
"\n",
"# Keep only jobs with a known waiting time and submit time. The explicit .copy() makes df1 an\n",
"# independent frame, so the column assignments below and in later cells do not raise\n",
"# SettingWithCopyWarning.\n",
"df1 = df.dropna(subset=['Waiting', 'Submit']).copy()\n",
"\n",
"# Express waiting time and elapsed time in hours (divide by 3600).\n",
"df1['Waiting'] = df1['Waiting'] / 3600\n",
"df1['Elapsed'] = df1['Elapsed'] / 3600"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse the numeric Submit column as a timestamp expressed in seconds.\n",
"df1['submit_time'] = pd.to_datetime(df1['Submit'], unit='s')\n",
"\n",
"# ISO week number of the submission. Series.dt.week is deprecated and removed in pandas 2.0;\n",
"# dt.isocalendar().week is its replacement. The cast to int turns the nullable UInt32 result\n",
"# into a plain integer column, which is what dt.week used to produce.\n",
"df1['submit_week_number'] = df1['submit_time'].dt.isocalendar().week.astype(int)\n",
"df1['submit_week_number'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#converts time to month\n",
"df1['submit_month_number'] = df1['submit_time'].dt.month \n",
"df1['submit_month_number'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df1['submit_month'] = df1['submit_time'].dt.strftime('%b')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df1['submit_month'].head() #More specific month"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Helper used to pull out the pascalnodes and pascalnodes-medium subsets of df1\n",
"def get_jobs_partition(partition):\n",
"    \"\"\"Return the rows of df1 whose 'Partition' value equals ``partition``.\"\"\"\n",
"    in_partition = df1['Partition'] == partition\n",
"    return df1.loc[in_partition]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Calling get_jobs_partition for pascalnodes partition\n",
"df_pascalnodes_jobs=get_jobs_partition('pascalnodes')\n",
"\n",
"#Calling get_jobs_partition for pascalnodes-medium partition\n",
"df_pascalnodes_medium_jobs=get_jobs_partition('pascalnodes-medium')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"count_jobs_week = df_pascalnodes_jobs.groupby(\"submit_week_number\")[\"JobID\"].count()\n",
"print(count_jobs_week)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Box plot and scatter plot of waiting time per submission week -- pascalnodes partition\n",
"f, axes = plt.subplots(1, 2, sharey=True, figsize=(16, 6))\n",
"\n",
"# Left panel: distribution of waiting time for each week\n",
"box_p = sns.boxplot(x=\"submit_week_number\", y=\"Waiting\", data=df_pascalnodes_jobs, ax=axes[0])\n",
"\n",
"# Right panel: individual jobs, coloured by job state\n",
"sns.scatterplot(x=\"submit_week_number\", y=\"Waiting\", hue=\"State\", data=df_pascalnodes_jobs, ax=axes[1], edgecolor=\".0\", linewidth=.0, alpha=.5)\n",
"# Attach the legend to the scatter panel explicitly and place it outside the axes\n",
"axes[1].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"\n",
"axes[0].set_title('Box plot Pascalnode partition')\n",
"axes[0].set_ylabel('Wait time in hours')\n",
"axes[0].set_xlabel('week number')\n",
"axes[1].set_title('Scatter plot pascalnode partition')\n",
"axes[1].set_ylabel('Wait time in hours')\n",
"axes[1].set_xlabel('week number')\n",
"# The y-axis is shared between both panels, so the log scale applies to both\n",
"box_p.set(yscale=\"log\")\n",
"\n",
"# Overlay the weekly job count on a secondary y-axis of the box plot. boxplot draws its boxes at\n",
"# categorical positions 0..k-1 (weeks in ascending order), so the counts are plotted at those same\n",
"# positions; plotting them against the raw week numbers would shift the line away from the boxes.\n",
"ax2 = box_p.twinx()\n",
"week_positions = np.arange(len(count_jobs_week))\n",
"sns.lineplot(x=week_positions, y=count_jobs_week.values, ax=ax2)\n",
"ax2.set_ylabel('Number of jobs');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sns.jointplot(x=\"submit_week_number\",y=\"Waiting\",data=df1.loc[df1['Partition'] == \"pascalnodes\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Monthly view for the pascalnodes partition: box plot (left) and scatter plot (right)\n",
"pascal_monthly = df1.loc[df1['Partition'] == \"pascalnodes\"]\n",
"f, axes = plt.subplots(1, 2, sharey=True, figsize=(16, 6))\n",
"left_ax, right_ax = axes\n",
"box_p = sns.boxplot(x=\"submit_month\", y=\"Waiting\", data=pascal_monthly, ax=left_ax)\n",
"sns.scatterplot(x=\"submit_month\", y=\"Waiting\", hue=\"State\", data=pascal_monthly, ax=right_ax, edgecolor=\".0\", linewidth=.0, alpha=.5)\n",
"\n",
"# Titles and axis labels for both panels\n",
"left_ax.set(title='Box plot Pascalnode partition', ylabel='Wait time in hours', xlabel='month number')\n",
"right_ax.set(title='Scatter plot pascalnode partition', ylabel='Wait time in hours', xlabel='month number')\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"box_p.set_xticklabels(box_p.get_xticklabels())\n",
"box_p.set(yscale=\"log\")\n",
"\n",
"# Number of jobs per month on a secondary y-axis of the box plot\n",
"ax2 = box_p.twinx()\n",
"sns.countplot(x=\"submit_month\", data=pascal_monthly, ax=ax2)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Box plot and scatter plot of waiting time per submission week -- pascalnodes-medium partition\n",
"medium_jobs = df1.loc[df1['Partition'] == \"pascalnodes-medium\"]\n",
"f, axes = plt.subplots(1, 2, sharey=True, figsize=(16, 6))\n",
"\n",
"# Left panel: distribution of waiting time for each week\n",
"box_p = sns.boxplot(x=\"submit_week_number\", y=\"Waiting\", data=medium_jobs, ax=axes[0])\n",
"\n",
"# Right panel: individual jobs, coloured by job state\n",
"sns.scatterplot(x=\"submit_week_number\", y=\"Waiting\", hue=\"State\", data=medium_jobs, ax=axes[1], edgecolor=\".0\", linewidth=.0, alpha=.5)\n",
"axes[1].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"\n",
"axes[0].set_title('Box plot Pascalnodes-medium partition')\n",
"axes[0].set_ylabel('Wait time in hours')\n",
"axes[0].set_xlabel('week number')\n",
"axes[1].set_title('Scatter plot pascalnode-medium partition')\n",
"axes[1].set_ylabel('Wait time in hours')\n",
"axes[1].set_xlabel('week number')\n",
"# The y-axis is shared between both panels, so the log scale applies to both\n",
"box_p.set(yscale=\"log\")\n",
"\n",
"# Overlay the number of jobs submitted per week on a secondary y-axis of the box plot.\n",
"# lineplot needs an explicit y series, so the per-week job count is computed here. The counts are\n",
"# drawn at positions 0..k-1 because boxplot places its boxes at those categorical positions\n",
"# (weeks in ascending order), not at the raw week numbers.\n",
"ax2 = box_p.twinx()\n",
"medium_jobs_per_week = medium_jobs.groupby(\"submit_week_number\")[\"JobID\"].count()\n",
"sns.lineplot(x=np.arange(len(medium_jobs_per_week)), y=medium_jobs_per_week.values, ax=ax2)\n",
"ax2.set_ylabel('Number of jobs');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Monthly view for the pascalnodes-medium partition: box plot (left) and scatter plot (right)\n",
"medium_monthly = df1.loc[df1['Partition'] == \"pascalnodes-medium\"]\n",
"f, axes = plt.subplots(1, 2, sharey=True, figsize=(16, 6))\n",
"left_ax, right_ax = axes\n",
"box_p = sns.boxplot(x=\"submit_month\", y=\"Waiting\", data=medium_monthly, ax=left_ax)\n",
"sns.scatterplot(x=\"submit_month\", y=\"Waiting\", hue=\"State\", data=medium_monthly, ax=right_ax, edgecolor=\".0\", linewidth=.0, alpha=.5)\n",
"\n",
"# Titles and axis labels for both panels\n",
"left_ax.set(title='Box plot Pascalnodes-medium partition', ylabel='Wait time (hrs)', xlabel='month number')\n",
"right_ax.set(title='Scatter plot pascalnode-medium partition', ylabel='Wait time (hrs)', xlabel='month number')\n",
"plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"box_p.set_xticklabels(box_p.get_xticklabels())\n",
"box_p.set(yscale=\"log\")\n",
"\n",
"# Number of jobs per month on a secondary y-axis of the box plot\n",
"ax2 = box_p.twinx()\n",
"sns.countplot(x=\"submit_month\", data=medium_monthly, ax=ax2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Waiting time vs elapsed time for jobs in the pascalnodes partition\n",
"# fig is the Figure and ax the Axes; keeping them separate avoids rebinding `fig` to an Axes.\n",
"fig, ax = plt.subplots(figsize=(10, 4))\n",
"sns.scatterplot(x=\"Elapsed\", y='Waiting', hue='State', data=df1.loc[df1['Partition'] == \"pascalnodes\"], linewidth=0, ax=ax)\n",
"# Place the legend outside the plot area, to the right\n",
"ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"ax.set_xlabel('Elapsed time in hours')\n",
"ax.set_ylabel('Wait time in hours')\n",
"ax.set_title('Waiting time vs Elapsed time for Pascalnode jobs');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Waiting time vs elapsed time for jobs in the pascalnodes-medium partition\n",
"# fig is the Figure and ax the Axes; keeping them separate avoids rebinding `fig` to an Axes.\n",
"fig, ax = plt.subplots(figsize=(10, 4))\n",
"sns.scatterplot(x=\"Elapsed\", y='Waiting', hue='State', data=df1.loc[df1['Partition'] == \"pascalnodes-medium\"], linewidth=0, ax=ax)\n",
"# Place the legend outside the plot area, to the right\n",
"ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"ax.set_xlabel('Elapsed time in hours')\n",
"ax.set_ylabel('Wait time in hours')\n",
"ax.set_title('Waiting time vs Elapsed time for Pascalnode-medium jobs');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_jobs_week(partition, week):\n",
"    \"\"\"Return the rows of df1 for one partition that were submitted in a given week.\n",
"\n",
"    partition: value matched against the 'Partition' column.\n",
"    week: value matched against the 'submit_week_number' column.\n",
"\n",
"    Both conditions are applied together, so the result contains only jobs of the requested\n",
"    partition. A partition with no matching rows yields an empty DataFrame rather than None.\n",
"    \"\"\"\n",
"    selected = (df1['Partition'] == partition) & (df1['submit_week_number'] == week)\n",
"    return df1.loc[selected]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"week=11\n",
"weekly_pascalnode=get_jobs_week('pascalnodes',week)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weekly_pascalnode.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(weekly_pascalnode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scatter plot of waiting time vs elapsed time for the selected week (weekly_pascalnode)\n",
"# fig is the Figure and ax the Axes; keeping them separate avoids rebinding `fig` to an Axes.\n",
"fig, ax = plt.subplots(figsize=(10, 5))\n",
"sns.scatterplot(x=\"Elapsed\", y='Waiting', hue='State', data=weekly_pascalnode, linewidth=0, ax=ax)\n",
"# Place the legend outside the plot area, to the right\n",
"ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
"ax.set_title('Waiting time vs Elapsed time for Pascalnode jobs for week %i' % week);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df1['AveRSS'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df1['MaxRSS'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Calculating total RAM used \n",
"df1['RAM_Used']=df1['AveRSS']*df1['AllocCPUS']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df1['RAM_Used'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t=df1[['State','AveRSS','RAM_Used','MaxRSS']]\n",
"print(t.groupby(['State']).sum().reset_index())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t=df1[['Partition','AveRSS','RAM_Used','MaxRSS']]\n",
"print(t.groupby(['Partition']).sum().reset_index())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_jobs(state):\n",
"    \"\"\"Return the rows of df1 whose 'State' value equals ``state``.\"\"\"\n",
"    has_state = df1['State'] == state\n",
"    return df1.loc[has_state]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Completed = get_jobs(\"COMPLETED\")#filtering completed jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Completed['AveRSS'].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t=df_Completed[['State','AveRSS','RAM_Used','MaxRSS']]\n",
"print(t.groupby(['State']).sum().reset_index())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Re-derive the pascalnodes subset from df1 before summarising: df_pascalnodes_jobs was first\n",
"# built before the RAM_Used column was added to df1, so that earlier frame lacks the column and\n",
"# selecting it would raise a KeyError.\n",
"df_pascalnodes_jobs = get_jobs_partition('pascalnodes')\n",
"t = df_pascalnodes_jobs[['State', 'AveRSS', 'RAM_Used', 'MaxRSS']]\n",
"# Column sums per job state\n",
"print(t.groupby(['State']).sum().reset_index())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_pascalnodes_jobs['AveRSS'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Re-derive the pascalnodes subset from df1 before summarising: df_pascalnodes_jobs was first\n",
"# built before the RAM_Used column was added to df1, so that earlier frame lacks the column and\n",
"# selecting it would raise a KeyError.\n",
"df_pascalnodes_jobs = get_jobs_partition('pascalnodes')\n",
"df1_pascalnodes_jobs = df_pascalnodes_jobs[['State', 'Elapsed', 'Waiting', 'AveRSS', 'RAM_Used']]\n",
"# Column sums per job state\n",
"print(df1_pascalnodes_jobs.groupby(['State']).sum().reset_index())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_pascalnodes_jobs['AveRSS'].tail()"
]
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment