Commit 5b8ce16e authored by KOMAL BADI's avatar KOMAL BADI
Browse files

Adding the core count plots.

parent 8ac0e2c3
......@@ -62,8 +62,7 @@
"metadata": {},
"outputs": [],
"source": [
"#Enter the JobID for which you want to do throughput analysis \n",
"array_job_id='5976984'"
"array_job_id='6144338'"
]
},
{
......@@ -80,6 +79,7 @@
"import matplotlib.pyplot as plt\n",
"import matplotlib\n",
"import warnings\n",
"import os\n",
"from RC_STYLES import rc_styles as s\n",
"warnings.filterwarnings(\"ignore\")\n",
"import numpy as np"
......@@ -100,7 +100,7 @@
"outputs": [],
"source": [
"#connecting to database\n",
"db = sqlite3.connect('throughput_analysis_array_job.db')"
"db = sqlite3.connect('throughput_analysis_array_job_'+str(array_job_id)+'.db')"
]
},
{
......@@ -131,7 +131,19 @@
"metadata": {},
"outputs": [],
"source": [
"df.head()"
"#Deleting the database file together with its SQLite side files.\n",
"#The -shm/-wal files are not always present, so only remove the ones that exist.\n",
"db_file = 'throughput_analysis_array_job_'+str(array_job_id)+'.db'\n",
"for suffix in ('', '-shm', '-wal'):\n",
"    if os.path.exists(db_file+suffix):\n",
"        os.remove(db_file+suffix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Preview the first 20 rows of df\n",
"df.head(n=20)"
]
},
{
......@@ -149,9 +161,7 @@
"source": [
"\n",
"#Filtering data by removing the batch and extern rows from the dataframe.\n",
"df= df.loc[df['JobName'] =='R_array_job']\n",
"#Total array tasks\n",
"Total_Jobs_Submitted = 50"
"df= df[df['User']!='']\n"
]
},
{
......@@ -271,6 +281,25 @@
"array_jobs.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Total array tasks\n",
"#NOTE(review): reads the first ArrayTaskID entry of array_jobs; presumably that entry holds the task count - confirm\n",
"array_task_ids = array_jobs['ArrayTaskID'].values\n",
"Total_Jobs_Submitted = array_task_ids[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Report the array task total\n",
"print(Total_Jobs_Submitted)"
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -493,6 +522,17 @@
"plt.ylabel('Job_Count')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Draw running_df with one panel per column\n",
"running_df.plot(subplots=True, rot=0, figsize=(15, 5))\n",
"plt.ylabel('Job_Count')\n",
"plt.xlabel('Time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -525,6 +565,17 @@
"plt.ylabel('Job_Count')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Draw running_df with one panel per column (taller figure)\n",
"running_df.plot(subplots=True, rot=0, figsize=(15, 8))\n",
"plt.ylabel('Job_Count')\n",
"plt.xlabel('Time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -613,6 +664,17 @@
"plt.ylabel('Job_Count')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Draw completed_df with one panel per column\n",
"completed_df.plot(subplots=True, rot=0, figsize=(15, 5))\n",
"plt.ylabel('Job_Count')\n",
"plt.xlabel('Time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -629,7 +691,8 @@
"source": [
"#Mandatory\n",
"#Joining the two dataframes to plot the throughput analysis\n",
"merged_df=completed_df.join(running_df)"
"#merged_df=completed_df.join(running_df)\n",
"merged_df=running_df.merge(completed_df, left_index=True, right_index=True)"
]
},
{
......@@ -663,6 +726,17 @@
"plt.ylabel('Job_Count')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Draw t with one panel per column\n",
"t.plot(subplots=True, rot=0, figsize=(10, 10))\n",
"plt.ylabel('Job_Count')\n",
"plt.xlabel('Time')"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -739,8 +813,8 @@
"outputs": [],
"source": [
"figure,axes = plt.subplots(1, 2)\n",
"currently_running_plot=q['currently_running'].plot(figsize=(15,5),ax=axes[0],color=\"blue\")\n",
"pending_jobs_plot=q['pending_jobs'].plot(figsize=(15,5),ax=axes[1],color=\"orange\")\n",
"currently_running_plot=q['currently_running'].plot(figsize=(15,5),ax=axes[0],color=\"green\")\n",
"pending_jobs_plot=q['pending_jobs'].plot(figsize=(15,5),ax=axes[1],color=\"blue\")\n",
"currently_running_plot.set_ylabel('Job_Count')\n",
"currently_running_plot.set_title('Currently running jobs')\n",
"currently_running_plot.set_xlabel('Time')\n",
......@@ -748,6 +822,107 @@
"pending_jobs_plot.set_xlabel('Time')\n",
"pending_jobs_plot.set_title('Pending jobs')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Running and pending job counts over time, one panel per column of q\n",
"axes = q.plot( figsize=(15, 5), style=['g','b'],subplots=True,rot=0,legend=False)\n",
"axes[0].set_ylabel('Running job count')\n",
"axes[1].set_ylabel('Pending job count')\n",
"axes[-1].set_xlabel('Time')\n",
"#The title goes on the top panel so that no hand-tuned y offset is needed\n",
"axes[0].set_title('Throughput Analysis (Job ID - '+str(array_job_id)+')',loc='center')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Number of CPUs\n",
"Number of CPUs used for the given job ID."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Count the ArrayTaskID entries recorded for each NCPUS value\n",
"tasks_by_ncpus = array_job_data.groupby([\"NCPUS\"] , as_index=False)\n",
"count_jobs_core = tasks_by_ncpus[\"ArrayTaskID\"].count()\n",
"#Rename the counted column so the table reads as a job count per NCPUS value\n",
"df_core_count = count_jobs_core.rename(columns={'ArrayTaskID': 'job count'})\n",
"print(df_core_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Take the NCPUS value from the first row of the per-NCPUS table\n",
"#NOTE(review): assumes every array task used the same NCPUS value - confirm\n",
"ncpus_values = df_core_count['NCPUS'].values\n",
"ncpus = ncpus_values[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Core count\n",
"\n",
"The total number of CPU cores in use at any given time is obtained by multiplying the number of currently running array tasks by the number of CPUs assigned to each task."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#core_count = number of running tasks x CPUs per task\n",
"merged_df['core_count'] = merged_df['currently_running'] * ncpus"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Preview merged_df with the new core_count column\n",
"merged_df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Select the three series that are plotted in the next cell\n",
"plot_columns = ['currently_running','pending_jobs','core_count']\n",
"core_count = pd.DataFrame(merged_df[plot_columns])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Running jobs, pending jobs and core count over time, one panel per column\n",
"axes = core_count.plot( figsize=(20, 10), style=['g','b','r'],subplots=True,rot=0,legend=False)\n",
"axes[0].set_ylabel('Running job count')\n",
"axes[1].set_ylabel('Pending job count')\n",
"axes[2].set_ylabel('core count')\n",
"axes[-1].set_xlabel('Time')\n",
"#The title goes on the top panel so that no hand-tuned y offset is needed\n",
"axes[0].set_title('Throughput Analysis (Job ID - '+str(array_job_id)+')',loc='center')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment