Commit 50b8c3e5 authored by Ryan Randles Jones's avatar Ryan Randles Jones
Browse files

formatting docstrings

parent 43a317bd
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
import sqlite3 import sqlite3
import slurm2sql import slurm2sql
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
%matplotlib inline %matplotlib inline
import seaborn as sns import seaborn as sns
import plotly.express as px import plotly.express as px
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# creates database of info from March 2020 using sqlite 3 # creates database of info from March 2020 using sqlite 3
db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3') db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# creates database of allocation info from March 2020 using sqlite 3 # creates database of allocation info from March 2020 using sqlite 3
# not using this right now, but is here as an option # not using this right now, but is here as an option
#db_allocation = sqlite3.connect('/data/rc/rc-team/slurm-since-March-allocation.sqlite3') #db_allocation = sqlite3.connect('/data/rc/rc-team/slurm-since-March-allocation.sqlite3')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
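upperRAMlimit = 50e+10 # labelled "5 gigs", but 50e+10 == 5e11 (500 GB if ReqMem is stored in bytes) - TODO confirm intended cutoff upperRAMlimit = 50e+10 # labelled "5 gigs", but 50e+10 == 5e11 (500 GB if ReqMem is stored in bytes) - TODO confirm intended cutoff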
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# df_1 is starting database # df_1 is starting database
df_1 = pd.read_sql('SELECT * FROM slurm', db) df_1 = pd.read_sql('SELECT * FROM slurm', db)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# for displaying all available column options # for displaying all available column options
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
df_1.head(5) df_1.head(5)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# df_2 is database with only ReqMemCpu and ReqMemNode, and ArrayTaskID # df_2 is database with only ReqMemCpu and ReqMemNode, and ArrayTaskID
df_2 = df_1.loc[:,['JobName','ReqMemCPU', 'ReqMemNode', 'ArrayJobID','ArrayTaskID']] df_2 = df_1.loc[:,['JobName','ReqMemCPU', 'ReqMemNode', 'ArrayJobID','ArrayTaskID']]
df_2.head(5) df_2.head(5)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
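# df_batch is a boolean mask marking rows whose JobName contains 'batch'; df_2[df_batch] selects only the batch jobs # df_batch is a boolean mask marking rows whose JobName contains 'batch'; df_2[df_batch] selects only the batch jobs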
df_batch = df_1.JobName.str.contains('batch') df_batch = df_1.JobName.str.contains('batch')
df_2[df_batch] df_2[df_batch]
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# creates database from df_batch of ReqMemCPU batch jobs that are < or = a given point # creates database from df_batch of ReqMemCPU batch jobs that are < or = a given point
CPU_cutoff = df_2[df_batch][(df_2[df_batch].ReqMemCPU <= upperRAMlimit)] CPU_cutoff = df_2[df_batch][(df_2[df_batch].ReqMemCPU <= upperRAMlimit)]
#CPU_cutoff #CPU_cutoff
Node_cutoff = df_2[df_batch][(df_2[df_batch].ReqMemNode <= upperRAMlimit)] Node_cutoff = df_2[df_batch][(df_2[df_batch].ReqMemNode <= upperRAMlimit)]
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# gives mean, min, max, std, and 3 percentiles for cutoff data # gives mean, min, max, std, and 3 percentiles for cutoff data
# can change what to include or exclude # can change what to include or exclude
CPU_cutoff.describe(include=None, exclude=None) CPU_cutoff.describe(include=None, exclude=None)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# gives mean, min, max, std, and 3 percentiles for cutoff data # gives mean, min, max, std, and 3 percentiles for cutoff data
# can change what to include or exclude # can change what to include or exclude
Node_cutoff.describe(include=None, exclude=None) Node_cutoff.describe(include=None, exclude=None)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# creates databases of requested cpu and node that have an array task id using the upper RAM limit cutoff # creates databases of requested cpu and node that have an array task id using the upper RAM limit cutoff
CPU_arraytask = CPU_cutoff.dropna(subset=['ArrayTaskID']) CPU_arraytask = CPU_cutoff.dropna(subset=['ArrayTaskID'])
Node_arraytask = Node_cutoff.dropna(subset=['ArrayTaskID']) Node_arraytask = Node_cutoff.dropna(subset=['ArrayTaskID'])
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# creates databases of requested cpu and node that do not have an array task id using the upper RAM limit cutoff # creates databases of requested cpu and node that do not have an array task id using the upper RAM limit cutoff
CPU_nonarraytask = CPU_cutoff[CPU_cutoff['ArrayTaskID'].isnull()] CPU_nonarraytask = CPU_cutoff[CPU_cutoff['ArrayTaskID'].isnull()]
Node_nonarraytask = Node_cutoff[Node_cutoff['ArrayTaskID'].isnull()] Node_nonarraytask = Node_cutoff[Node_cutoff['ArrayTaskID'].isnull()]
#CPU_nonarraytask.head(5) #CPU_nonarraytask.head(5)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Graphs: User Request of CPU Memory for all Jobs Graphs: User Request of CPU Memory for all Jobs
User Request of Node Memory for all Jobs User Request of Node Memory for all Jobs
User Request of CPU and Node User Request of CPU and Node
User Request of CPU Memory for Array Job vs Not Array Job User Request of CPU Memory for Array Job vs Not Array Job
User Request of Node Memory for Array Job vs Not Array Job User Request of Node Memory for Array Job vs Not Array Job
These graphs create histograms using the data for the month of March 2020 These graphs create histograms using the data for the month of March 2020
They use cutoff cpu and node memory declared in CPU_cutoff and Node_cutoff - 5 gigs. They use cutoff cpu and node memory declared in CPU_cutoff and Node_cutoff - 5 gigs.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# shows all user requested cpu memory for array and non array jobs # shows all user requested cpu memory for array and non array jobs
CPU_fig = sns.distplot(CPU_cutoff['ReqMemCPU'], kde=False, label='All CPU', color = "green") CPU_fig = sns.distplot(CPU_cutoff['ReqMemCPU'], kde=False, label='All CPU', color = "green")
CPU_fig.set_yscale('log') CPU_fig.set_yscale('log')
plt.legend(prop={'size': 12}) plt.legend(prop={'size': 12})
plt.title('User Request of CPU Memory for all Jobs') plt.title('User Request of CPU Memory for all Jobs')
plt.xlabel('Requested CPU Gigs') plt.xlabel('Requested CPU Gigs')
plt.ylabel('Amount of Users Requesting') plt.ylabel('Amount of Users Requesting')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# shows all user requested node memory for array and non array jobs # shows all user requested node memory for array and non array jobs
Node_fig = sns.distplot(Node_cutoff['ReqMemNode'], kde=False, label='All Node') Node_fig = sns.distplot(Node_cutoff['ReqMemNode'], kde=False, label='All Node')
Node_fig.set_yscale('log') Node_fig.set_yscale('log')
plt.legend(prop={'size': 12}) plt.legend(prop={'size': 12})
plt.title('User Request of Node Memory for all Jobs') plt.title('User Request of Node Memory for all Jobs')
plt.xlabel('Requested Node Gigs') plt.xlabel('Requested Node Gigs')
plt.ylabel('Amount of Users Requesting') plt.ylabel('Amount of Users Requesting')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
#shows requested cpu and node for all job types (array and non array jobs) side by side for easy comparison. #shows requested cpu and node for all job types (array and non array jobs) side by side for easy comparison.
CPU_fig = sns.distplot(CPU_cutoff['ReqMemCPU'], kde=False, label='All CPU', color = "green") CPU_fig = sns.distplot(CPU_cutoff['ReqMemCPU'], kde=False, label='All CPU', color = "green")
CPU_fig.set_yscale('log') CPU_fig.set_yscale('log')
Node_fig = sns.distplot(Node_cutoff['ReqMemNode'], kde=False, label='All Node') #color = 'darkblue') Node_fig = sns.distplot(Node_cutoff['ReqMemNode'], kde=False, label='All Node') #color = 'darkblue')
Node_fig.set_yscale('log') Node_fig.set_yscale('log')
plt.legend(prop={'size': 12}) plt.legend(prop={'size': 12})
plt.title('User Request of CPU and Node') plt.title('User Request of CPU and Node')
plt.xlabel('Requested Gigs') plt.xlabel('Requested Gigs')
plt.ylabel('Amount of Users Requesting') plt.ylabel('Amount of Users Requesting')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
#shows requested cpu memory for array jobs alongside requested cpu memory for non array jobs for easy comparison. #shows requested cpu memory for array jobs alongside requested cpu memory for non array jobs for easy comparison.
CPU_arraytask_fig = sns.distplot(CPU_arraytask['ReqMemCPU'], kde=False, label='CPU Array Task', color = "green") CPU_arraytask_fig = sns.distplot(CPU_arraytask['ReqMemCPU'], kde=False, label='CPU Array Task', color = "green")
CPU_arraytask_fig.set_yscale('log') CPU_arraytask_fig.set_yscale('log')
CPU_nonarraytask_fig = sns.distplot(CPU_nonarraytask['ReqMemCPU'], kde=False, label='CPU Non Array Task') CPU_nonarraytask_fig = sns.distplot(CPU_nonarraytask['ReqMemCPU'], kde=False, label='CPU Non Array Task')
CPU_nonarraytask_fig.set_yscale('log') CPU_nonarraytask_fig.set_yscale('log')
plt.legend(prop={'size': 12}) plt.legend(prop={'size': 12})
plt.title('User Request of CPU Memory for Array Job vs Not Array Job') plt.title('User Request of CPU Memory for Array Job vs Not Array Job')
plt.xlabel('Requested Gigs') plt.xlabel('Requested Gigs')
plt.ylabel('Amount of Users Requesting') plt.ylabel('Amount of Users Requesting')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
#shows requested node memory for array jobs alongside requested node memory for non array jobs for easy comparison. #shows requested node memory for array jobs alongside requested node memory for non array jobs for easy comparison.
Node_arraytask_fig = sns.distplot(Node_arraytask['ReqMemCPU'], kde=False, label='Node Array Task', color = "green") Node_arraytask_fig = sns.distplot(Node_arraytask['ReqMemCPU'], kde=False, label='Node Array Task', color = "green")
Node_arraytask_fig.set_yscale('log') Node_arraytask_fig.set_yscale('log')
Node_nonarraytask_fig = sns.distplot(Node_nonarraytask['ReqMemNode'], kde=False, label='Node Non Array Task') Node_nonarraytask_fig = sns.distplot(Node_nonarraytask['ReqMemNode'], kde=False, label='Node Non Array Task')
Node_nonarraytask_fig.set_yscale('log') Node_nonarraytask_fig.set_yscale('log')
plt.legend(prop={'size': 12}) plt.legend(prop={'size': 12})
plt.title('User Request of Node Memory for Array Job vs Not Array Job') plt.title('User Request of Node Memory for Array Job vs Not Array Job')
plt.xlabel('Requested Gigs') plt.xlabel('Requested Gigs')
plt.ylabel('Amount of Users Requesting') plt.ylabel('Amount of Users Requesting')
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# # These are Plotly Express Graphs of some of the above Seaborn graphs. Run them only if you need more details about the data in the graph. They will make your notebook run slower. # These are Plotly Express Graphs of some of the above Seaborn graphs. Run them only if you need more details about the data in the graph. They will make your notebook run slower.
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Graphs: User Request of CPU Memory for all Jobs Graphs: > User Request of CPU Memory for all Jobs
User Request of CPU Memory for Array Job > User Request of CPU Memory for Array Job
User Request of CPU Memory for Non Array Job > User Request of CPU Memory for Non Array Job
User Request of Node Memory for all Jobs > User Request of Node Memory for all Jobs
User Request of Node Memory for Array Job > User Request of Node Memory for Array Job
User Request of Node Memory for Non Array Job > User Request of Node Memory for Non Array Job
These graphs create histograms using the data for the month of March 2020 These graphs create histograms using the data for the month of March 2020
They use cutoff cpu and node memory declared in CPU_cutoff and Node_cutoff - 5 gigs. They use cutoff cpu and node memory declared in CPU_cutoff and Node_cutoff - 5 gigs.
Can also show a box or violin graph above to show where the min, max, median, and 3rd quartile are. Can also show a box or violin graph above to show where the min, max, median, and 3rd quartile are.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
CPU_fig = px.histogram(CPU_cutoff, x="ReqMemCPU", CPU_fig = px.histogram(CPU_cutoff, x="ReqMemCPU",
title='User Request of CPU Memory for all Jobs', title='User Request of CPU Memory for all Jobs',
labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=CPU_cutoff.columns, hover_data=CPU_cutoff.columns,
nbins=30, nbins=30,
color_discrete_sequence=['goldenrod'] # color of histogram bars color_discrete_sequence=['goldenrod'] # color of histogram bars
) )
CPU_fig.show() CPU_fig.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
CPU_nonarraytask_fig = px.histogram(CPU_nonarraytask, x="ReqMemCPU", CPU_nonarraytask_fig = px.histogram(CPU_nonarraytask, x="ReqMemCPU",
title='User Request of CPU Memory for Non Array Job', title='User Request of CPU Memory for Non Array Job',
labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=CPU_nonarraytask.columns, hover_data=CPU_nonarraytask.columns,
nbins=30, nbins=30,
color_discrete_sequence=['goldenrod'] # color of histogram bars color_discrete_sequence=['goldenrod'] # color of histogram bars
) )
CPU_nonarraytask_fig.show() CPU_nonarraytask_fig.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
CPU_arraytask_fig = px.histogram(CPU_arraytask, x="ReqMemCPU", CPU_arraytask_fig = px.histogram(CPU_arraytask, x="ReqMemCPU",
title='User Request of CPU Memory for Array Job', title='User Request of CPU Memory for Array Job',
labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=CPU_arraytask.columns, hover_data=CPU_arraytask.columns,
nbins=30, nbins=30,
color_discrete_sequence=['goldenrod'] # color of histogram bars color_discrete_sequence=['goldenrod'] # color of histogram bars
) )
CPU_arraytask_fig.show() CPU_arraytask_fig.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
Node_fig = px.histogram(Node_cutoff, x="ReqMemNode", Node_fig = px.histogram(Node_cutoff, x="ReqMemNode",
title='User Request of Node Memory for all Jobs', title='User Request of Node Memory for all Jobs',
labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=Node_cutoff.columns, hover_data=Node_cutoff.columns,
nbins=30, nbins=30,
color_discrete_sequence=['darkblue'] # color of histogram bars color_discrete_sequence=['darkblue'] # color of histogram bars
) )
Node_fig.show() Node_fig.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
Node_nonarraytask_fig = px.histogram(Node_nonarraytask, x="ReqMemNode", Node_nonarraytask_fig = px.histogram(Node_nonarraytask, x="ReqMemNode",
title='User Request of Node Memory for Non Array Job', title='User Request of Node Memory for Non Array Job',
labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=Node_nonarraytask.columns, hover_data=Node_nonarraytask.columns,
nbins=30, nbins=30,
color_discrete_sequence=['darkblue'] # color of histogram bars color_discrete_sequence=['darkblue'] # color of histogram bars
) )
Node_nonarraytask_fig.show() Node_nonarraytask_fig.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
Node_arraytask_fig = px.histogram(Node_arraytask, x="ReqMemNode", Node_arraytask_fig = px.histogram(Node_arraytask, x="ReqMemNode",
title='User Request of Node Memory for Array Job', title='User Request of Node Memory for Array Job',
labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=Node_arraytask.columns, hover_data=Node_arraytask.columns,
nbins=30, nbins=30,
color_discrete_sequence=['darkblue'] # color of histogram bars color_discrete_sequence=['darkblue'] # color of histogram bars
) )
Node_arraytask_fig.show() Node_arraytask_fig.show()
``` ```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment