Commit 1a2043a5 authored by Ryan Randles Jones's avatar Ryan Randles Jones
Browse files

updated variable names and added doc strings

parent 114ca4aa
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
import sqlite3 import sqlite3
import slurm2sql import slurm2sql
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
%matplotlib inline %matplotlib inline
import seaborn as sns import seaborn as sns
from scipy.stats import skew from scipy.stats import skew
import plotly.express as px import plotly.express as px
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# connects to the sqlite3 database of info collected since March 2020
db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3') db = sqlite3.connect('/data/rc/rc-team/slurm-since-March.sqlite3')
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
#slurm2sql.slurm2sql(db, ['-S', '2020-04-01', '-a']) # df_1 is starting database
``` df_1 = pd.read_sql('SELECT * FROM slurm', db)
%% Cell type:code id: tags:
```
# For example, you can then convert to a dataframe:
df1 = pd.read_sql('SELECT * FROM slurm', db)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# for displaying all available column options
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
df1.head(5) df_1.head(5)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
df2 = df1.loc[:,['ReqMemCPU', 'ReqMemNode']] # df_2 is database with only ReqMemCpu and ReqMemNode
#df2.head(5) df_2 = df_1.loc[:,['ReqMemCPU', 'ReqMemNode']]
#df_2.head(5)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
# df_batch is a boolean mask marking the rows whose JobName contains 'batch' (used to filter df_2 down to batch jobs)
df_batch = df1.JobName.str.contains('batch') df_batch = df1.JobName.str.contains('batch')
#df2[df_batch] #df_2[df_batch]
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
cutoff = df2[df_batch][(df2[df_batch].ReqMemCPU <= 1e+10)] # creates database from df_batch of ReqMemCPU batch jobs that are < or = a given point
cutoff CPU_cutoff = df_2[df_batch][(df_2[df_batch].ReqMemCPU <= 1e+10)] # 1e+10 is 10 billion, i.e. 10 gig rather than 1 gig (assuming the column is in bytes - TODO confirm units)
CPU_cutoff
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
cutoff.describe(include=None, exclude=None) # gives mean, min, max, std, and 3 percentiles for cutoff data
# can change what to include or exclude
CPU_cutoff.describe(include=None, exclude=None)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
fig = px.histogram(cutoff, x="ReqMemCPU", # creates histogram of ReqMemCPU for the month of March 2020
# uses the cpu memory cutoff applied in CPU_cutoff (<= 1e+10; noted elsewhere as 1 gig - TODO confirm, 1e+10 bytes would be 10 gig)
# can also show a box or violin plot above the histogram to show where the min, max, median, and 3rd quartile are
# the mean requested memory per CPU is just under half a gig
CPU_fig = px.histogram(CPU_cutoff, x="ReqMemCPU",
title='Histogram of ReqMemCPU', title='Histogram of ReqMemCPU',
labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column labels={'ReqMemCPU':'ReqMemCPU'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=cutoff.columns, hover_data=CPU_cutoff.columns,
nbins=30, nbins=30,
color_discrete_sequence=['indianred'] # color of histogram bars color_discrete_sequence=['goldenrod'] # color of histogram bars
) )
fig.show() CPU_fig.show()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
cutoff[['ReqMemCPU']].plot(kind='hist',bins=50,rwidth=1, logy=True) # creates database from df_batch of ReqMemNode batch jobs that are < or = a given point
plt.show() Node_cutoff = df_2[df_batch][(df_2[df_batch].ReqMemNode <= 1e+10)] # 1e+10 is 10 billion, i.e. 10 gig rather than 1 gig (assuming the column is in bytes - TODO confirm units)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
x = cutoff[['ReqMemCPU']] # creates histogram of ReqMemNode for the month of March 2020
print (skew(x)) # uses cutoff node memory declared in Node_cutoff - 1 gig
``` # can also show a box or violin plot above the histogram to show where the min, max, median, and 3rd quartile are
# the mean requested memory per node is just under half a gig
%% Cell type:code id: tags: Node_fig = px.histogram(Node_cutoff, x="ReqMemNode",
```
cutoff = df2[df_batch][(df2[df_batch].ReqMemNode <= 1e+10)]
```
%% Cell type:code id: tags:
```
fig = px.histogram(cutoff, x="ReqMemNode",
title='Histogram of ReqMemNode', title='Histogram of ReqMemNode',
labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column labels={'ReqMemNode':'ReqMemNode'}, # can specify one label per df column
opacity=0.8, opacity=0.8,
log_y=True, # represent bars with log scale log_y=True, # represent bars with log scale
marginal="box", # can be `box`, `violin` marginal="box", # can be `box`, `violin`
hover_data=cutoff.columns, hover_data=Node_cutoff.columns,
nbins=30, nbins=30,
color_discrete_sequence=['indianred'] # color of histogram bars color_discrete_sequence=['darkblue'] # color of histogram bars
) )
fig.show() Node_fig.show()
```
%% Cell type:code id: tags:
```
cutoff[['ReqMemNode']].plot(kind='hist',bins=50,rwidth=1, logy=True)
plt.show()
```
%% Cell type:code id: tags:
```
x = cutoff[['ReqMemNode']]
print (skew(x))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` ```
``` ```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment