Commit 64bdd3d2 authored by Ryan Randles Jones
Browse files

outputs

parent 04ed1058
%% Cell type:code id: tags:
``` python
import sqlite3
import slurm2sql
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy.stats import skew
import plotly.express as px
```
%% Cell type:code id: tags:
``` python
# Location of the SQLite database queried by the rest of the notebook.
SLURM_DB_PATH = '/data/rc/rc-team/slurm-since-March.sqlite3'

# Single shared connection; later cells pass it to pandas as `db`.
db = sqlite3.connect(SLURM_DB_PATH)
```
%% Cell type:code id: tags:
``` python
#slurm2sql.slurm2sql(db, ['-S', '2020-04-01', '-a'])
```
%% Cell type:code id: tags:
``` python
# Pull every row and column of the `slurm` table into a dataframe.
df1 = pd.read_sql(sql='SELECT * FROM slurm', con=db)
```
%% Cell type:code id: tags:
``` python
# Lift the column display limit so the wide table is not truncated.
pd.options.display.max_columns = None
# Preview the first five rows (head() defaults to n=5).
df1.head()
```
%%%% Output: execute_result
JobID ArrayJobID ArrayTaskID JobStep JobIDSlurm \
0 3305723 3305723 49.0 None 3305723_49
1 3305723 3305723 49.0 batch 3305723_49.batch
2 3305723 3305723 49.0 extern 3305723_49.extern
3 3307288 3307288 NaN None 3307288_[0-9%10]
4 3319116 3319116 NaN None 3319116_[43-45,47%5]
JobName User Group Account State Timelimit \
0 1mID1MPa gpekmezi gpekmezi gpekmezi COMPLETED 540000.0
1 batch gpekmezi COMPLETED NaN
2 extern gpekmezi COMPLETED NaN
3 1mCrC100MPa gpekmezi gpekmezi gpekmezi CANCELLED by 2153 540000.0
4 1mUD1MPa gpekmezi gpekmezi gpekmezi PENDING 540000.0
Elapsed Time Submit Start End Partition \
0 518725.0 1.583287e+09 1.580242e+09 1.582768e+09 1.583287e+09 long
1 518725.0 1.583287e+09 1.582768e+09 1.582768e+09 1.583287e+09
2 518725.0 1.583287e+09 1.582768e+09 1.582768e+09 1.583287e+09
3 0.0 1.583509e+09 1.580252e+09 1.583509e+09 1.583509e+09 long
4 0.0 1.580486e+09 1.580486e+09 NaN NaN long
ExitCodeRaw ExitCode ExitSignal NodeList Priority ConsumedEnergy \
0 0:0 0.0 0.0 c0024 5476.0 0.0
1 0:0 0.0 0.0 c0024 NaN 0.0
2 0:0 0.0 0.0 c0024 NaN 0.0
3 0:0 0.0 0.0 None assigned 0.0 0.0
4 0:0 0.0 0.0 None assigned 5011.0 0.0
ReqNodes NNodes AllocNodes ReqGRES NTasks NCPUS ReqCPUS AllocCPUS \
0 0 1 1 NaN 6 6 6
1 0 1 1 1.0 6 6 6
2 0 1 1 1.0 6 6 6
3 0 1 0 NaN 6 6 6
4 0 1 0 NaN 6 6 6
CPUTime TotalCPU UserCPU SystemCPU CPUEff MinCPU MinCPUNode \
0 3112350.0 648258.0 646015.0 2243.873 0.208286 NaN
1 3112350.0 648258.0 646015.0 2243.872 0.208286 648234.0 c0024
2 3112350.0 0.0 0.0 0.000 0.000000 0.0 c0024
3 0.0 0.0 0.0 0.000 NaN NaN
4 0.0 0.0 0.0 0.000 NaN NaN
MinCPUTask ReqMem ReqMemType ReqMemNode ReqMemCPU AveRSS \
0 8388608Kc c 5.153961e+10 8.589935e+09 NaN
1 0 8388608Kc c 5.153961e+10 8.589935e+09 1.084926e+10
2 0 8388608Kc c 5.153961e+10 8.589935e+09 1.650688e+06
3 8388608Kc c 5.153961e+10 8.589935e+09 NaN
4 8388608Kc c 5.153961e+10 8.589935e+09 NaN
MaxRSS MaxRSSNode MaxRSSTask MaxPages MaxVMSize MemEff \
0 NaN NaN NaN NaN
1 1.084926e+10 c0024 0 0.0 1.209698e+10 0.210503
2 1.650688e+06 c0024 0 0.0 1.974436e+08 0.000032
3 NaN NaN NaN NaN
4 NaN NaN NaN NaN
AveDiskRead AveDiskWrite MaxDiskRead MaxDiskReadNode MaxDiskReadTask \
0 NaN NaN NaN
1 494427136.0 2.083804e+09 494427136.0 c0024 0
2 0.0 0.000000e+00 0.0 c0024 0
3 NaN NaN NaN
4 NaN NaN NaN
MaxDiskWrite MaxDiskWriteNode MaxDiskWriteTask ReqGPUS Comment GPUMem \
0 NaN NaN None None
1 2.083804e+09 c0024 0 NaN None None
2 0.000000e+00 c0024 0 NaN None None
3 NaN NaN None None
4 NaN NaN None None
GPUEff NGPU
0 None None
1 None None
2 None None
3 None None
4 None None
%% Cell type:code id: tags:
``` python
# Keep only the two requested-memory columns analysed below.
df2 = df1[['ReqMemCPU', 'ReqMemNode']]
#df2.head(5)
```
%% Cell type:code id: tags:
``` python
# Boolean mask selecting rows whose JobName contains the substring 'batch'.
# na=False maps missing job names to False; without it the mask would hold
# NaN entries and pandas refuses to index a dataframe with such a mask.
# NOTE(review): this also matches any job whose name merely contains
# 'batch'; if only batch steps are wanted, the JobStep column may be a more
# precise selector — confirm intent.
df_batch = df1.JobName.str.contains('batch', na=False)
#df2[df_batch]
```
%% Cell type:code id: tags:
``` python
# Narrow the memory columns to the rows flagged by df_batch, then keep only
# those whose per-CPU request is at most 1e10 (presumably bytes — verify).
batch_rows = df2[df_batch]
cutoff = batch_rows[batch_rows.ReqMemCPU <= 1e+10]
cutoff
```
%% Cell type:code id: tags:
``` python
# Summary statistics of the filtered rows (include/exclude left at defaults).
cutoff.describe()
```
%% Cell type:code id: tags:
``` python
# Interactive histogram of the per-CPU memory request with a box plot in the
# margin; counts are drawn on a log scale.
fig = px.histogram(
    data_frame=cutoff,
    x="ReqMemCPU",
    title='Histogram of ReqMemCPU',
    labels={'ReqMemCPU':'ReqMemCPU'},  # one display label per dataframe column
    opacity=0.8,
    log_y=True,  # logarithmic y axis
    marginal="box",  # alternative: `violin`
    hover_data=cutoff.columns,
    nbins=30,
    color_discrete_sequence=['indianred'],  # bar colour
)
fig.show()
```
%% Cell type:code id: tags:
``` python
# Static matplotlib histogram of the same column, 50 bins, log-scaled counts.
cutoff[['ReqMemCPU']].plot.hist(bins=50, rwidth=1, logy=True)
plt.show()
```
%% Cell type:code id: tags:
``` python
# Skewness of the filtered per-CPU memory requests; x stays a one-column
# dataframe, so skew() is evaluated column-wise.
x = cutoff.loc[:, ['ReqMemCPU']]
print(skew(x))
```
%% Cell type:code id: tags:
``` python
# Rebind `cutoff`: same df_batch row selection as before, now keeping rows
# whose per-node request is at most 1e10 (presumably bytes — verify).
batch_rows = df2[df_batch]
cutoff = batch_rows[batch_rows.ReqMemNode <= 1e+10]
```
%% Cell type:code id: tags:
``` python
# Interactive histogram of the per-node memory request with a box plot in the
# margin; counts are drawn on a log scale.
fig = px.histogram(
    data_frame=cutoff,
    x="ReqMemNode",
    title='Histogram of ReqMemNode',
    labels={'ReqMemNode':'ReqMemNode'},  # one display label per dataframe column
    opacity=0.8,
    log_y=True,  # logarithmic y axis
    marginal="box",  # alternative: `violin`
    hover_data=cutoff.columns,
    nbins=30,
    color_discrete_sequence=['indianred'],  # bar colour
)
fig.show()
```
%% Cell type:code id: tags:
``` python
# Static matplotlib histogram of the same column, 50 bins, log-scaled counts.
cutoff[['ReqMemNode']].plot.hist(bins=50, rwidth=1, logy=True)
plt.show()
```
%% Cell type:code id: tags:
``` python
# Skewness of the filtered per-node memory requests; x stays a one-column
# dataframe, so skew() is evaluated column-wise.
x = cutoff.loc[:, ['ReqMemNode']]
print(skew(x))
```
%% Cell type:code id: tags:
``` python
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment