Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Chirag Chandrahas Shetty
createAndParseSACCT
Commits
3037c804
Commit
3037c804
authored
Apr 16, 2020
by
Chirag Chandrahas Shetty
Browse files
removing the output and metadata from notebook while doing git diff
parent
ea026f5f
Changes
2
Hide whitespace changes
Inline
Side-by-side
importSACCTinfo.ipynb
View file @
3037c804
...
...
@@ -2,13 +2,8 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-03-16T20:57:10.405006Z",
"start_time": "2020-03-16T20:56:55.837670Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
...
...
@@ -18,13 +13,8 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2020-03-16T20:57:11.865980Z",
"start_time": "2020-03-16T20:57:10.414986Z"
}
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('neurobiologyusage.txt',delimiter='|')"
...
...
@@ -32,175 +22,18 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2020-03-16T20:57:11.932878Z",
"start_time": "2020-03-16T20:57:11.905219Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>User</th>\n",
" <th>Start</th>\n",
" <th>JobID</th>\n",
" <th>JobName</th>\n",
" <th>State</th>\n",
" <th>Partition</th>\n",
" <th>MaxRSS</th>\n",
" <th>ReqMem</th>\n",
" <th>ReqCPUS</th>\n",
" <th>NodeList</th>\n",
" <th>NNodes</th>\n",
" <th>Elapsed</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>mdefende</td>\n",
" <td>2019-01-06T22:00:21</td>\n",
" <td>2040834</td>\n",
" <td>_interactive</td>\n",
" <td>COMPLETED</td>\n",
" <td>medium</td>\n",
" <td>NaN</td>\n",
" <td>10000Mc</td>\n",
" <td>1</td>\n",
" <td>c0088</td>\n",
" <td>1</td>\n",
" <td>16:04:23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" <td>2019-01-06T22:00:21</td>\n",
" <td>2040834.batch</td>\n",
" <td>batch</td>\n",
" <td>COMPLETED</td>\n",
" <td>NaN</td>\n",
" <td>1394528K</td>\n",
" <td>10000Mc</td>\n",
" <td>1</td>\n",
" <td>c0088</td>\n",
" <td>1</td>\n",
" <td>16:04:23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>lianov</td>\n",
" <td>2019-01-07T16:15:21</td>\n",
" <td>2043373</td>\n",
" <td>Pipe_trim_galore</td>\n",
" <td>COMPLETED</td>\n",
" <td>medium</td>\n",
" <td>NaN</td>\n",
" <td>2000Mc</td>\n",
" <td>1</td>\n",
" <td>c0038</td>\n",
" <td>1</td>\n",
" <td>00:18:41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>2019-01-07T16:15:21</td>\n",
" <td>2043373.batch</td>\n",
" <td>batch</td>\n",
" <td>COMPLETED</td>\n",
" <td>NaN</td>\n",
" <td>58592K</td>\n",
" <td>2000Mc</td>\n",
" <td>1</td>\n",
" <td>c0038</td>\n",
" <td>1</td>\n",
" <td>00:18:41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>lianov</td>\n",
" <td>2019-01-07T16:15:21</td>\n",
" <td>2043374</td>\n",
" <td>Pipe_trim_galore</td>\n",
" <td>COMPLETED</td>\n",
" <td>medium</td>\n",
" <td>NaN</td>\n",
" <td>2000Mc</td>\n",
" <td>1</td>\n",
" <td>c0063</td>\n",
" <td>1</td>\n",
" <td>00:15:48</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" User Start JobID JobName State \\\n",
"0 mdefende 2019-01-06T22:00:21 2040834 _interactive COMPLETED \n",
"1 NaN 2019-01-06T22:00:21 2040834.batch batch COMPLETED \n",
"2 lianov 2019-01-07T16:15:21 2043373 Pipe_trim_galore COMPLETED \n",
"3 NaN 2019-01-07T16:15:21 2043373.batch batch COMPLETED \n",
"4 lianov 2019-01-07T16:15:21 2043374 Pipe_trim_galore COMPLETED \n",
"\n",
" Partition MaxRSS ReqMem ReqCPUS NodeList NNodes Elapsed \n",
"0 medium NaN 10000Mc 1 c0088 1 16:04:23 \n",
"1 NaN 1394528K 10000Mc 1 c0088 1 16:04:23 \n",
"2 medium NaN 2000Mc 1 c0038 1 00:18:41 \n",
"3 NaN 58592K 2000Mc 1 c0038 1 00:18:41 \n",
"4 medium NaN 2000Mc 1 c0063 1 00:15:48 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2020-03-16T20:57:14.154962Z",
"start_time": "2020-03-16T20:57:11.967976Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array(['medium', nan, 'medium', ..., 'medium', nan, nan], dtype=object)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df[['jid','step']] = df.JobID.str.split(\".\",expand=True) \n",
"df.Partition.values"
...
...
@@ -209,34 +42,8 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2020-03-16T20:56:57.392Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/wsmonroe/.conda/envs/wsmplayground/lib/python3.6/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" after removing the cwd from sys.path.\n",
"/home/wsmonroe/.conda/envs/wsmplayground/lib/python3.6/site-packages/pandas/core/generic.py:7626: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" self._update_inplace(new_data)\n",
"/home/wsmonroe/.conda/envs/wsmplayground/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2961: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
]
}
],
"metadata": {},
"outputs": [],
"source": [
"batchDF=df.dropna(subset=[\"MaxRSS\"])\n",
"userDF=df.dropna(subset=[\"User\"])\n",
...
...
@@ -257,64 +64,9 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:wsmplayground]",
"language": "python",
"name": "conda-env-wsmplayground-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
...
...
%% Cell type:code id: tags:
```
python
```
import numpy as np
import pandas as pd
import pandas_profiling
```
%% Cell type:code id: tags:
```
python
```
df = pd.read_csv('neurobiologyusage.txt',delimiter='|')
```
%% Cell type:code id: tags:
```
python
```
df.head()
```
%% Output
User Start JobID JobName State \
0 mdefende 2019-01-06T22:00:21 2040834 _interactive COMPLETED
1 NaN 2019-01-06T22:00:21 2040834.batch batch COMPLETED
2 lianov 2019-01-07T16:15:21 2043373 Pipe_trim_galore COMPLETED
3 NaN 2019-01-07T16:15:21 2043373.batch batch COMPLETED
4 lianov 2019-01-07T16:15:21 2043374 Pipe_trim_galore COMPLETED
Partition MaxRSS ReqMem ReqCPUS NodeList NNodes Elapsed
0 medium NaN 10000Mc 1 c0088 1 16:04:23
1 NaN 1394528K 10000Mc 1 c0088 1 16:04:23
2 medium NaN 2000Mc 1 c0038 1 00:18:41
3 NaN 58592K 2000Mc 1 c0038 1 00:18:41
4 medium NaN 2000Mc 1 c0063 1 00:15:48
%% Cell type:code id: tags:
```
python
```
df[['jid','step']] = df.JobID.str.split(".",expand=True)
df.Partition.values
```
%% Output
array(['medium', nan, 'medium', ..., 'medium', nan, nan], dtype=object)
%% Cell type:code id: tags:
```
python
```
batchDF=df.dropna(subset=["MaxRSS"])
userDF=df.dropna(subset=["User"])
for jid in df.jid.unique():
userDF['MaxRSS'][userDF['jid'] == jid]=batchDF['MaxRSS'][batchDF['jid'] == jid]
#print(userDF[userDF['jid'] == jid])
userDF.head()
```
%% Output
/home/wsmonroe/.conda/envs/wsmplayground/lib/python3.6/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
after removing the cwd from sys.path.
/home/wsmonroe/.conda/envs/wsmplayground/lib/python3.6/site-packages/pandas/core/generic.py:7626: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
self._update_inplace(new_data)
/home/wsmonroe/.conda/envs/wsmplayground/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2961: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
exec(code_obj, self.user_global_ns, self.user_ns)
%% Cell type:code id: tags:
```
python
```
```
...
...
slurm-2sql.ipynb
View file @
3037c804
...
...
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count":
8
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -13,20 +13,9 @@
},
{
"cell_type": "code",
"execution_count":
6
,
"execution_count":
null
,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"db = sqlite3.connect('test.db')\n",
"slurm2sql.slurm2sql(db, ['-S', '2020-03-18', '-a'])"
...
...
@@ -34,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count":
9
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -44,216 +33,9 @@
},
{
"cell_type": "code",
"execution_count":
11
,
"execution_count":
null
,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>JobID</th>\n",
" <th>ArrayJobID</th>\n",
" <th>ArrayTaskID</th>\n",
" <th>JobStep</th>\n",
" <th>JobIDSlurm</th>\n",
" <th>JobName</th>\n",
" <th>User</th>\n",
" <th>Group</th>\n",
" <th>Account</th>\n",
" <th>State</th>\n",
" <th>...</th>\n",
" <th>MaxDiskReadNode</th>\n",
" <th>MaxDiskReadTask</th>\n",
" <th>MaxDiskWrite</th>\n",
" <th>MaxDiskWriteNode</th>\n",
" <th>MaxDiskWriteTask</th>\n",
" <th>ReqGPUS</th>\n",
" <th>Comment</th>\n",
" <th>GPUMem</th>\n",
" <th>GPUEff</th>\n",
" <th>NGPU</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3319116</td>\n",
" <td>3319116</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>3319116_[43-45,47%5]</td>\n",
" <td>1mUD1MPa</td>\n",
" <td>gpekmezi</td>\n",
" <td>gpekmezi</td>\n",
" <td>gpekmezi</td>\n",
" <td>PENDING</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3927198</td>\n",
" <td>3887451</td>\n",
" <td>30.0</td>\n",
" <td>None</td>\n",
" <td>3887451_30</td>\n",
" <td>100kCrC20MPa</td>\n",
" <td>gpekmezi</td>\n",
" <td>gpekmezi</td>\n",
" <td>gpekmezi</td>\n",
" <td>COMPLETED</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3927198</td>\n",
" <td>3887451</td>\n",
" <td>30.0</td>\n",
" <td>batch</td>\n",
" <td>3887451_30.batch</td>\n",
" <td>batch</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>gpekmezi</td>\n",
" <td>COMPLETED</td>\n",
" <td>...</td>\n",
" <td>c0088</td>\n",
" <td>0</td>\n",
" <td>1.222336e+10</td>\n",
" <td>c0088</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3927198</td>\n",
" <td>3887451</td>\n",
" <td>30.0</td>\n",
" <td>extern</td>\n",
" <td>3887451_30.extern</td>\n",
" <td>extern</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>gpekmezi</td>\n",
" <td>COMPLETED</td>\n",
" <td>...</td>\n",
" <td>c0088</td>\n",
" <td>0</td>\n",
" <td>0.000000e+00</td>\n",
" <td>c0088</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3927199</td>\n",
" <td>3887451</td>\n",
" <td>31.0</td>\n",
" <td>None</td>\n",
" <td>3887451_31</td>\n",
" <td>100kCrC20MPa</td>\n",
" <td>gpekmezi</td>\n",
" <td>gpekmezi</td>\n",
" <td>gpekmezi</td>\n",
" <td>COMPLETED</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 63 columns</p>\n",
"</div>"
],
"text/plain": [
" JobID ArrayJobID ArrayTaskID JobStep JobIDSlurm \\\n",
"0 3319116 3319116 NaN None 3319116_[43-45,47%5] \n",
"1 3927198 3887451 30.0 None 3887451_30 \n",
"2 3927198 3887451 30.0 batch 3887451_30.batch \n",
"3 3927198 3887451 30.0 extern 3887451_30.extern \n",
"4 3927199 3887451 31.0 None 3887451_31 \n",
"\n",
" JobName User Group Account State ... \\\n",
"0 1mUD1MPa gpekmezi gpekmezi gpekmezi PENDING ... \n",
"1 100kCrC20MPa gpekmezi gpekmezi gpekmezi COMPLETED ... \n",
"2 batch gpekmezi COMPLETED ... \n",
"3 extern gpekmezi COMPLETED ... \n",
"4 100kCrC20MPa gpekmezi gpekmezi gpekmezi COMPLETED ... \n",
"\n",
" MaxDiskReadNode MaxDiskReadTask MaxDiskWrite MaxDiskWriteNode \\\n",
"0 NaN \n",
"1 NaN \n",
"2 c0088 0 1.222336e+10 c0088 \n",
"3 c0088 0 0.000000e+00 c0088 \n",
"4 NaN \n",
"\n",
" MaxDiskWriteTask ReqGPUS Comment GPUMem GPUEff NGPU \n",
"0 NaN None None None None \n",
"1 NaN None None None None \n",
"2 0 NaN None None None None \n",
"3 0 NaN None None None None \n",
"4 NaN None None None None \n",
"\n",
"[5 rows x 63 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"df1.head(5)"
]
...
...
@@ -267,22 +49,9 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:.conda-slurm-ds]",
"language": "python",
"name": "conda-env-.conda-slurm-ds-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
...
...