Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Mitchell Moore
DTN_tests
Commits
bcd47b1f
Commit
bcd47b1f
authored
Jan 11, 2021
by
Mitchell A Moore
Browse files
Create stack and group with dummy data
parent
46d84d25
Changes
1
Hide whitespace changes
Inline
Side-by-side
DTN_notebook.ipynb
View file @
bcd47b1f
...
...
@@ -245,6 +245,145 @@
"ds16['Elapsed'].hist(grid=True, ax=ax2, color='green', edgecolor='gold') # histogram using pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"N = 1\n",
"menMeans = (20)\n",
"womenMeans = (25)\n",
"menStd = (2)\n",
"womenStd = (3)\n",
"ind = np.arange(N) # the x locations for the groups\n",
"width = 0.35 # the width of the bars: can also be len(x) sequence\n",
"\n",
"p1 = plt.bar(ind, menMeans, width, yerr=menStd)\n",
"p2 = plt.bar(ind, womenMeans, width,\n",
" bottom=menMeans, yerr=womenStd)\n",
"\n",
"plt.ylabel('Scores')\n",
"plt.title('Scores by group and gender')\n",
"plt.xticks(ind, ('ds01',))\n",
"plt.legend((p1[0], p2[0]), ('Men', 'Women'))\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"labels = ['G1', 'G2', 'G3', 'G4', 'G5']\n",
"men_means = [20, 34, 30, 35, 27]\n",
"men_stack = [1,2,3,4,5]\n",
"women_stack = [5,4,3,2,1]\n",
"women_means = [25, 32, 34, 20, 25]\n",
"\n",
"x = np.arange(len(labels)) # the label locations\n",
"width = 0.35 # the width of the bars\n",
"\n",
"fig, ax = plt.subplots()\n",
"rects1 = ax.bar(x - width/2, men_means, width, label='Men')\n",
"stack1 = ax.bar(x - width/2, men_stack, width, bottom=men_means, label='M+')\n",
"\n",
"\n",
"rects2 = ax.bar(x + width/2, women_means, width, label='Women')\n",
"stack2 = ax.bar(x + width/2, women_stack, width, bottom=women_means, label='W+')\n",
"\n",
"# Add some text for labels, title and custom x-axis tick labels, etc.\n",
"ax.set_ylabel('Scores')\n",
"ax.set_title('Scores by group and gender')\n",
"ax.set_xticks(x)\n",
"ax.set_xticklabels(labels)\n",
"ax.legend()\n",
"\n",
"\n",
"def autolabel(rects):\n",
" \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n",
" for rect in rects:\n",
" height = rect.get_height()\n",
" ax.annotate('{}'.format(height),\n",
" xy=(rect.get_x() + rect.get_width() / 2, height),\n",
" xytext=(0, 3), # 3 points vertical offset\n",
" textcoords=\"offset points\",\n",
" ha='center', va='bottom')\n",
"\n",
"\n",
"# autolabel(rects1)\n",
"# autolabel(rects2)\n",
"\n",
"fig.tight_layout()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cac_ds01 = cac[(cac == 'ds01').any(axis=1)]['Speed']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(cac_ds01)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# First create some toy data:\n",
"x = np.linspace(0, 2*np.pi, 400)\n",
"y = np.sin(x**2)\n",
"\n",
"# Create just a figure and only one subplot\n",
"fig, ax = plt.subplots()\n",
"ax.plot(x, y)\n",
"ax.set_title('Simple plot')\n",
"\n",
"# Create two subplots and unpack the output array immediately\n",
"f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)\n",
"ax1.plot(x, y)\n",
"ax1.set_title('Sharing Y axis')\n",
"ax2.scatter(x, y)\n",
"\n",
"# Create four polar axes and access them through the returned array\n",
"fig, axs = plt.subplots(2, 2, subplot_kw=dict(polar=True))\n",
"axs[0, 0].plot(x, y)\n",
"axs[1, 1].scatter(x, y)\n",
"\n",
"# Share a X axis with each column of subplots\n",
"plt.subplots(2, 2, sharex='col')\n",
"\n",
"# Share a Y axis with each row of subplots\n",
"plt.subplots(2, 2, sharey='row')\n",
"\n",
"# Share both X and Y axes with all subplots\n",
"plt.subplots(2, 2, sharex='all', sharey='all')\n",
"\n",
"# Note that this is the same as\n",
"plt.subplots(2, 2, sharex=True, sharey=True)\n",
"\n",
"# Create figure number 10 with a single subplot\n",
"# and clears it if it already exists.\n",
"fig, ax = plt.subplots(num=10, clear=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
%% Cell type:code id: tags:
```
python
import
globus_sdk
import
matplotlib.pyplot
as
plt
from
matplotlib
import
figure
import
numpy
as
np
import
csv
import
pandas
as
pd
from
datetime
import
datetime
,
timedelta
from
mpl_toolkits.mplot3d
import
Axes3D
```
%% Cell type:code id: tags:
```
python
# Read the CSV file into a DataFrame object; later cells use its
# 'Elapsed' and 'Speed' columns.
data = pd.read_csv("test_big.csv")  # reads comma delimited file into a DataFrame object
#data.head(85) # returns the first n rows of the DataFrame, n here is 85
```
%% Cell type:code id: tags:
```
python
# Replace endpoint IDs with human-readable endpoint names.
# The replacement is not restricted to one column: any cell equal to one of
# these IDs is rewritten.
ENDPOINT_NAMES = {
    '924a32b0-6a2a-11e6-83a8-22000b97daec': "Pamela Hill Data Share",
    'e261ffb8-6d04-11e5-ba46-22000b92c6ec': "DME PerfTest - Argonne",
    '606579ae-5b03-11e9-bf32-0edbf3a4e7ee': "cac_dtn_test",
    '9c8c88c2-ea4a-11e6-b9ba-22000b9a448b': "Cheaha On-Campus",
    '7167cb38-9f78-11e6-b0dd-22000b92c261': "Cheaha Off-Campus",
}

# A flat {old: new} mapping lets pandas do every substitution in a single
# pass instead of copying the whole frame once per endpoint. None of the
# names is itself an ID, so the result matches the chained replaces.
data = data.replace(to_replace=ENDPOINT_NAMES)

# Last expression of the cell: the notebook displays the first 85 rows.
data.head(85)
```
%% Cell type:code id: tags:
```
python
# Convert the 'Elapsed' strings to datetime objects and report the total
# time elapsed across all rows.
data['Elapsed'] = pd.to_datetime(data['Elapsed'], format='%H:%M:%S.%f')

# Subtracting each value's own midnight leaves just the time-of-day part as
# a Timedelta. Unlike summing hour/minute/second by hand, this keeps the
# fractional seconds that the '%f' directive parsed, and it runs vectorised
# instead of looping over the Series in Python.
elapsed_durations = data['Elapsed'] - data['Elapsed'].dt.normalize()

# `total` stays a plain float number of seconds (0.0 for an empty frame).
total = float(elapsed_durations.dt.total_seconds().sum())

print(round(total / 60 / 60, 2), "Hours")
```
%% Cell type:code id: tags:
```
python
def _rows_mentioning(frame, label):
    """Return the rows of *frame* in which at least one cell equals *label*.

    The comparison is made against every column, so the caller does not need
    to know which column carries the dataset or endpoint name.
    """
    return frame[(frame == label).any(axis=1)]


# Group data by dataset name
ds01 = _rows_mentioning(data, 'ds01')
ds04 = _rows_mentioning(data, 'ds04')
ds06 = _rows_mentioning(data, 'ds06')
ds08 = _rows_mentioning(data, 'ds08')
ds10 = _rows_mentioning(data, 'ds10')
ds12 = _rows_mentioning(data, 'ds12')
ds14 = _rows_mentioning(data, 'ds14')
ds16 = _rows_mentioning(data, 'ds16')

# Group data by endpoint
cac = _rows_mentioning(data, 'cac_dtn_test')
cheaha_off = _rows_mentioning(data, 'Cheaha Off-Campus')
cheaha_on = _rows_mentioning(data, 'Cheaha On-Campus')
pamela = _rows_mentioning(data, 'Pamela Hill Data Share')
argonne = _rows_mentioning(data, 'DME PerfTest - Argonne')

# Examples
# argonne.head(50)
# ds10.head(15)
```
%% Cell type:code id: tags:
```
python
# Builds bar graphs to represent transfer speeds for different datasets
#plot
# bg1 = data.plot.bar(x = 'Dataset', y = 'Speed', rot = 100,) # graph shows the speed for each ds
```
%% Cell type:code id: tags:
```
python
# Builds bar graphs to represent data for different endpoints
# time = (pd.to_datetime(data['End'], infer_datetime_format=True) - pd.to_datetime(data['Start']))
# print(time)
# bg2 = data.plot.bar(x = data["Dataset"],
# y = (pd.to_datetime(data['End'],
# infer_datetime_format=True) - pd.to_datetime(data['Start'])),
# rot=100)
```
%% Cell type:code id: tags:
```
python
# Builds scatter plots to represent transfer speeds for different datasets
# plt.scatter((data['Dataset']), data['Speed'])
# plt.title('Dataset Speed')
# plt.xlabel('Dataset')
# plt.ylabel('Speed')
# plt.show()
```
%% Cell type:code id: tags:
```
python
# Show how much the reading varied across endpoints: one figure per dataset,
# with a 'Speed' histogram on the left and an 'Elapsed' histogram on the right.
def _plot_speed_and_elapsed(frame, title):
    """Draw side-by-side 'Speed' and 'Elapsed' histograms for one dataset.

    Parameters
    ----------
    frame : DataFrame
        Rows for a single dataset; must have 'Speed' and 'Elapsed' columns.
    title : str
        Figure-level title describing the dataset.

    Returns
    -------
    (fig, (ax1, ax2))
        The new figure and its two axes, in the same shape that
        ``plt.subplots(1, 2)`` returns them.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figwidth(15)
    fig.suptitle(title, fontsize=18)

    ax1.set_title('Effective Speed')
    ax1.set_ylabel('Frequency')
    ax1.set_xlabel('Speed (Mb/s)')

    ax2.set_title('Elapsed Time')
    ax2.set_ylabel('Frequency')
    ax2.set_xlabel('Time (hh:mm:ss)')

    # histograms using pandas; the two panels use mirrored colours
    frame['Speed'].hist(grid=True, ax=ax1, color='gold', edgecolor='green')
    frame['Elapsed'].hist(grid=True, ax=ax2, color='green', edgecolor='gold')
    return fig, (ax1, ax2)


# NOTE(review): ds14 is grouped in an earlier cell but has never had a
# histogram here; it is left out because its title text is not known.
for _frame, _title in (
    (ds01, 'ds01 (100MB, 10,000 x 10KB, 1-dir)'),
    (ds04, 'ds04 (10GB, 10,000 x 1MB files, 100-dirs)'),
    (ds06, 'ds06 (100GB, 100,000 x 1MB, 1-dir)'),
    (ds08, 'ds08 (50 x 10GB; 350 x 1GB; 1,000 x 100MB; 5,500 x 10MB; 23,176 x 1MB, 1-dir)'),
    (ds10, 'ds10 (1TB, 100 x 10GB, 1-dir)'),
    (ds12, 'ds12 (100GB, 1 x 100GB, 1-dir)'),
    (ds16, 'ds16 (1TB, 4 x 250GB, 1-dir)'),
):
    # Rebinding fig/ax1/ax2 on every pass leaves them pointing at the last
    # (ds16) figure once the loop ends, as the unrolled code did.
    fig, (ax1, ax2) = _plot_speed_and_elapsed(_frame, _title)
```
%% Cell type:code id: tags:
```
python
# Single stacked bar built from dummy values: a "Men" segment with a
# "Women" segment stacked on top of it.
N = 1

# Scalars are enough here because there is exactly one group (N = 1).
menMeans = 20
womenMeans = 25
menStd = 2
womenStd = 3

ind = np.arange(N)  # the x locations for the groups
width = 0.35  # the width of the bars: can also be len(x) sequence

p1 = plt.bar(ind, menMeans, width=width, yerr=menStd)
# The second series starts where the first one ends, which stacks the bars.
p2 = plt.bar(ind, womenMeans, width=width, bottom=menMeans, yerr=womenStd)

plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind, ('ds01',))

# One legend entry per series, taken from the first bar of each container.
plt.legend((p1[0], p2[0]), ('Men', 'Women'))

plt.show()
```
%% Cell type:code id: tags:
```
python
# Grouped bar chart from dummy data: each group gets a "Men" and a "Women"
# bar, and each of those carries a second segment stacked on top.
labels = ['G1', 'G2', 'G3', 'G4', 'G5']
men_means = [20, 34, 30, 35, 27]
men_stack = [1, 2, 3, 4, 5]
women_stack = [5, 4, 3, 2, 1]
women_means = [25, 32, 34, 20, 25]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

# Shift the two series half a bar-width to either side of each tick.
men_x = x - width / 2
women_x = x + width / 2

fig, ax = plt.subplots()

# Draw order is kept (base, stack, base, stack) because it determines the
# colours picked for each series and the order of the legend entries.
rects1 = ax.bar(men_x, men_means, width=width, label='Men')
stack1 = ax.bar(men_x, men_stack, width=width, bottom=men_means, label='M+')

rects2 = ax.bar(women_x, women_means, width=width, label='Women')
stack2 = ax.bar(women_x, women_stack, width=width, bottom=women_means, label='W+')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
def autolabel(rects, target_ax=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Parameters
    ----------
    rects : iterable
        Bar objects; each must provide ``get_x()``, ``get_width()`` and
        ``get_height()``.
    target_ax : optional
        Object whose ``annotate`` method receives the labels. When omitted,
        the notebook-level ``ax`` is used, which keeps existing
        ``autolabel(rects)`` calls working. Pass it explicitly when ``ax``
        may have been rebound by another cell since the bars were drawn.
    """
    # Fall back to the global only when no axes were supplied, so the helper
    # also works where no notebook-level `ax` exists.
    axes = ax if target_ax is None else target_ax

    for rect in rects:
        height = rect.get_height()
        axes.annotate(
            '{}'.format(height),
            # anchor at the horizontal centre of the bar, at its top edge
            xy=(rect.get_x() + rect.get_width() / 2, height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center', va='bottom',
        )
# Per-bar height labels are switched off; uncomment to annotate the base bars.
# autolabel(rects1)
# autolabel(rects2)

fig.tight_layout()

plt.show()
```
%% Cell type:code id: tags:
```
python
# 'Speed' values for dataset 'ds01' within the 'cac_dtn_test' endpoint rows:
# `cac` already holds the rows mentioning that endpoint, so keep only those of
# its rows where some cell equals 'ds01' and take their 'Speed' column.
cac_ds01 = cac[(cac == 'ds01').any(axis=1)]['Speed']
```
%% Cell type:code id: tags:
```
python
print
(
cac_ds01
)
```
%% Cell type:code id: tags:
```
python
# Tour of plt.subplots() layouts using toy data.
# NOTE: this cell rebinds the notebook-level names x, fig, ax, ax1 and ax2
# that earlier cells also assign.

# First create some toy data:
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)

# A figure holding exactly one subplot
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title('Simple plot')

# Two subplots side by side, unpacked straight from the returned array
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.plot(x, y)
ax1.set_title('Sharing Y axis')
ax2.scatter(x, y)

# Four polar axes, addressed through the returned 2x2 array
fig, axs = plt.subplots(2, 2, subplot_kw={'polar': True})
axs[0, 0].plot(x, y)
axs[1, 1].scatter(x, y)

# Share a X axis with each column of subplots
plt.subplots(2, 2, sharex='col')

# Share a Y axis with each row of subplots
plt.subplots(2, 2, sharey='row')

# Share both X and Y axes with all subplots
plt.subplots(2, 2, sharex='all', sharey='all')

# Note that this is the same as
plt.subplots(2, 2, sharex=True, sharey=True)

# Create figure number 10 with a single subplot
# and clears it if it already exists.
fig, ax = plt.subplots(num=10, clear=True)
```
%% Cell type:code id: tags:
```
python
```
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment