Commit bcd47b1f authored by Mitchell A Moore
Browse files

Create stack and group with dummy data

parent 46d84d25
%% Cell type:code id: tags:
``` python
import globus_sdk
import matplotlib.pyplot as plt
from matplotlib import figure
import numpy as np
import csv
import pandas as pd
from datetime import datetime, timedelta
from mpl_toolkits.mplot3d import Axes3D
```
%% Cell type:code id: tags:
``` python
# Load test_big.csv (path is relative to the current working directory) into
# a DataFrame; every later cell works from this `data` object.
data = pd.read_csv("test_big.csv") # reads comma-delimited file into a DataFrame object
#data.head(85) # returns the first n rows of the DataFrame; n here is 85
```
%% Cell type:code id: tags:
``` python
# Swap each source endpoint UUID for its human-readable endpoint name.
# The mapping is applied one entry at a time, in the order listed.
_ENDPOINT_NAMES = {
    '924a32b0-6a2a-11e6-83a8-22000b97daec': "Pamela Hill Data Share",
    'e261ffb8-6d04-11e5-ba46-22000b92c6ec': "DME PerfTest - Argonne",
    '606579ae-5b03-11e9-bf32-0edbf3a4e7ee': "cac_dtn_test",
    '9c8c88c2-ea4a-11e6-b9ba-22000b9a448b': "Cheaha On-Campus",
    '7167cb38-9f78-11e6-b0dd-22000b92c261': "Cheaha Off-Campus",
}
for _endpoint_id, _endpoint_name in _ENDPOINT_NAMES.items():
    data = data.replace(to_replace=_endpoint_id, value=_endpoint_name)
# Last expression of the cell: shows the first 85 rows after the substitution.
data.head(85)
```
%% Cell type:code id: tags:
``` python
# Convert the 'Elapsed' strings (HH:MM:SS.ffffff) to datetime objects and
# report the total time elapsed across all rows.
data['Elapsed'] = pd.to_datetime(data['Elapsed'], format='%H:%M:%S.%f')
# A time-only string is parsed onto a placeholder date, so subtracting
# midnight of that same date leaves just the duration as a Timedelta.
_elapsed = data['Elapsed'] - data['Elapsed'].dt.normalize()
# Summing the Timedeltas keeps the fractional seconds parsed by '%f'; the
# previous per-row loop rebuilt each duration from hour/minute/second only
# and silently dropped them.
total = float(_elapsed.dt.total_seconds().sum())
print(round(total/60/60, 2), "Hours")
```
%% Cell type:code id: tags:
``` python
def _rows_matching(value):
    """Return the rows of ``data`` in which at least one column equals *value*."""
    hit = (data == value).any(axis=1)
    return data[hit]


# Group data by dataset name
ds01 = _rows_matching('ds01')
ds04 = _rows_matching('ds04')
ds06 = _rows_matching('ds06')
ds08 = _rows_matching('ds08')
ds10 = _rows_matching('ds10')
ds12 = _rows_matching('ds12')
ds14 = _rows_matching('ds14')
ds16 = _rows_matching('ds16')

# Group data by endpoint (names as substituted for the endpoint UUIDs)
cac = _rows_matching('cac_dtn_test')
cheaha_off = _rows_matching('Cheaha Off-Campus')
cheaha_on = _rows_matching('Cheaha On-Campus')
pamela = _rows_matching('Pamela Hill Data Share')
argonne = _rows_matching('DME PerfTest - Argonne')

# Examples
# argonne.head(50)
# ds10.head(15)
```
%% Cell type:code id: tags:
``` python
# Builds bar graphs to represent transfer speeds for different datasets
#plot
# bg1 = data.plot.bar(x = 'Dataset', y = 'Speed', rot = 100,) # graph shows the speed for each ds
```
%% Cell type:code id: tags:
``` python
# Builds bar graphs to represent data for different endpoints
# time = (pd.to_datetime(data['End'], infer_datetime_format=True) - pd.to_datetime(data['Start']))
# print(time)
# bg2 = data.plot.bar(x = data["Dataset"],
# y = (pd.to_datetime(data['End'],
# infer_datetime_format=True) - pd.to_datetime(data['Start'])),
# rot=100)
```
%% Cell type:code id: tags:
``` python
# Builds scatter plots to represent transfer speeds for different datasets
# plt.scatter((data['Dataset']), data['Speed'])
# plt.title('Dataset Speed')
# plt.xlabel('Dataset')
# plt.ylabel('Speed')
# plt.show()
```
%% Cell type:code id: tags:
``` python
# Show how much the readings varied across endpoints: for every dataset, draw
# side-by-side histograms of effective speed and elapsed time.
def _plot_speed_and_elapsed(frame, title):
    """Draw 'Speed' and 'Elapsed' histograms of *frame* on one new figure.

    Args:
        frame: DataFrame with the rows of a single dataset; its 'Speed' and
            'Elapsed' columns are plotted.
        title: Figure-level title describing the dataset.

    Returns:
        The ``(fig, (ax1, ax2))`` pair created by ``plt.subplots``.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figwidth(15)
    fig.suptitle(title, fontsize=18)

    ax1.set_title('Effective Speed')
    ax1.set_ylabel('Frequency')
    ax1.set_xlabel('Speed (Mb/s)')

    ax2.set_title('Elapsed Time')
    ax2.set_ylabel('Frequency')
    ax2.set_xlabel('Time (hh:mm:ss)')

    # Histograms are drawn with pandas onto the axes prepared above.
    frame['Speed'].hist(grid=True, ax=ax1, color='gold', edgecolor='green')
    frame['Elapsed'].hist(grid=True, ax=ax2, color='green', edgecolor='gold')
    return fig, (ax1, ax2)


# (rows, figure title) for each dataset, in plotting order.
# NOTE(review): ds14 is grouped earlier in the notebook but has no figure
# here -- confirm whether that omission is intentional.
_DATASET_FIGURES = [
    (ds01, 'ds01 (100MB, 10,000 x 10KB, 1-dir)'),
    (ds04, 'ds04 (10GB, 10,000 x 1MB files, 100-dirs)'),
    (ds06, 'ds06 (100GB, 100,000 x 1MB, 1-dir)'),
    (ds08, 'ds08 (50 x 10GB; 350 x 1GB; 1,000 x 100MB; 5,500 x 10MB; 23,176 x 1MB, 1-dir)'),
    (ds10, 'ds10 (1TB, 100 x 10GB, 1-dir)'),
    (ds12, 'ds12 (100GB, 1 x 100GB, 1-dir)'),
    (ds16, 'ds16 (1TB, 4 x 250GB, 1-dir)'),
]

for _frame, _title in _DATASET_FIGURES:
    # Rebinding fig/ax1/ax2 at module level keeps the last figure reachable
    # from later cells, exactly as the unrolled version did.
    fig, (ax1, ax2) = _plot_speed_and_elapsed(_frame, _title)
```
%% Cell type:code id: tags:
``` python
# Dummy-data prototype of a stacked bar chart: a single group ('ds01') with
# the second series stacked on top of the first, each with an error bar.
N = 1
menMeans, menStd = 20, 2
womenMeans, womenStd = 25, 3
width = 0.35  # the width of the bars: can also be len(x) sequence
ind = np.arange(N)  # the x locations for the groups

# Lower segment first; the upper segment starts where the lower one ends.
p1 = plt.bar(ind, menMeans, width, yerr=menStd)
p2 = plt.bar(ind, womenMeans, width, bottom=menMeans, yerr=womenStd)

plt.title('Scores by group and gender')
plt.ylabel('Scores')
plt.xticks(ind, ('ds01',))
plt.legend((p1[0], p2[0]), ('Men', 'Women'))
plt.show()
```
%% Cell type:code id: tags:
``` python
# Dummy-data prototype of a grouped *and* stacked bar chart: two bars per
# group (left/right of the tick), each with an extra segment stacked on top.
labels = ['G1', 'G2', 'G3', 'G4', 'G5']
men_means, men_stack = [20, 34, 30, 35, 27], [1, 2, 3, 4, 5]
women_means, women_stack = [25, 32, 34, 20, 25], [5, 4, 3, 2, 1]

width = 0.35  # the width of the bars
x = np.arange(len(labels))  # the label locations
_left, _right = x - width/2, x + width/2  # bar centres on either side of each tick

fig, ax = plt.subplots()
# Draw order is kept (base, stack, base, stack) because it determines both
# the colour cycle and the legend order.
rects1 = ax.bar(_left, men_means, width, label='Men')
stack1 = ax.bar(_left, men_stack, width, bottom=men_means, label='M+')
rects2 = ax.bar(_right, women_means, width, label='Women')
stack2 = ax.bar(_right, women_stack, width, bottom=women_means, label='W+')

# Title, axis label and custom x-axis tick labels.
ax.set_title('Scores by group and gender')
ax.set_ylabel('Scores')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height.

    Each label is drawn on the axes the bar itself belongs to (``rect.axes``)
    rather than on the module-level ``ax``. Later cells rebind ``ax`` to other
    figures, which would otherwise send the labels to the wrong plot.

    Args:
        rects: Iterable of bar patches, e.g. the container returned by
            ``Axes.bar``. Each item must provide ``get_height()``,
            ``get_x()``, ``get_width()`` and an ``axes`` attribute.
    """
    for rect in rects:
        height = rect.get_height()
        # The anchor's y-coordinate is the bar height itself, i.e. the label
        # position assumes the bar starts at y=0.
        rect.axes.annotate(
            '{}'.format(height),
            xy=(rect.get_x() + rect.get_width() / 2, height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center', va='bottom')
# Per-bar value labels are currently switched off; uncomment the two calls
# below to annotate each bar in rects1/rects2 with its height.
# autolabel(rects1)
# autolabel(rects2)
# Adjust the figure layout, then render the figure.
fig.tight_layout()
plt.show()
```
%% Cell type:code id: tags:
``` python
# 'Speed' readings of the ds01 runs within the cac_dtn_test endpoint group.
_cac_ds01_rows = (cac == 'ds01').any(axis=1)  # True where any column equals 'ds01'
cac_ds01 = cac[_cac_ds01_rows]['Speed']
```
%% Cell type:code id: tags:
``` python
# Print the 'Speed' values selected into cac_ds01.
print(cac_ds01)
```
%% Cell type:code id: tags:
``` python
# Scratch cell exercising the different plt.subplots() layouts on toy data.

# Toy data: sin(x^2) sampled at 400 points over [0, 2*pi].
x = np.linspace(0, 2 * np.pi, num=400)
y = np.sin(np.square(x))

# A single figure holding a single subplot.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title('Simple plot')

# One row, two columns, unpacked straight away; both panels share the Y axis.
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharey=True)
ax1.plot(x, y)
ax1.set_title('Sharing Y axis')
ax2.scatter(x, y)

# A 2x2 grid of polar axes, addressed through the returned array.
fig, axs = plt.subplots(2, 2, subplot_kw={'polar': True})
axs[0, 0].plot(x, y)
axs[1, 1].scatter(x, y)

# X axis shared within each column of subplots.
plt.subplots(2, 2, sharex='col')

# Y axis shared within each row of subplots.
plt.subplots(2, 2, sharey='row')

# Both axes shared across every subplot ...
plt.subplots(2, 2, sharex='all', sharey='all')
# ... which is the same as passing True for both.
plt.subplots(2, 2, sharex=True, sharey=True)

# Figure number 10 with a single subplot; it is cleared first if it already
# exists.
fig, ax = plt.subplots(num=10, clear=True)
```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment