Commit e7f3e384 authored by Mitchell Moore
Browse files

Fix chart titles

parent 00f96764
%% Cell type:code id: tags:
``` python
import globus_sdk
import matplotlib.pyplot as plt
from matplotlib import figure
import numpy as np
import csv
import pandas as pd
from datetime import datetime, timedelta
from mpl_toolkits.mplot3d import Axes3D
import matplotlib
matplotlib.style.use('ggplot')
import matplotlib.ticker as ticker
```
%% Cell type:code id: tags:
``` python
# Load the comma-delimited transfer log into a DataFrame object
csv_path = "test_big.csv"
data = pd.read_csv(csv_path)
# data.head(85)  # preview: returns the first n rows of the DataFrame, n here is 85
```
%% Cell type:code id: tags:
``` python
# Replace endpoint IDs with readable endpoint names.
# The replacement is applied to the whole DataFrame, so an ID is renamed in
# every column where it occurs, not only in 'Source EP ID'.
endpoint_names = {
    '924a32b0-6a2a-11e6-83a8-22000b97daec': "Pamela Hill Data Share",
    'e261ffb8-6d04-11e5-ba46-22000b92c6ec': "DME PerfTest - Argonne",
    '606579ae-5b03-11e9-bf32-0edbf3a4e7ee': "cac_dtn_test",
    '9c8c88c2-ea4a-11e6-b9ba-22000b9a448b': "Cheaha On-Campus",
    '7167cb38-9f78-11e6-b0dd-22000b92c261': "Cheaha Off-Campus",
}
data = data.replace(endpoint_names)
data.head(85)
```
%% Cell type:code id: tags:
``` python
# Convert the 'Elapsed' strings to datetime objects and get the total time elapsed
data['Elapsed'] = pd.to_datetime(data['Elapsed'], format='%H:%M:%S.%f')

# Only the hour, minute and second fields of each value are added up; the
# fractional second that was parsed above is not part of the total.
total = sum(
    (
        timedelta(
            hours=stamp.hour,
            minutes=stamp.minute,
            seconds=stamp.second,
        ).total_seconds()
        for stamp in data['Elapsed']
    ),
    0.0,
)
print(round(total/60/60, 2), "Hours")

# Show Elapsed time on a row by row basis
# time = (pd.to_datetime(data['End'], infer_datetime_format=True) - pd.to_datetime(data['Start']))
# print(time)
```
%% Cell type:code id: tags:
``` python
def _rows_with(value):
    """Return the rows of ``data`` in which at least one column equals *value*."""
    matches_anywhere = (data == value).any(axis=1)
    return data[matches_anywhere]


# Group data by dataset name
ds01 = _rows_with('ds01')
ds04 = _rows_with('ds04')
ds06 = _rows_with('ds06')
ds08 = _rows_with('ds08')
ds10 = _rows_with('ds10')
ds12 = _rows_with('ds12')
ds14 = _rows_with('ds14')
ds16 = _rows_with('ds16')

# Group data by endpoint (the name may appear in any column of the row)
cac = _rows_with('cac_dtn_test')
cheaha_off = _rows_with('Cheaha Off-Campus')
cheaha_on = _rows_with('Cheaha On-Campus')
pamela = _rows_with('Pamela Hill Data Share')
argonne = _rows_with('DME PerfTest - Argonne')

# Examples
# argonne.head(50)
# ds10.head(15)
```
%% Cell type:code id: tags:
``` python
# Scatter plot of transfer speed against dataset name
scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(data['Dataset'], data['Speed'])
scatter_ax.set_title('Dataset Speed')
scatter_ax.set_xlabel('Dataset')
scatter_ax.set_ylabel('Speed')
plt.show()
```
%% Cell type:code id: tags:
``` python
# Show how much the readings varied across endpoints.
# One figure per dataset: a histogram of 'Speed' on the left and a histogram
# of 'Elapsed' on the right. Every figure is built the same way, so the
# (frame, title) pairs are listed once and drawn in a loop.
# NOTE(review): ds14 is grouped earlier in the notebook but has never had a
# figure in this cell; it is left out here as well -- confirm that is intended.
histogram_figures = [
    (ds01, 'ds01 (100MB, 10,000 x 10KB, 1-dir)'),
    (ds04, 'ds04 (10GB, 10,000 x 1MB files, 100-dirs)'),
    (ds06, 'ds06 (100GB, 100,000 x 1MB, 1-dir)'),
    (ds08, 'ds08 (50 x 10GB; 350 x 1GB; 1,000 x 100MB; 5,500 x 10MB; 23,176 x 1MB, 1-dir)'),
    (ds10, 'ds10 (1TB, 100 x 10GB, 1-dir)'),
    (ds12, 'ds12 (100GB, 1 x 100GB, 1-dir)'),
    (ds16, 'ds16 (1TB, 4 x 250GB, 1-dir)'),
]

for frame, figure_title in histogram_figures:
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figwidth(15)
    fig.suptitle(figure_title, fontsize=18)

    ax1.set_title('Effective Speed')
    ax1.set_ylabel('Frequency')
    ax1.set_xlabel('Speed (Mb/s)')

    ax2.set_title('Elapsed Time')
    ax2.set_ylabel('Frequency')
    ax2.set_xlabel('Time (hh:mm:ss)')

    # Histograms are drawn through pandas onto the axes prepared above
    frame['Speed'].hist(grid=True, ax=ax1, color='gold', edgecolor='green')
    frame['Elapsed'].hist(grid=True, ax=ax2, color='green', edgecolor='gold')
```
%% Cell type:code id: tags:
``` python
# Tick labels for the grouped bar charts, one per plotted dataset
labels = [
    'ds01: 100MB, 10,000 x 10KB, 1-dir',
    'ds04: 10GB, 10,000 x 1MB files, 100-dirs',
    'ds06: 100GB, 100,000 x 1MB, 1-dir',
    'ds08: 50 x 10GB; 350 x 1GB; 1,000 x 100MB; 5,500 x 10MB; 23,176 x 1MB, 1-dir',
    'ds10: 1TB, 100 x 10GB, 1-dir',
    'ds12: 100GB, 1 x 100GB, 1-dir',
    'ds16: 1TB, 4 x 250GB, 1-dir',
]


def _endpoint_speeds(frame, column, endpoint):
    """Return 'Speed' for the rows of *frame* whose *column* equals *endpoint*.

    Non-matching rows are blanked with ``where`` and then removed with
    ``dropna``, so a matching row that contains a missing value in any
    column is removed as well.
    """
    return frame.where(frame[column] == endpoint).dropna()['Speed']


# "out": the endpoint is the source; "in": the endpoint is the destination
cac_out = _endpoint_speeds(cac, 'Source EP ID', 'cac_dtn_test')
cac_in = _endpoint_speeds(cac, 'Dest. EP ID', 'cac_dtn_test')
pamela_out = _endpoint_speeds(pamela, 'Source EP ID', 'Pamela Hill Data Share')
pamela_in = _endpoint_speeds(pamela, 'Dest. EP ID', 'Pamela Hill Data Share')
argonne_out = _endpoint_speeds(argonne, 'Source EP ID', 'DME PerfTest - Argonne')
argonne_in = _endpoint_speeds(argonne, 'Dest. EP ID', 'DME PerfTest - Argonne')

# Positions 0-6 of each series feed the first set of bar charts
cac_to = [cac_out.iloc[pos] for pos in range(7)]
cac_from = [cac_in.iloc[pos] for pos in range(7)]
pamela_to = [pamela_out.iloc[pos] for pos in range(7)]
pamela_from = [pamela_in.iloc[pos] for pos in range(7)]
argonne_to = [argonne_out.iloc[pos] for pos in range(7)]
argonne_from = [argonne_in.iloc[pos] for pos in range(7)]
```
%% Cell type:code id: tags:
``` python
# Define Group-Stacked Bar plot function
def group_stack(yscale, on_off='On'):
    """Draw the grouped, stacked bar chart of transfer speeds and show it.

    For every entry in ``labels`` three bars are drawn side by side (cac,
    pamela, argonne). Each bar stacks the "<--" values (``*_from``) on top
    of the "-->" values (``*_to``), and the combined height is written above
    the bar.

    The bar data is read at call time from the module-level lists
    ``cac_to``, ``cac_from``, ``pamela_to``, ``pamela_from``, ``argonne_to``
    and ``argonne_from``; reassign them before calling to chart other rows.

    Parameters
    ----------
    yscale : str
        Value passed to ``plt.yscale`` (the notebook uses 'linear' and 'log').
    on_off : str, optional
        Text inserted into the chart title 'Cheaha {}-Campus'. Defaults to
        'On', so a one-argument call produces the title 'Cheaha On-Campus'.
    """
    x = np.arange(len(labels))  # the label locations
    width = 0.27  # the width of the bars

    fig, ax = plt.subplots()
    ax.figure.set_size_inches(10, 8)

    # One group of three bars per label: cac left of the tick, pamela on it,
    # argonne right of it. The "<--" bars start where the "-->" bars end.
    rects1 = ax.bar(x - width, cac_to, width, label='cac -->', color='#1F618D')
    stack1 = ax.bar(x - width, cac_from, width, bottom=cac_to, label='cac <--', color='#7FB3D5')
    rects2 = ax.bar(x, pamela_to, width, label='pamela -->', color='#B9770E')
    stack2 = ax.bar(x, pamela_from, width, bottom=pamela_to, label='pamela <--', color='#F8C471')
    rects3 = ax.bar(x + width, argonne_to, width, label='argonne -->', color='#196F3D')
    stack3 = ax.bar(x + width, argonne_from, width, bottom=argonne_to, label='argonne <--', color='#7DCEA0')

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Speed (Mb/s)')
    ax.set_title('Cheaha {}-Campus'.format(on_off))
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    def autolabel(rects, stacks, lift):
        """Write the combined height of each rect/stack pair above the pair.

        *lift* raises the text offset from 3 to 12 points.
        """
        offset = 12 if lift else 3
        for rect, stack in zip(rects, stacks):
            height = round(rect.get_height() + stack.get_height(), 2)
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, offset),  # vertical offset in points
                        textcoords="offset points",
                        ha='center', va='bottom')

    # The middle group is lifted -- presumably so its numbers do not collide
    # with those of the neighbouring bars.
    autolabel(rects1, stack1, False)
    autolabel(rects2, stack2, True)
    autolabel(rects3, stack3, False)

    fig.tight_layout()
    # Rotate the tick labels and right-align them
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right")
    plt.yscale(yscale)
    plt.show()
```
%% Cell type:code id: tags:
``` python
# On-campus chart, linear y-axis (the stale one-argument call is dropped so
# the chart is drawn once)
group_stack('linear', 'On')
```
%% Cell type:code id: tags:
``` python
# On-campus chart, logarithmic y-axis (the stale one-argument call is dropped
# so the chart is drawn once)
group_stack('log', 'On')
```
%% Cell type:code id: tags:
``` python
# Indices substituted for Off-Campus Data: positions 7-13 of each series
cac_to = [cac_out.iloc[pos] for pos in range(7, 14)]
cac_from = [cac_in.iloc[pos] for pos in range(7, 14)]
pamela_to = [pamela_out.iloc[pos] for pos in range(7, 14)]
pamela_from = [pamela_in.iloc[pos] for pos in range(7, 14)]
argonne_to = [argonne_out.iloc[pos] for pos in range(7, 14)]
argonne_from = [argonne_in.iloc[pos] for pos in range(7, 14)]
```
%% Cell type:code id: tags:
``` python
# Off-campus chart, linear y-axis (the stale one-argument call, which cannot
# state the 'Off' title, is dropped)
group_stack('linear', 'Off')
```
%% Cell type:code id: tags:
``` python
# Off-campus chart, logarithmic y-axis (the stale one-argument call, which
# cannot state the 'Off' title, is dropped)
group_stack('log', 'Off')
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment