DATA Analysis - Binder version
Contents
DATA Analysis - Binder version
Explain
Further implementation:
Data to be deposited in ORDO (or Zenodo)
Libraries
import numpy
import math
import matplotlib
import matplotlib.pyplot as plt
import ipympl
from matplotlib import cm
import matplotlib.colors as mcolors
import pandas as pd
from glob import glob
from functools import reduce
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout, HBox
import re
from itertools import cycle
import zipfile
#import wx
import gdown
%matplotlib widget
# Sample prefix; later combined with a date and a scan number as
# '<spl>_<date>_<scan>' to build the scan names used for plotting.
spl = 'ASW'
Data import
Five CSV files containing all the reduced data so far (31-03-2022) are imported as DataFrames:
XP_list_test (XP_Ramp_df)
Data_Annex_full (Data_Annex_full_df)
DR1_full (DR1_full_df)
DR2_full (DR2_full_df)
DR3_full (DR3_full_df)
Download
DR2 and DR1 are large files, so they are stored on Google Drive and need to be downloaded from within the notebook.
# Fetch the two large reduced-data files from Google Drive.
# The recorded output shows each file being saved into the current working
# directory (DR2_full.csv, about 602 MB, and DR1_full.csv, about 166 MB).
# fuzzy=True lets gdown accept a Drive "share" link; the log shows it being
# resolved to a direct 'uc?id=...' URL.
# DR2
url = "https://drive.google.com/file/d/1deWpgNp7kvvIsh056PGX9fBzADer0eYX/view?usp=sharing"
gdown.download(url=url, quiet=False, fuzzy=True)
# DR1 (last expression of the cell: its return value, the saved file name, is echoed)
url = "https://drive.google.com/file/d/1zmtG61wYExabbecMznA8P5JIA7BXy49n/view?usp=sharing"
gdown.download(url=url, quiet=False, fuzzy=True)
Downloading...
From: https://drive.google.com/uc?id=1deWpgNp7kvvIsh056PGX9fBzADer0eYX
To: G:\MAMP\htdocs\PhD-WS\Script\DR2_full.csv
100%|██████████| 602M/602M [00:52<00:00, 11.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1zmtG61wYExabbecMznA8P5JIA7BXy49n
To: G:\MAMP\htdocs\PhD-WS\Script\DR1_full.csv
100%|██████████| 166M/166M [00:14<00:00, 11.7MB/s]
'DR1_full.csv'
Data-frame construction
# Experiment list (XP-Ramp): the raw table plus a copy indexed by 'Date'.
# All CSV files are read from the current working directory.
XP_Ramp_df = pd.read_csv('XP_list_test.csv')
XP_Ramp_df_I = XP_Ramp_df.set_index('Date')

# Data Annex: index the rows by 'Name' and discard the 'Unnamed: 0' column
# (presumably a leftover saved index - confirm against the CSV).
Data_Annex_full_df1 = pd.read_csv('Data_Annex_full.csv')
Data_Annex_full_df = Data_Annex_full_df1.set_index('Name').drop(columns='Unnamed: 0')

# DR1 / DR2 / DR3 tables, one DataFrame each.
DR1_full_df = pd.read_csv('DR1_full.csv')
DR2_full_df = pd.read_csv('DR2_full.csv')
DR3_full_df = pd.read_csv('DR3_full.csv')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-5-1ffcc60ef137> in <module>
1 # XP-Ramp
2
----> 3 XP_Ramp_df = pd.read_csv('XP_list_test.csv')
4 XP_Ramp_df_I = XP_Ramp_df.set_index('Date')
5
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
674 )
675
--> 676 return _read(filepath_or_buffer, kwds)
677
678 parser_f.__name__ = name
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
446
447 # Create the parser.
--> 448 parser = TextFileReader(fp_or_buf, **kwds)
449
450 if chunksize or iterator:
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
878 self.options["has_index_names"] = kwds["has_index_names"]
879
--> 880 self._make_engine(self.engine)
881
882 def close(self):
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
1112 def _make_engine(self, engine="c"):
1113 if engine == "c":
-> 1114 self._engine = CParserWrapper(self.f, **self.options)
1115 else:
1116 if engine == "python":
~\Anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, src, **kwds)
1889 kwds["usecols"] = self.usecols
1890
-> 1891 self._reader = parsers.TextReader(src, **kwds)
1892 self.unnamed_cols = self._reader.unnamed_cols
1893
pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File XP_list_test.csv does not exist: 'XP_list_test.csv'
Sanity Check
#Data_Annex_full_df
Scan selection
Parameter list
# Option lists offered by the selection widgets.
# Every column of the date-indexed experiment list except the first one
# (presumably the temperature columns - confirm against the CSV header).
Temp = XP_Ramp_df_I.columns[1:].tolist()
# Index labels of the experiment list, i.e. the experiment dates.
Date = XP_Ramp_df_I.index.tolist()
Sample = XP_Ramp_df_I['Sample'].tolist()
Reduc = ['DR2', 'DR3']
# All column names of the Data Annex table.
Param = list(Data_Annex_full_df.columns)
Widget selection
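Select the experiment date(s) and temperature(s) of the scans to plot; the 'Iso' checkbox is intended to switch isotherm selection on or off.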
# Multi-select list of the temperature columns of the experiment list.
wT = widgets.SelectMultiple(options=Temp, description='Temperature:', disabled=False)

# Multi-select list of the experiment dates.
wD = widgets.SelectMultiple(options=Date, description='Date', disabled=False)

# 'Iso' tick box; unticked by default.
wSc = widgets.Checkbox(value=False, description='Iso', disabled=False, indent=False)

# Lay the three controls out side by side and show them.
h1 = widgets.HBox([wD, wT, wSc])
display(h1)
Confirm
Run the cell below to confirm the selected scans.
# Snapshot the current widget state into plain Python values so that later
# cells work from a fixed selection.
wT_L = [*wT.value]
wD_L = [*wD.value]
wSc_B = bool(wSc.value)
# Echo the state of the 'Iso' tick box as the cell output.
wSc_B
False
Sample name construction
To do: add an intermediate step that keeps only one scan number per temperature when the isotherms should not be plotted.
Create an input control (a boolean that lets the user choose between a single scan and the full isotherm).
# Build the table of scans to plot.
#
# For every selected date (row of XP_Ramp_df_I) and temperature (column), the
# cell holds the scan numbers recorded at that temperature (e.g. "[3, 4, 5, 6]",
# or NaN when nothing was recorded).  Each retained scan number becomes one row
# named '<spl>_<date>_<scan>'.
#
# Results:
#   dat     - DataFrame with columns 'Name', 'Temp', 'Date'
#   data_df - the same table indexed by 'Name'
#
# NOTE(review): the 'Iso' tick box (wSc_B) is not consulted here; switching
# between isotherm and single-scan mode still means editing the slice below.

# Matches every integer or decimal number inside the stringified cell.
_scan_number = re.compile(r"[-+]?\d*\.\d+|\d+")

z = []
value_1 = []
value_2 = []
for x in wD_L:
    for y in wT_L:
        # The cell is stringified so that NaN cells simply yield no numbers.
        value = str(XP_Ramp_df_I.loc[XP_Ramp_df_I.index == x, y].values[0])
        print(value)
        print(type(value))
        value_1 = _scan_number.findall(value)
        print(value_1)
        # Isotherm: keep every scan except the first one (warm-up scan).
        value_2 = value_1[1:]
        # Multiple scans (T range): keep only the last scan instead.
        # value_2 = value_1[-1:]
        print(value_2)
        for items in value_2:
            to_plot = '{}_{}_{}'.format(spl, x, items)
            z.append({
                'Name': to_plot,
                'Temp': y,
                'Date': x,
            })

# Explicit columns keep the table well-formed when nothing was selected;
# without them an empty table has no 'Name' column and set_index() raises.
dat = pd.DataFrame(z, columns=['Name', 'Temp', 'Date'])
data_df = dat.set_index('Name')
[1, 2]
<class 'str'>
['1', '2']
['2']
nan
<class 'str'>
[]
[]
[3, 4, 5, 6]
<class 'str'>
['3', '4', '5', '6']
['4', '5', '6']
[7, 8, 9, 10]
<class 'str'>
['7', '8', '9', '10']
['8', '9', '10']
[11, 12, 13, 14]
<class 'str'>
['11', '12', '13', '14']
['12', '13', '14']
nan
<class 'str'>
[]
[]
[15, 16, 17, 18]
<class 'str'>
['15', '16', '17', '18']
['16', '17', '18']
nan
<class 'str'>
[]
[]
[19, 20, 21, 22]
<class 'str'>
['19', '20', '21', '22']
['20', '21', '22']
[23, 24, 25, 26]
<class 'str'>
['23', '24', '25', '26']
['24', '25', '26']
[27, 28, 29, 30]
<class 'str'>
['27', '28', '29', '30']
['28', '29', '30']
[31, 32, 33, 34]
<class 'str'>
['31', '32', '33', '34']
['32', '33', '34']
[39, 40, 41, 42, 43, 44, 45, 46]
<class 'str'>
['39', '40', '41', '42', '43', '44', '45', '46']
['40', '41', '42', '43', '44', '45', '46']
[51, 52, 53, 54]
<class 'str'>
['51', '52', '53', '54']
['52', '53', '54']
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
[59, 60, 61, 62]
<class 'str'>
['59', '60', '61', '62']
['60', '61', '62']
nan
<class 'str'>
[]
[]
[63, 64, 65, 66]
<class 'str'>
['63', '64', '65', '66']
['64', '65', '66']
[67, 68, 69, 70, 71, 72, 73, 74]
<class 'str'>
['67', '68', '69', '70', '71', '72', '73', '74']
['68', '69', '70', '71', '72', '73', '74']
[75, 76, 77, 78]
<class 'str'>
['75', '76', '77', '78']
['76', '77', '78']
[79, 80]
<class 'str'>
['79', '80']
['80']
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
Plot pre-formatting
Temperature
# Temperature labels in ascending order, as they appear in the 'Temp' column
# (presumably kelvin - confirm).  The position in this tuple, counted from 1,
# is the colour index.
_TEMP_ORDER = (
    '20', '30', '40', '50', '60', '70', '80', '90', '100', '110',
    '120', '125', '130', '132', '134', '135', '136', '137', '138', '140',
    '145', '150', '155', '160', '180', '200',
)
_TEMP_COLOUR_INDEX = {temp: rank for rank, temp in enumerate(_TEMP_ORDER, start=1)}


def Temp_color(row):
    """Return the 1-based colour index for the temperature in ``row['Temp']``.

    Every listed temperature gets its own index.  The previous if/elif chain
    returned 21 for both '145' and '150', so all higher temperatures were one
    lower than their rank.

    Parameters:
        row: mapping or pandas row exposing a 'Temp' entry; the value is
            compared by its string form, so '20' and 20 are treated alike.

    Returns:
        int in 1..26 for a known temperature, otherwise None.
    """
    return _TEMP_COLOUR_INDEX.get(str(row['Temp']))
# Row-wise (axis=1) colour index derived from each scan's temperature.
data_df['Colour'] = data_df.apply(Temp_color, axis=1)
Linestyle (date)
# One line style per selected date so that dates can be told apart on a plot.
linestyle = ['-', ':', '--', '-.']
# cycle() reuses the four styles when more than four dates are selected;
# a plain zip() would leave the extra dates without a style (NaN in the column).
LD = dict(zip(wD_L, cycle(linestyle)))
data_df['linestyle'] = data_df['Date'].map(LD)
# Echo the date -> style mapping as the cell output.
LD
{'2020_09_28': '-'}
Sanity Check
data_df
Temp | Date | Colour | linestyle | |
---|---|---|---|---|
Name | ||||
ASW_2020_09_28_2 | 20 | 2020_09_28 | 1 | - |
ASW_2020_09_28_4 | 30 | 2020_09_28 | 2 | - |
ASW_2020_09_28_5 | 30 | 2020_09_28 | 2 | - |
ASW_2020_09_28_6 | 30 | 2020_09_28 | 2 | - |
ASW_2020_09_28_8 | 40 | 2020_09_28 | 3 | - |
ASW_2020_09_28_9 | 40 | 2020_09_28 | 3 | - |
ASW_2020_09_28_10 | 40 | 2020_09_28 | 3 | - |
ASW_2020_09_28_12 | 50 | 2020_09_28 | 4 | - |
ASW_2020_09_28_13 | 50 | 2020_09_28 | 4 | - |
ASW_2020_09_28_14 | 50 | 2020_09_28 | 4 | - |
ASW_2020_09_28_16 | 60 | 2020_09_28 | 5 | - |
ASW_2020_09_28_17 | 60 | 2020_09_28 | 5 | - |
ASW_2020_09_28_18 | 60 | 2020_09_28 | 5 | - |
ASW_2020_09_28_20 | 70 | 2020_09_28 | 6 | - |
ASW_2020_09_28_21 | 70 | 2020_09_28 | 6 | - |
ASW_2020_09_28_22 | 70 | 2020_09_28 | 6 | - |
ASW_2020_09_28_24 | 80 | 2020_09_28 | 7 | - |
ASW_2020_09_28_25 | 80 | 2020_09_28 | 7 | - |
ASW_2020_09_28_26 | 80 | 2020_09_28 | 7 | - |
ASW_2020_09_28_28 | 90 | 2020_09_28 | 8 | - |
ASW_2020_09_28_29 | 90 | 2020_09_28 | 8 | - |
ASW_2020_09_28_30 | 90 | 2020_09_28 | 8 | - |
ASW_2020_09_28_32 | 100 | 2020_09_28 | 9 | - |
ASW_2020_09_28_33 | 100 | 2020_09_28 | 9 | - |
ASW_2020_09_28_34 | 100 | 2020_09_28 | 9 | - |
ASW_2020_09_28_40 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_41 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_42 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_43 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_44 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_45 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_46 | 120 | 2020_09_28 | 11 | - |
ASW_2020_09_28_52 | 130 | 2020_09_28 | 13 | - |
ASW_2020_09_28_53 | 130 | 2020_09_28 | 13 | - |
ASW_2020_09_28_54 | 130 | 2020_09_28 | 13 | - |
ASW_2020_09_28_60 | 140 | 2020_09_28 | 20 | - |
ASW_2020_09_28_61 | 140 | 2020_09_28 | 20 | - |
ASW_2020_09_28_62 | 140 | 2020_09_28 | 20 | - |
ASW_2020_09_28_64 | 145 | 2020_09_28 | 21 | - |
ASW_2020_09_28_65 | 145 | 2020_09_28 | 21 | - |
ASW_2020_09_28_66 | 145 | 2020_09_28 | 21 | - |
ASW_2020_09_28_68 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_69 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_70 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_71 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_72 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_73 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_74 | 150 | 2020_09_28 | 21 | - |
ASW_2020_09_28_76 | 155 | 2020_09_28 | 22 | - |
ASW_2020_09_28_77 | 155 | 2020_09_28 | 22 | - |
ASW_2020_09_28_78 | 155 | 2020_09_28 | 22 | - |
ASW_2020_09_28_80 | 160 | 2020_09_28 | 23 | - |
Plotting
Scans
DR3
# Plot the DR3 spectrum of every selected scan: colour encodes temperature,
# line style encodes the experiment date.
nscan = len(data_df)
print(nscan)

fig, ax = plt.subplots(figsize=(12, 10))
# Colour scale spanning temperature values 20..200.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

# All scans share the same wavenumber axis.
x = DR3_full_df.Wavenumber
for i in dat['Name']:
    scan = data_df.loc[i]  # look the scan's metadata up once
    y = DR3_full_df[i]
    ax.plot(
        x, y,
        label="{}_{}".format(scan['Date'], scan['Temp']),
        color=colormap(normalize(int(scan['Temp']))),
        linestyle=scan['linestyle'],
    )

ax.set_title('DR3 ')
# Descending x limits: the wavenumber axis runs from 3800 down to 2800.
ax.axis([3800, 2800, 0, 0.45])
ax.set_xlabel('Wavenumber (cm-1)', fontsize=13)
ax.set_ylabel('Absorbance', fontsize=13)
ax.grid()
ax.legend()

# Stand-alone mappable so the colour bar reflects the temperature scale.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])  # no data attached; only norm and cmap matter
# The mappable belongs to no Axes, so state explicitly which Axes gives up
# space; recent matplotlib refuses to guess.
fig.colorbar(scalarmappaple, ax=ax)
plt.show()
52
DR2
# Plot the DR2 spectrum of every selected scan: colour encodes temperature,
# line style encodes the experiment date.
nscan = len(data_df)
print(nscan)

fig, ax = plt.subplots(figsize=(10, 10))
# Colour scale spanning temperature values 20..200.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

# All scans share the same wavenumber axis.
x = DR2_full_df.Wavenumber
for i in dat['Name']:
    scan = data_df.loc[i]  # look the scan's metadata up once
    y = DR2_full_df[i]
    ax.plot(
        x, y,
        label="{}_{}".format(scan['Date'], scan['Temp']),
        color=colormap(normalize(int(scan['Temp']))),
        linestyle=scan['linestyle'],
    )

ax.set_title('DR2')
# Descending x limits: the wavenumber axis runs from 3800 down to 2800.
ax.axis([3800, 2800, 0, 0.60])
ax.set_xlabel('Wavenumber (cm-1)', fontsize=13)
ax.set_ylabel('Absorbance', fontsize=13)
ax.grid()
ax.legend()

# Stand-alone mappable so the colour bar reflects the temperature scale.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])  # no data attached; only norm and cmap matter
# The mappable belongs to no Axes, so state explicitly which Axes gives up
# space; recent matplotlib refuses to guess.
fig.colorbar(scalarmappaple, ax=ax)
plt.show()
111
DR1
Why:
Compare DR2 and DR1 to see whether the observed differences arise from the reduction process.
# Plot the DR1 spectrum of every selected scan: colour encodes temperature,
# line style encodes the experiment date.
nscan = len(data_df)
print(nscan)

fig, ax = plt.subplots(figsize=(10, 10))
# Colour scale spanning temperature values 20..200.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

# All scans share the same wavenumber axis.
x = DR1_full_df.Wavenumber
for i in dat['Name']:
    scan = data_df.loc[i]  # look the scan's metadata up once
    y = DR1_full_df[i]
    ax.plot(
        x, y,
        label="{}_{}".format(scan['Date'], scan['Temp']),
        color=colormap(normalize(int(scan['Temp']))),
        linestyle=scan['linestyle'],
    )

ax.set_title('DR1')
# Descending x limits: the wavenumber axis runs from 3800 down to 2800.
# The y range starts below zero, unlike the DR2/DR3 plots.
ax.axis([3800, 2800, -0.2, 0.6])
ax.set_xlabel('Wavenumber (cm-1)', fontsize=13)
ax.set_ylabel('Absorbance', fontsize=13)
ax.grid()
ax.legend()

# Stand-alone mappable so the colour bar reflects the temperature scale.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])  # no data attached; only norm and cmap matter
# The mappable belongs to no Axes, so state explicitly which Axes gives up
# space; recent matplotlib refuses to guess.
fig.colorbar(scalarmappaple, ax=ax)
plt.show()
48
Data Annex
Plotting
Use interact to play with the data I plot
# Multi-select list of the Data Annex columns that can be plotted.
wP = widgets.SelectMultiple(options=Param, description='Parameters', disabled=False)
display(wP)
# Plot the selected Data Annex parameters for every selected scan.
# One '+' marker per (scan, parameter) pair; the x position is the scan name
# and the colour encodes the scan temperature.
wP_L = list(wP.value)

fig, ax = plt.subplots(figsize=(10, 10))
# Colour scale spanning temperature values 20..200.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

for i in dat['Name']:
    # The colour depends on the scan only, so compute it once per scan.
    colour = colormap(normalize(int(data_df.loc[i]['Temp'])))
    for j in wP_L:
        x = Data_Annex_full_df[j].loc[i]
        ax.plot(i, x, '+', mew=2, ms=8, color=colour)

ax.set_title('Data Annex')
# Scan names are long: print them vertically.
ax.tick_params(axis='x', labelrotation=90)

# Stand-alone mappable so the colour bar reflects the temperature scale.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
# An empty array is enough; this also removes the dependency on `nscan`
# having been defined by one of the scan-plot cells.
scalarmappaple.set_array([])
# The mappable belongs to no Axes, so state explicitly which Axes gives up
# space; recent matplotlib refuses to guess.
fig.colorbar(scalarmappaple, ax=ax)
plt.show()
<ipython-input-98-4732c201c243>:16: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
fig, ax= plt.subplots(figsize=(10,10))
More
fit linear model to desorption great