DATA Analysis - Binder version¶

Explain

Further implementation:¶

Data to be put in ordo (or Zenodo)

Libraries¶

import numpy 
import math
import matplotlib
import matplotlib.pyplot as plt
import ipympl 
from matplotlib import cm
import matplotlib.colors as mcolors
import pandas as pd
from glob import glob
from functools import reduce
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout, HBox
import re
from itertools import cycle
import zipfile  
#import wx
import gdown

# Select the 'widget' matplotlib backend (ipympl is imported above) so figures
# are rendered as interactive widgets inside the notebook.
%matplotlib widget

# Prefix of every scan name; names are built later as '<spl>_<date>_<scan>'.
spl = 'ASW'

Data import¶

Five CSV files containing all the data reduced so far (as of 31-03-2022) are imported as DataFrames:

XP_list_test (XP_Ramp_df)
Data_Annex_full (Data_Annex_full_df)
DR1_full (DR1_full_df)
DR2_full (DR2_full_df)
DR3_full (DR3_full_df)

Download¶

DR2 and DR1 are big files, hence they are stored on Google Drive and need to be downloaded from within the notebook.

# Download the large reduced-data files (DR2, DR1) from Google Drive.
from pathlib import Path

# Local file name -> Google Drive sharing link. The file names are the ones
# read back with pd.read_csv in the data-frame construction step.
drive_files = {
    'DR2_full.csv': "https://drive.google.com/file/d/1deWpgNp7kvvIsh056PGX9fBzADer0eYX/view?usp=sharing",
    'DR1_full.csv': "https://drive.google.com/file/d/1zmtG61wYExabbecMznA8P5JIA7BXy49n/view?usp=sharing",
}

for filename, url in drive_files.items():
    # Each file is several hundred MB: skip the transfer when a copy is
    # already present so that re-running the cell stays cheap.
    if Path(filename).exists():
        print('{} already present, skipping download'.format(filename))
        continue
    # fuzzy=True lets gdown extract the file id from a '/view?usp=sharing' link.
    gdown.download(url=url, output=filename, quiet=False, fuzzy=True)

Downloading...
From: https://drive.google.com/uc?id=1deWpgNp7kvvIsh056PGX9fBzADer0eYX
To: G:\MAMP\htdocs\PhD-WS\Script\DR2_full.csv
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 602M/602M [00:52<00:00, 11.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1zmtG61wYExabbecMznA8P5JIA7BXy49n
To: G:\MAMP\htdocs\PhD-WS\Script\DR1_full.csv
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166M/166M [00:14<00:00, 11.7MB/s]

'DR1_full.csv'

Data-frame construction¶

# XP-Ramp: raw table plus a copy indexed by its 'Date' column.
XP_Ramp_df = pd.read_csv('XP_list_test.csv')
XP_Ramp_df_I = XP_Ramp_df.set_index('Date')

# Data_Annex: keep the raw table, then derive a copy indexed by 'Name'
# with the 'Unnamed: 0' column removed.
Data_Annex_full_df1 = pd.read_csv('Data_Annex_full.csv')
Data_Annex_full_df = (
    Data_Annex_full_df1
    .set_index('Name')
    .drop(columns='Unnamed: 0')
)

# Reduced data sets, one CSV file each (DR1, DR2, DR3).
DR1_full_df = pd.read_csv('DR1_full.csv')
DR2_full_df = pd.read_csv('DR2_full.csv')
DR3_full_df = pd.read_csv('DR3_full.csv')

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-5-1ffcc60ef137> in <module>
      1 # XP-Ramp
      2 
----> 3 XP_Ramp_df = pd.read_csv('XP_list_test.csv')
      4 XP_Ramp_df_I = XP_Ramp_df.set_index('Date')
      5 

~\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    674         )
    675 
--> 676         return _read(filepath_or_buffer, kwds)
    677 
    678     parser_f.__name__ = name

~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    446 
    447     # Create the parser.
--> 448     parser = TextFileReader(fp_or_buf, **kwds)
    449 
    450     if chunksize or iterator:

~\Anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
    878             self.options["has_index_names"] = kwds["has_index_names"]
    879 
--> 880         self._make_engine(self.engine)
    881 
    882     def close(self):

~\Anaconda3\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
   1112     def _make_engine(self, engine="c"):
   1113         if engine == "c":
-> 1114             self._engine = CParserWrapper(self.f, **self.options)
   1115         else:
   1116             if engine == "python":

~\Anaconda3\lib\site-packages\pandas\io\parsers.py in __init__(self, src, **kwds)
   1889         kwds["usecols"] = self.usecols
   1890 
-> 1891         self._reader = parsers.TextReader(src, **kwds)
   1892         self.unnamed_cols = self._reader.unnamed_cols
   1893 

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()

FileNotFoundError: [Errno 2] File XP_list_test.csv does not exist: 'XP_list_test.csv'

Sanity Check¶

#Data_Annex_full_df

Scan selection¶

Parameter list¶

# Values offered for selection further down, as plain Python lists.
Date = XP_Ramp_df_I.index.tolist()
Temp = XP_Ramp_df_I.columns[1:].tolist()  # every column after the first one
Sample = XP_Ramp_df_I['Sample'].tolist()
Param = Data_Annex_full_df.columns.tolist()
Reduc = ['DR2','DR3']
#Scans = ['Single','Multi']

Widget selection¶

Some stuffs

# Selection widgets.
# NOTE: selectors for 'Reduction' (Reduc) and 'Sample' (Sample) are not
# created; only date, temperature and the 'Iso' checkbox are shown.

# Multi-select list of the available dates.
wD = widgets.SelectMultiple(description='Date', options=Date, disabled=False)

# Multi-select list of the available temperatures.
wT = widgets.SelectMultiple(description='Temperature:', options=Temp, disabled=False)

# Checkbox labelled 'Iso', unticked by default.
wSc = widgets.Checkbox(description='Iso', value=False, indent=False, disabled=False)

# Put the three controls side by side and render them.
h1 = widgets.HBox(children=[wD, wT, wSc])
display(h1)

Confirm¶

Run the cell below to confirm the selected scans.

# Freeze the current widget state into plain Python values.
wD_L = list(wD.value)
wT_L = list(wT.value)
wSc_B = bool(wSc.value)

# Show the checkbox state as the cell output.
wSc_B

False

Sample name construction¶

Can I create an intermediate step to select only one number from the many per temperature when I don't want to plot the isotherms?

Create an input button (a boolean that allows choosing between a single scan and the isotherm).

# Build the table of scans to plot: one row per scan, named
# '<spl>_<date>_<scan number>', for every selected (date, temperature) pair.
#
# Example inputs:
#   wD_L = ['2020_09_16', '2020_09_17'], wT_L = ['60', '80'], spl = 'ASW'

# Extracts the numbers from the text form of a cell, e.g. '[3, 4, 5, 6]'.
# Compiled once because it is reused for every (date, temperature) pair.
scan_number_re = re.compile(r"[-+]?\d*\.\d+|\d+")

z = []

for x in wD_L:
    for y in wT_L:
        # Cell of the ramp table for this date/temperature, as text. An empty
        # cell reads as 'nan', which contains no number and yields no scan.
        value = str(XP_Ramp_df_I.loc[XP_Ramp_df_I.index == x, y].values[0])
        value_1 = scan_number_re.findall(value)

        # Isotherm: all values but the first one (warm-up scan).
        # For multiple scans over a temperature range, use value_1[-1:]
        # instead to keep only the last value of the list.
        value_2 = value_1[1:]

        for items in value_2:
            z.append({
                'Name': '{}_{}_{}'.format(spl, x, items),
                'Temp': y,
                'Date': x,
            })

# Naming the columns explicitly keeps 'Name' available even when nothing was
# selected or no scan matched, so set_index() does not fail on an empty table.
dat = pd.DataFrame(z, columns=['Name', 'Temp', 'Date'])
data_df = dat.set_index('Name')

[1, 2]
<class 'str'>
['1', '2']
['2']
nan
<class 'str'>
[]
[]
[3, 4, 5, 6]
<class 'str'>
['3', '4', '5', '6']
['4', '5', '6']
[7, 8, 9, 10]
<class 'str'>
['7', '8', '9', '10']
['8', '9', '10']
[11, 12, 13, 14]
<class 'str'>
['11', '12', '13', '14']
['12', '13', '14']
nan
<class 'str'>
[]
[]
[15, 16, 17, 18]
<class 'str'>
['15', '16', '17', '18']
['16', '17', '18']
nan
<class 'str'>
[]
[]
[19, 20, 21, 22]
<class 'str'>
['19', '20', '21', '22']
['20', '21', '22']
[23, 24, 25, 26]
<class 'str'>
['23', '24', '25', '26']
['24', '25', '26']
[27, 28, 29, 30]
<class 'str'>
['27', '28', '29', '30']
['28', '29', '30']
[31, 32, 33, 34]
<class 'str'>
['31', '32', '33', '34']
['32', '33', '34']
[39, 40, 41, 42, 43, 44, 45, 46]
<class 'str'>
['39', '40', '41', '42', '43', '44', '45', '46']
['40', '41', '42', '43', '44', '45', '46']
[51, 52, 53, 54]
<class 'str'>
['51', '52', '53', '54']
['52', '53', '54']
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]
[59, 60, 61, 62]
<class 'str'>
['59', '60', '61', '62']
['60', '61', '62']
nan
<class 'str'>
[]
[]
[63, 64, 65, 66]
<class 'str'>
['63', '64', '65', '66']
['64', '65', '66']
[67, 68, 69, 70, 71, 72, 73, 74]
<class 'str'>
['67', '68', '69', '70', '71', '72', '73', '74']
['68', '69', '70', '71', '72', '73', '74']
[75, 76, 77, 78]
<class 'str'>
['75', '76', '77', '78']
['76', '77', '78']
[79, 80]
<class 'str'>
['79', '80']
['80']
nan
<class 'str'>
[]
[]
nan
<class 'str'>
[]
[]

Plot pre-formatting¶

Temperature¶

# Known temperature labels in ascending order. The colour index of a
# temperature is its 1-based position in this tuple, so every temperature
# gets its own, distinct index.
_TEMP_ORDER = (
    '20', '30', '40', '50', '60', '70', '80', '90', '100', '110',
    '120', '125', '130', '132', '134', '135', '136', '137', '138', '140',
    '145', '150', '155', '160', '180', '200',
)
_TEMP_COLOUR_INDEX = {
    temp: rank for rank, temp in enumerate(_TEMP_ORDER, start=1)
}


def Temp_color(row):
    """Return the colour index of a row's temperature.

    Parameters
    ----------
    row : mapping
        Anything indexable with ``'Temp'`` (for example a DataFrame row)
        whose ``'Temp'`` value is a temperature label such as ``'20'``.

    Returns
    -------
    int or None
        1 for ``'20'`` up to 26 for ``'200'``; ``None`` when the label is
        not one of the known temperatures.
    """
    return _TEMP_COLOUR_INDEX.get(row['Temp'])

# Add a 'Colour' column by calling Temp_color on every row of data_df.
data_df['Colour'] = data_df.apply(Temp_color, axis=1)

Linestyle (date)¶

# Line styles used to tell the selected dates apart.
linestyle = ['-',':','--','-.']

# Map each selected date to a style. cycle() repeats the four styles, so
# every date gets one even when more than four dates are selected (a plain
# zip would stop after four and leave the remaining dates without a style).
LD = dict(zip(wD_L, cycle(linestyle)))

# Add a 'linestyle' column holding the style of each scan's date.
data_df['linestyle'] = data_df['Date'].map(LD)

LD

{'2020_09_28': '-'}

Sanity Check¶

data_df

	Temp	Date	Colour	linestyle
Name
ASW_2020_09_28_2	20	2020_09_28	1	-
ASW_2020_09_28_4	30	2020_09_28	2	-
ASW_2020_09_28_5	30	2020_09_28	2	-
ASW_2020_09_28_6	30	2020_09_28	2	-
ASW_2020_09_28_8	40	2020_09_28	3	-
ASW_2020_09_28_9	40	2020_09_28	3	-
ASW_2020_09_28_10	40	2020_09_28	3	-
ASW_2020_09_28_12	50	2020_09_28	4	-
ASW_2020_09_28_13	50	2020_09_28	4	-
ASW_2020_09_28_14	50	2020_09_28	4	-
ASW_2020_09_28_16	60	2020_09_28	5	-
ASW_2020_09_28_17	60	2020_09_28	5	-
ASW_2020_09_28_18	60	2020_09_28	5	-
ASW_2020_09_28_20	70	2020_09_28	6	-
ASW_2020_09_28_21	70	2020_09_28	6	-
ASW_2020_09_28_22	70	2020_09_28	6	-
ASW_2020_09_28_24	80	2020_09_28	7	-
ASW_2020_09_28_25	80	2020_09_28	7	-
ASW_2020_09_28_26	80	2020_09_28	7	-
ASW_2020_09_28_28	90	2020_09_28	8	-
ASW_2020_09_28_29	90	2020_09_28	8	-
ASW_2020_09_28_30	90	2020_09_28	8	-
ASW_2020_09_28_32	100	2020_09_28	9	-
ASW_2020_09_28_33	100	2020_09_28	9	-
ASW_2020_09_28_34	100	2020_09_28	9	-
ASW_2020_09_28_40	120	2020_09_28	11	-
ASW_2020_09_28_41	120	2020_09_28	11	-
ASW_2020_09_28_42	120	2020_09_28	11	-
ASW_2020_09_28_43	120	2020_09_28	11	-
ASW_2020_09_28_44	120	2020_09_28	11	-
ASW_2020_09_28_45	120	2020_09_28	11	-
ASW_2020_09_28_46	120	2020_09_28	11	-
ASW_2020_09_28_52	130	2020_09_28	13	-
ASW_2020_09_28_53	130	2020_09_28	13	-
ASW_2020_09_28_54	130	2020_09_28	13	-
ASW_2020_09_28_60	140	2020_09_28	20	-
ASW_2020_09_28_61	140	2020_09_28	20	-
ASW_2020_09_28_62	140	2020_09_28	20	-
ASW_2020_09_28_64	145	2020_09_28	21	-
ASW_2020_09_28_65	145	2020_09_28	21	-
ASW_2020_09_28_66	145	2020_09_28	21	-
ASW_2020_09_28_68	150	2020_09_28	21	-
ASW_2020_09_28_69	150	2020_09_28	21	-
ASW_2020_09_28_70	150	2020_09_28	21	-
ASW_2020_09_28_71	150	2020_09_28	21	-
ASW_2020_09_28_72	150	2020_09_28	21	-
ASW_2020_09_28_73	150	2020_09_28	21	-
ASW_2020_09_28_74	150	2020_09_28	21	-
ASW_2020_09_28_76	155	2020_09_28	22	-
ASW_2020_09_28_77	155	2020_09_28	22	-
ASW_2020_09_28_78	155	2020_09_28	22	-
ASW_2020_09_28_80	160	2020_09_28	23	-

Plotting¶

Scans¶

DR3¶

# Plot every selected scan from the DR3 data set.

# Number of selected scans (one row of data_df per scan).
nscan = len(data_df)

print(nscan)

fig, ax = plt.subplots(figsize=(12,10))

# Line colour encodes temperature: values 20..200 mapped onto 'jet'.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

# The x values are shared by all scans, so read them once.
x = DR3_full_df.Wavenumber

for i, scan in data_df.iterrows():
    # One column of DR3_full_df per scan, named after the scan.
    y = DR3_full_df[i]

    ax.plot(
        x, y,
        label="{}_{}".format(scan['Date'], scan['Temp']),
        color=colormap(normalize(int(scan['Temp']))),
        linestyle=scan['linestyle'],
    )

ax.set_title('DR3 ')
ax.axis([3800,2800,0,0.45])  # x axis reversed: 3800 -> 2800
ax.set_xlabel('Wavenumber (cm-1)', fontsize=13)
ax.set_ylabel('Absorbance', fontsize=13)
ax.grid()
ax.legend()

# Stand-alone mappable that only carries the norm/colormap for the colorbar.
# It is attached to no Axes, so the target Axes is passed explicitly;
# otherwise matplotlib cannot tell where to draw the colorbar.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])
fig.colorbar(scalarmappaple, ax=ax)

plt.show()

DR2¶

# Plot every selected scan from the DR2 data set.

# Number of selected scans (one row of data_df per scan).
nscan = len(data_df)

print(nscan)

fig, ax = plt.subplots(figsize=(10,10))

# Line colour encodes temperature: values 20..200 mapped onto 'jet'.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

# The x values are shared by all scans, so read them once.
x = DR2_full_df.Wavenumber

for i, scan in data_df.iterrows():
    # One column of DR2_full_df per scan, named after the scan.
    y = DR2_full_df[i]

    ax.plot(
        x, y,
        label="{}_{}".format(scan['Date'], scan['Temp']),
        color=colormap(normalize(int(scan['Temp']))),
        linestyle=scan['linestyle'],
    )

ax.set_title('DR2')
ax.axis([3800,2800,0,0.60])  # x axis reversed: 3800 -> 2800
ax.set_xlabel('Wavenumber (cm-1)', fontsize=13)
ax.set_ylabel('Absorbance', fontsize=13)
ax.grid()
ax.legend()

# Stand-alone mappable that only carries the norm/colormap for the colorbar.
# It is attached to no Axes, so the target Axes is passed explicitly;
# otherwise matplotlib cannot tell where to draw the colorbar.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])
fig.colorbar(scalarmappaple, ax=ax)

plt.show()

DR1¶

Why:

Compare DR2 and DR1 to see whether the observed differences arise from the reduction process.

# Plot every selected scan from the DR1 data set.

# Number of selected scans (one row of data_df per scan).
nscan = len(data_df)

print(nscan)

fig, ax = plt.subplots(figsize=(10,10))

# Line colour encodes temperature: values 20..200 mapped onto 'jet'.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

# The x values are shared by all scans, so read them once.
x = DR1_full_df.Wavenumber

for i, scan in data_df.iterrows():
    # One column of DR1_full_df per scan, named after the scan.
    y = DR1_full_df[i]

    ax.plot(
        x, y,
        label="{}_{}".format(scan['Date'], scan['Temp']),
        color=colormap(normalize(int(scan['Temp']))),
        linestyle=scan['linestyle'],
    )

ax.set_title('DR1')
ax.axis([3800,2800,-0.2,0.6])  # x axis reversed: 3800 -> 2800
ax.set_xlabel('Wavenumber (cm-1)', fontsize=13)
ax.set_ylabel('Absorbance', fontsize=13)
ax.grid()
ax.legend()

# Stand-alone mappable that only carries the norm/colormap for the colorbar.
# It is attached to no Axes, so the target Axes is passed explicitly;
# otherwise matplotlib cannot tell where to draw the colorbar.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])
fig.colorbar(scalarmappaple, ax=ax)

plt.show()

Data Annex¶

Plotting¶

Use interact to play with the data I plot

# Multi-select list offering every entry of Param.
wP = widgets.SelectMultiple(description='Parameters', options=Param, disabled=False)
display(wP)

# Current selection as a plain list.
# NOTE(review): this reads the widget state at execution time, so it
# presumably has to be re-run after the selection changes - confirm.
wP_L = list(wP.value)

# Figure: one '+' marker per (selected scan, selected Data Annex parameter),
# with the scan name on the x axis and the parameter value on the y axis.

fig, ax = plt.subplots(figsize=(10,10))

# Marker colour encodes temperature: values 20..200 mapped onto 'jet'.
normalize = mcolors.Normalize(vmin=20, vmax=200)
colormap = cm.jet

for i, scan in data_df.iterrows():
    # The colour depends on the scan only, not on the parameter.
    colour = colormap(normalize(int(scan['Temp'])))

    for j in wP_L:
        # Value of parameter j for scan i.
        x = Data_Annex_full_df[j].loc[i]

        ax.plot(i, x, '+', mew=2, ms=8, color=colour)

ax.set_title('Data Annex')
# Scan names are long: write the x tick labels vertically.
ax.tick_params(axis='x', labelrotation=90)

# Stand-alone mappable that only carries the norm/colormap for the colorbar.
# It is attached to no Axes, so the target Axes is passed explicitly. It is
# given an empty array, so this cell does not need 'nscan' from the scan
# plotting cells.
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array([])
fig.colorbar(scalarmappaple, ax=ax)

plt.show()

<ipython-input-98-4732c201c243>:16: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig, ax= plt.subplots(figsize=(10,10))

More¶

fit linear model to desorption great

PhD

DATA Analysis - Binder version

Contents

DATA Analysis - Binder version¶

Further implementation:¶

Libraries¶

Data import¶

Download¶

Data-frame construction¶

Sanity Check¶

Scan selection¶

Parameter list¶

Widget selection¶

Confirm¶

Sample name construction¶

Plot pre-formatting¶

Temperature¶

Linestyle (date)¶

Sanity Check¶

Plotting¶

Scans¶

DR3¶

DR2¶

DR1¶

Data Annex¶

Plotting¶

More¶