Smoothing Script¶

import os
import sys
import numpy as np
import pandas as pd

# Project root: used both as the import root for gfapy and as the parent
# of the folder that holds the input spreadsheets.
mainDir = '/home/cabsel/gfa/'
inputDir = os.path.join(mainDir, 'inputfiles')

# Put the project root at the front of the module search path so the
# gfapy package can be imported from it.
sys.path.insert(0, mainDir)
from gfapy.curve_fit import curve_fitter

Read experimental data for the GFA model from Excel¶

# Load the four input sheets in one call. With a list for sheet_name,
# read_excel returns a dict of DataFrames keyed by sheet name.
raw_sheets = pd.read_excel(
    os.path.join(inputDir, 'smoothed_data_medpH.xlsx'),
    sheet_name=['vcd', 'titer', 'frac', 'q_prod_matched'],
)

# Every table is re-indexed on this (renamed) time column.
time_col = 'Time (WD)'

# Spreadsheet header -> column name used by the rest of the script.
vcd_names = {
    'VCD_1e6cells_mL': 'VCD (1E6 VC/mL)',
    'Time_days': time_col,
    'fit_VCD_1e6cells_mL': 'fit_VCD (1E6 VC/mL)',
}
titer_names = {
    'Titer_g_L': 'Titer (g/L)',
    'Time_days': time_col,
    'fit_Titer_g_L': 'fit_Titer (g/L)',
}
# Several spellings of the GlcNAc headers are accepted and mapped onto one
# canonical name each (each spelling is listed exactly once).
frac_names = {
    'Time_days': time_col,
    'G0FplusGlcNAc': 'G0F+GlcNAc',
    'G0FplusGlcNac': 'G0F+GlcNAc',
    'G0F_GlcNac': 'G0F-GlcNAc',
    'G0F_GlcNAc': 'G0F-GlcNAc',
    'G0_GlcNAc': 'G0-GlcNAc',
    'G0_GlcNac': 'G0-GlcNAc',
    'G1prime': 'G1a/b',
    'Man7prime': 'Man7',
}
q_prod_names = {
    'Spec_prod_pg_cells_day': 'Spec Prod (pg/cells/day)',
    'Time_days': time_col,
    'Time_windows_days': 'Time window (days)',
}

# Key order matters: it becomes the sheet order when the tables are exported.
expt_data = {
    'VCD': raw_sheets['vcd'].rename(columns=vcd_names).set_index(time_col),
    'Titer': raw_sheets['titer'].rename(columns=titer_names).set_index(time_col),
    # G1F and G1Fprime are summed into a single 'G1Fa/b' column and then dropped.
    'Fractions': (raw_sheets['frac'].rename(columns=frac_names)
                  .assign(**{'G1Fa/b': lambda df: df['G1F'] + df['G1Fprime']})
                  .drop(columns=['G1F', 'G1Fprime'])
                  .set_index(time_col)),
    'q_prod': raw_sheets['q_prod_matched'].rename(columns=q_prod_names).set_index(time_col),
}

# Concentration = fraction * titer, aligned on the time index. Rows that
# contain any NaN after the alignment are dropped.
expt_data['Concentrations'] = (expt_data['Fractions']
                               .mul(expt_data['Titer']['Titer (g/L)'], axis=0)
                               .dropna())

# Sort every table by time.
expt_data = {name: table.sort_index() for name, table in expt_data.items()}

Smooth the data and record all necessary variables in a new dictionary (the fitting function for each series is chosen by trial and error)¶

# Copy every table, not just the dict. dict.copy() is shallow, so with it the
# columns written into smoothed_data below would also be written into the
# DataFrames held by expt_data.
smoothed_data = {name: table.copy() for name, table in expt_data.items()}

# Grid in steps of 1 from the first to the last VCD time point (inclusive);
# the fitted glycoform curves are evaluated on this grid.
retrieve_timepoints = np.arange(expt_data['VCD'].index.min(), expt_data['VCD'].index.max()+1, 1)

VCD¶

# Time (WD) is the index of the VCD table; move it back to a regular column
# so it can be handed to the fitter as the x variable.
vcd_input = expt_data['VCD'].reset_index()
fit_vcd = curve_fitter()
fit_vcd.ingest_data(vcd_input, x_col='Time (WD)')

Choose a model from below keys or "Custom:"
['Polynomial', 'Exponential', 'Power', 'Logarithmic', 'Fourier', 'Gaussian', 'Weibull', 'Hill-type', 'Sigmoidal']

Logistic: A / (exp(B * x) + C * exp(-D * x))

vcd_col = 'VCD (1E6 VC/mL)'
fit_vcd.fit_jupyter(vcd_col)

# Store the fitter's 'fitted' and 'deriv' results next to the raw VCD column.
vcd_stats = fit_vcd.current_stats
vcd_smoothed = smoothed_data['VCD']
vcd_smoothed['fit_' + vcd_col] = vcd_stats['fitted'].copy()
vcd_smoothed['diff_' + vcd_col] = vcd_stats['deriv'].copy()
display(vcd_smoothed)

	VCD (1E6 VC/mL)	fit_VCD (1E6 VC/mL)	diff_VCD (1E6 VC/mL)
Time (WD)
1	0.784138	0.812027	0.435732
2	1.265309	1.374260	0.705857
3	2.100869	2.256844	1.073020
4	3.601440	3.532264	1.472161
5	5.214883	5.156431	1.735699
6	7.192178	6.893591	1.675505
7	8.165785	8.391412	1.275581
8	9.280572	9.396539	0.731072
9	9.786835	9.877230	0.252563
10	9.869698	9.953180	-0.074785
11	9.952553	9.773722	-0.264768
12	9.787621	9.454708	-0.361261
13	9.111461	9.069461	-0.402502
14	8.353707	8.659642	-0.413579

Titer¶

# Time (WD) is the index of the titer table; move it back to a regular column
# so it can be handed to the fitter as the x variable.
titer_input = expt_data['Titer'].reset_index()
fit_titer = curve_fitter()
fit_titer.ingest_data(titer_input, x_col='Time (WD)')

Choose a model from below keys or "Custom:"
['Polynomial', 'Exponential', 'Power', 'Logarithmic', 'Fourier', 'Gaussian', 'Weibull', 'Hill-type', 'Sigmoidal']

titer_col = 'Titer (g/L)'
fit_titer.fit_jupyter(titer_col)

# Store the fitter's 'fitted' and 'deriv' results next to the raw titer column.
titer_stats = fit_titer.current_stats
titer_smoothed = smoothed_data['Titer']
titer_smoothed['fit_' + titer_col] = titer_stats['fitted'].copy()
titer_smoothed['diff_' + titer_col] = titer_stats['deriv'].copy()
display(titer_smoothed)

	Titer (g/L)	fit_Titer (g/L)	diff_Titer (g/L)
Time (WD)
1	0.038894	0.000785	0.002834
2	0.062556	0.009576	0.017251
3	0.100022	0.041069	0.048891
4	0.163121	0.113737	0.099474
5	0.269600	0.245077	0.165003
6	0.429320	0.445240	0.234872
7	0.660026	0.711372	0.294462
8	1.038621	1.026431	0.331127
9	1.348201	1.364166	0.339716
10	1.699189	1.697561	0.323568
11	2.026516	2.005844	0.291056
12	2.340039	2.277226	0.251123
13	2.440604	2.507908	0.210547
14	2.706803	2.699458	0.173308

Glycoform Concentrations¶

# Zero-filled result tables, one column per glycoform and one row per entry
# of retrieve_timepoints. The per-glycoform cells below overwrite the columns.
glycoform_names = smoothed_data['Concentrations'].columns
for result_name in ('Fit_Concentrations', 'Diff_Concentrations'):
    empty_table = pd.DataFrame(0, index=retrieve_timepoints, columns=glycoform_names)
    smoothed_data[result_name] = empty_table.rename_axis(index='Time (WD)')

# One fitter is shared by all glycoforms; Time (WD) is moved from the index
# back to a column so it can serve as the x variable.
conc_input = expt_data['Concentrations'].reset_index()
fit_fracs = curve_fitter()
fit_fracs.ingest_data(conc_input, x_col='Time (WD)')

Choose a model from below keys or "Custom:"
['Polynomial', 'Exponential', 'Power', 'Logarithmic', 'Fourier', 'Gaussian', 'Weibull', 'Hill-type', 'Sigmoidal']

Man7¶

frac_col = 'Man7'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

8.77715414835226e-8*exp(-1.04979682849682*x)/(2.51606683299321e-5 + exp(-1.04979682849682*x))**2

Man6¶

frac_col = 'Man6'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

1.53674532320441e-5/(x**3.63015880132957*(x**(-2.63015880132957) + 0.000730149475676811)**2)

Man5¶

frac_col = 'Man5'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

7.82512472492164e-5/(x**3.57898807718615*(x**(-2.57898807718615) + 1.48099279331054e-5)**2)

G0-GlcNAc¶

frac_col = 'G0-GlcNAc'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

3.20970296762035e-6/(x**3.9857986207357*(x**(-2.9857986207357) - 8.91079207537524e-5)**2)

G0F¶

frac_col = 'G0F'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

0.00106965824298514/(x**4.9728043624615*(x**(-3.9728043624615) + 9.42588445419789e-5)**2)

G1a/b¶

frac_col = 'G1a/b'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

1.81526914872763e-6*(17.2297568100532*exp(-0.514316349820139*x) + 0.0374374429383844*exp(-0.0374374429383844*x))/(exp(-0.514316349820139*x) + 0.0298504706415847*exp(-0.0374374429383844*x))**2

G1Fa/b¶

frac_col = 'G1Fa/b'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

0.00238522708675479/(x**4.03582275597052*(x**(-3.03582275597052) + 0.00176704968125774)**2)

G0¶

frac_col = 'G0'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

9.41441888399936e-5/(x**4.140653237447*(x**(-3.140653237447) + 0.000437837528509634)**2)

G0F-GlcNAc¶

frac_col = 'G0F-GlcNAc'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

1.38019843466825e-5/(x**4.28619872306324*(x**(-3.28619872306324) + 4.59783223097008e-5)**2)

G0F+GlcNAc¶

frac_col = 'G0F+GlcNAc'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

2.51813290846332e-5*exp(-0.504806299908199*x)/(0.00380914878275068 + exp(-0.504806299908199*x))**2

G2F¶

frac_col = 'G2F'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

0.000585155264255064/(x**3.59120437541022*(x**(-2.59120437541022) + 0.00854648647909327)**2)

A1G1F¶

frac_col = 'A1G1F'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

1.04061189355436e-5/(x**4.70728372947352*(x**(-3.70728372947352) + 0.000396022275776027)**2)

A1G2F¶

frac_col = 'A1G2F'
fit_fracs.fit_jupyter(frac_col)

# Evaluate the fit model and the model from get_diffmodel (presumably its
# derivative — confirm in gfapy.curve_fit) at retrieve_timepoints.
fit_params = fit_fracs.current_stats['params']
fitted_curve = fit_fracs.get_fitmodel(fit_params)
smoothed_data['Fit_Concentrations'][frac_col] = fitted_curve(retrieve_timepoints)
diff_curve = fit_fracs.get_diffmodel(fit_params)
smoothed_data['Diff_Concentrations'][frac_col] = diff_curve(retrieve_timepoints)

0.000116773396805464/(x**3.22003059056402*(x**(-2.22003059056402) + 0.00621613842908323)**2)

Summary¶

# Summary: VCD table with the raw series plus the 'fit_' and 'diff_' columns stored after fitting.
display(smoothed_data['VCD'])

	VCD (1E6 VC/mL)	fit_VCD (1E6 VC/mL)	diff_VCD (1E6 VC/mL)
Time (WD)
1	0.784138	0.812027	0.435732
2	1.265309	1.374260	0.705857
3	2.100869	2.256844	1.073020
4	3.601440	3.532264	1.472161
5	5.214883	5.156431	1.735699
6	7.192178	6.893591	1.675505
7	8.165785	8.391412	1.275581
8	9.280572	9.396539	0.731072
9	9.786835	9.877230	0.252563
10	9.869698	9.953180	-0.074785
11	9.952553	9.773722	-0.264768
12	9.787621	9.454708	-0.361261
13	9.111461	9.069461	-0.402502
14	8.353707	8.659642	-0.413579

# Summary: titer table with the raw series plus the 'fit_' and 'diff_' columns stored after fitting.
display(smoothed_data['Titer'])

	Titer (g/L)	fit_Titer (g/L)	diff_Titer (g/L)
Time (WD)
1	0.038894	0.000785	0.002834
2	0.062556	0.009576	0.017251
3	0.100022	0.041069	0.048891
4	0.163121	0.113737	0.099474
5	0.269600	0.245077	0.165003
6	0.429320	0.445240	0.234872
7	0.660026	0.711372	0.294462
8	1.038621	1.026431	0.331127
9	1.348201	1.364166	0.339716
10	1.699189	1.697561	0.323568
11	2.026516	2.005844	0.291056
12	2.340039	2.277226	0.251123
13	2.440604	2.507908	0.210547
14	2.706803	2.699458	0.173308

# Glycoform fractions indexed by time; G1F and G1Fprime were merged into 'G1Fa/b' when the data was loaded.
display(smoothed_data['Fractions'])

	A1G1F	A1G2F	G0	G0F	G0F+GlcNAc	G0F-GlcNAc	G0-GlcNAc	G1a/b	G2F	Man5	Man6	Man7	G1Fa/b
Time (WD)
5	0.003273	0.006889	0.017714	0.596477	0.001061	0.003714	0.000694	0.002173	0.035255	0.008395	0.001183	0.000395	0.319791
8	0.003273	0.002323	0.014989	0.737378	0.002281	0.003429	0.000488	0.001561	0.016277	0.006052	0.001187	-0.000018	0.208716
10	0.002805	0.002952	0.015032	0.778474	0.002989	0.004449	0.000684	0.001322	0.012044	0.006597	0.001141	0.001089	0.170732
12	0.002338	0.002234	0.015394	0.799511	0.003371	0.005429	0.000900	0.001249	0.009708	0.007928	0.001109	0.001096	0.149998
14	0.002338	0.002062	0.015970	0.809785	0.003985	0.007143	0.001373	0.001215	0.008540	0.009961	0.001285	0.001295	0.136380

# Glycoform concentrations: the fractions multiplied by the measured 'Titer (g/L)' column.
display(smoothed_data['Concentrations'])

	A1G1F	A1G2F	G0	G0F	G0F+GlcNAc	G0F-GlcNAc	G0-GlcNAc	G1a/b	G2F	Man5	Man6	Man7	G1Fa/b
Time (WD)
5	0.000882	0.001857	0.004776	0.160811	0.000286	0.001001	0.000187	0.000586	0.009505	0.002263	0.000319	0.000106	0.086216
8	0.003399	0.002413	0.015568	0.765856	0.002369	0.003561	0.000507	0.001621	0.016906	0.006285	0.001233	-0.000019	0.216777
10	0.004767	0.005016	0.025542	1.322774	0.005079	0.007560	0.001162	0.002246	0.020465	0.011210	0.001940	0.001850	0.290106
12	0.005470	0.005227	0.036023	1.870887	0.007887	0.012703	0.002105	0.002923	0.022717	0.018551	0.002594	0.002564	0.351002
14	0.006328	0.005583	0.043227	2.191928	0.010787	0.019334	0.003716	0.003288	0.023116	0.026962	0.003477	0.003504	0.369154

# Per-glycoform fitted models evaluated at retrieve_timepoints.
display(smoothed_data['Fit_Concentrations'])

	A1G1F	A1G2F	G0	G0F	G0F+GlcNAc	G0F-GlcNAc	G0-GlcNAc	G1a/b	G2F	Man5	Man6	Man7	G1Fa/b
1	0.000003	0.000052	0.000030	0.000269	0.000082	0.000004	0.000001	0.000097	0.000224	0.000030	0.000006	2.388571e-07	0.000784
2	0.000036	0.000238	0.000263	0.004221	0.000135	0.000041	0.000009	0.000158	0.001294	0.000181	0.000036	6.823406e-07	0.006352
3	0.000161	0.000563	0.000932	0.021011	0.000223	0.000155	0.000029	0.000253	0.003392	0.000516	0.000104	1.948752e-06	0.021022
4	0.000449	0.001006	0.002255	0.064869	0.000365	0.000398	0.000068	0.000396	0.006258	0.001083	0.000218	5.561665e-06	0.047231
5	0.000949	0.001534	0.004397	0.152474	0.000594	0.000825	0.000133	0.000601	0.009412	0.001924	0.000383	1.584083e-05	0.084312
6	0.001651	0.002109	0.007427	0.297708	0.000956	0.001490	0.000231	0.000875	0.012424	0.003078	0.000602	4.486123e-05	0.128615
7	0.002479	0.002695	0.011289	0.504784	0.001511	0.002447	0.000370	0.001209	0.015049	0.004577	0.000870	1.250369e-04	0.175136
8	0.003323	0.003266	0.015813	0.763591	0.002327	0.003740	0.000559	0.001581	0.017217	0.006453	0.001182	3.339065e-04	0.219469
9	0.004092	0.003804	0.020746	1.051493	0.003453	0.005403	0.000811	0.001957	0.018954	0.008733	0.001529	8.039687e-04	0.258881
10	0.004740	0.004297	0.025815	1.341389	0.004876	0.007455	0.001139	0.002306	0.020327	0.011445	0.001901	1.584894e-03	0.292310
11	0.005258	0.004742	0.030774	1.610677	0.006491	0.009900	0.001562	0.002614	0.021407	0.014611	0.002288	2.401272e-03	0.319840
12	0.005661	0.005139	0.035439	1.845968	0.008112	0.012721	0.002106	0.002877	0.022260	0.018254	0.002679	2.929413e-03	0.342132
13	0.005970	0.005490	0.039695	2.042813	0.009553	0.015883	0.002807	0.003103	0.022937	0.022393	0.003066	3.173732e-03	0.360037
14	0.006205	0.005799	0.043490	2.202836	0.010701	0.019336	0.003717	0.003301	0.023478	0.027047	0.003442	3.269164e-03	0.374388

# Per-glycoform get_diffmodel results evaluated at retrieve_timepoints
# (presumably time derivatives of the fits — confirm in gfapy.curve_fit).
display(smoothed_data['Diff_Concentrations'])

	A1G1F	A1G2F	G0	G0F	G0F+GlcNAc	G0F-GlcNAc	G0-GlcNAc	G1a/b	G2F	Man5	Man6	Man7	G1Fa/b
1	0.000010	0.000115	0.000094	0.001069	0.000041	0.000014	0.000003	0.000048	0.000575	0.000078	0.000015	2.507334e-07	0.002377
2	0.000067	0.000257	0.000412	0.008373	0.000068	0.000067	0.000013	0.000076	0.001595	0.000234	0.000047	7.161719e-07	0.009503
3	0.000195	0.000389	0.000962	0.027620	0.000111	0.000170	0.000029	0.000117	0.002554	0.000443	0.000090	2.044594e-06	0.020267
4	0.000389	0.000492	0.001712	0.062965	0.000179	0.000326	0.000051	0.000172	0.003094	0.000698	0.000139	5.828847e-06	0.032039
5	0.000609	0.000558	0.002585	0.114683	0.000286	0.000537	0.000080	0.000239	0.003140	0.000992	0.000192	1.655038e-05	0.041484
6	0.000783	0.000586	0.003466	0.176578	0.000447	0.000803	0.000117	0.000307	0.002843	0.001321	0.000244	4.645938e-05	0.046252
7	0.000854	0.000583	0.004230	0.235860	0.000675	0.001118	0.000163	0.000359	0.002398	0.001682	0.000291	1.263242e-04	0.046037
8	0.000818	0.000557	0.004774	0.277831	0.000966	0.001473	0.000218	0.000379	0.001943	0.002074	0.000331	3.153108e-04	0.042175
9	0.000712	0.000516	0.005046	0.293292	0.001283	0.001856	0.000287	0.000366	0.001543	0.002492	0.000361	6.398031e-04	0.036481
10	0.000582	0.000470	0.005051	0.282654	0.001545	0.002250	0.000372	0.000330	0.001215	0.002935	0.000381	8.702574e-04	0.030401
11	0.000457	0.000421	0.004837	0.253702	0.001653	0.002637	0.000479	0.000285	0.000957	0.003401	0.000391	6.992117e-04	0.024775
12	0.000352	0.000373	0.004474	0.216192	0.001558	0.002999	0.000616	0.000243	0.000757	0.003888	0.000391	3.642221e-04	0.019954
13	0.000269	0.000329	0.004030	0.177821	0.001304	0.003317	0.000795	0.000210	0.000603	0.004394	0.000383	1.496321e-04	0.015997
14	0.000205	0.000289	0.003559	0.143035	0.000988	0.003578	0.001037	0.000187	0.000484	0.004917	0.000369	5.556953e-05	0.012826

Write to excel¶

# Export every table in smoothed_data to its own sheet; the dict key is the
# sheet name.
output_dir = os.path.join(inputDir, 'Lee et al')
# ExcelWriter does not create missing parent folders, so make sure the
# target folder exists before opening the workbook.
os.makedirs(output_dir, exist_ok=True)
with pd.ExcelWriter(os.path.join(output_dir, 'medpH_processed.xlsx')) as writer:
    for sheet, table in smoothed_data.items():
        table.to_excel(writer, sheet_name=sheet)

Smoothing Script¶

Read GFA model from Excel¶

Smooth data and record all necessary variables in new dictionary (Trial and error to find correct function)¶

VCD¶

Titer¶

Glycoform Concentrations¶

Man7¶

Man6¶

Man5¶

G0-GlcNAc¶

G0F¶

G1a/b¶

G1Fa/b¶

G0¶

G0F-GlcNAc¶

G0F+GlcNAc¶

G2F¶

A1G1F¶

A1G2F¶

Summary¶

Write to excel¶

iGFA

Navigation

Related Topics