Smoothing Script

import os
import sys
import numpy as np
import pandas as pd
sys.path.insert(0, '/home/cabsel/gfa/')
from gfapy.curve_fit import curve_fitter
mainDir = '/home/cabsel/gfa/'
inputDir = os.path.join(mainDir, 'inputfiles')

Read experimental data for the GFA model from Excel

# Load the four raw sheets into a dict of DataFrames keyed by sheet name, then
# replace each entry with a cleaned table (display-style column names,
# 'Time (WD)' as the index). The pop/insert sequence fixes the final key order:
# VCD, Titer, Fractions, q_prod, Concentrations.
expt_data = pd.read_excel(
    os.path.join(inputDir, 'smoothed_data_medpH.xlsx'),
    sheet_name=['vcd', 'titer', 'frac', 'q_prod_matched'],
)

expt_data['VCD'] = (
    expt_data.pop('vcd')
    .rename(columns={
        'VCD_1e6cells_mL': 'VCD (1E6 VC/mL)',
        'Time_days': 'Time (WD)',
        'fit_VCD_1e6cells_mL': 'fit_VCD (1E6 VC/mL)',
    })
    .set_index('Time (WD)')
)

expt_data['Titer'] = (
    expt_data.pop('titer')
    .rename(columns={
        'Titer_g_L': 'Titer (g/L)',
        'Time_days': 'Time (WD)',
        'fit_Titer_g_L': 'fit_Titer (g/L)',
    })
    .set_index('Time (WD)')
)

expt_data['Fractions'] = (
    expt_data.pop('frac')
    .rename(columns={
        'Time_days': 'Time (WD)',
        # Both spellings ("GlcNAc" / "GlcNac") are mapped so the rename works
        # whichever capitalisation the sheet uses.
        'G0FplusGlcNAc': 'G0F+GlcNAc',
        'G0FplusGlcNac': 'G0F+GlcNAc',
        'G0F_GlcNAc': 'G0F-GlcNAc',
        'G0F_GlcNac': 'G0F-GlcNAc',
        'G0_GlcNAc': 'G0-GlcNAc',
        'G0_GlcNac': 'G0-GlcNAc',
        'G1prime': 'G1a/b',
        'Man7prime': 'Man7',
    })
    # The two G1F columns are summed into a single 'G1Fa/b' column and the
    # originals are dropped.
    .assign(**{'G1Fa/b': lambda x: x['G1F'] + x['G1Fprime']})
    .drop(columns=['G1F', 'G1Fprime'])
    .set_index('Time (WD)')
)

expt_data['q_prod'] = (
    expt_data.pop('q_prod_matched')
    .rename(columns={
        'Spec_prod_pg_cells_day': 'Spec Prod (pg/cells/day)',
        'Time_days': 'Time (WD)',
        'Time_windows_days': 'Time window (days)',
    })
    .set_index('Time (WD)')
)

# Per-glycoform concentration = fraction * measured titer, aligned on the time
# index. Time points present in only one of the two tables produce NaN rows,
# which are dropped.
expt_data['Concentrations'] = (
    expt_data['Fractions']
    .mul(expt_data['Titer']['Titer (g/L)'], axis=0)
    .dropna()
)

# Make sure every table is ordered by time.
expt_data = {name: frame.sort_index() for name, frame in expt_data.items()}

Smooth the data and record all necessary variables in a new dictionary (the fitting function for each variable is chosen by trial and error)

# Copy every table, not just the dict: dict.copy() is shallow, so writing
# fitted columns into smoothed_data[...] would otherwise also modify the
# DataFrames held in expt_data (and leak into later ingest_data calls when a
# cell is re-run).
smoothed_data = {name: frame.copy() for name, frame in expt_data.items()}

# Evaluation grid with step 1, from the first to the last VCD time point.
retrieve_timepoints = np.arange(
    expt_data['VCD'].index.min(),
    expt_data['VCD'].index.max() + 1,
    1,
)

VCD

# Set up the fitter for the VCD table. reset_index() turns 'Time (WD)' back
# into a regular column so it can be named as the x variable.
vcd_input = expt_data['VCD'].reset_index()
fit_vcd = curve_fitter()
fit_vcd.ingest_data(vcd_input, x_col='Time (WD)')
Choose a model from below keys or "Custom:"
['Polynomial', 'Exponential', 'Power', 'Logarithmic', 'Fourier', 'Gaussian', 'Weibull', 'Hill-type', 'Sigmoidal']

Logistic: A / (exp(B * x) + C * exp(-D * x))

# Fit the VCD column, then store the fitter's 'fitted' and 'deriv' results
# (presumably the fitted curve and its time derivative) next to the raw data
# under 'fit_' / 'diff_' prefixed column names.
vcd_col = 'VCD (1E6 VC/mL)'
fit_vcd.fit_jupyter(vcd_col)
vcd_table = smoothed_data['VCD']
vcd_table[f'fit_{vcd_col}'] = fit_vcd.current_stats['fitted'].copy()
vcd_table[f'diff_{vcd_col}'] = fit_vcd.current_stats['deriv'].copy()
display(vcd_table)
VCD (1E6 VC/mL) fit_VCD (1E6 VC/mL) diff_VCD (1E6 VC/mL)
Time (WD)
1 0.784138 0.812027 0.435732
2 1.265309 1.374260 0.705857
3 2.100869 2.256844 1.073020
4 3.601440 3.532264 1.472161
5 5.214883 5.156431 1.735699
6 7.192178 6.893591 1.675505
7 8.165785 8.391412 1.275581
8 9.280572 9.396539 0.731072
9 9.786835 9.877230 0.252563
10 9.869698 9.953180 -0.074785
11 9.952553 9.773722 -0.264768
12 9.787621 9.454708 -0.361261
13 9.111461 9.069461 -0.402502
14 8.353707 8.659642 -0.413579

Titer

# Set up the fitter for the titer table. reset_index() turns 'Time (WD)' back
# into a regular column so it can be named as the x variable.
titer_input = expt_data['Titer'].reset_index()
fit_titer = curve_fitter()
fit_titer.ingest_data(titer_input, x_col='Time (WD)')
Choose a model from below keys or "Custom:"
['Polynomial', 'Exponential', 'Power', 'Logarithmic', 'Fourier', 'Gaussian', 'Weibull', 'Hill-type', 'Sigmoidal']
# Fit the titer column, then store the fitter's 'fitted' and 'deriv' results
# (presumably the fitted curve and its time derivative) next to the raw data
# under 'fit_' / 'diff_' prefixed column names.
titer_col = 'Titer (g/L)'
fit_titer.fit_jupyter(titer_col)
titer_table = smoothed_data['Titer']
titer_table[f'fit_{titer_col}'] = fit_titer.current_stats['fitted'].copy()
titer_table[f'diff_{titer_col}'] = fit_titer.current_stats['deriv'].copy()
display(titer_table)
Titer (g/L) fit_Titer (g/L) diff_Titer (g/L)
Time (WD)
1 0.038894 0.000785 0.002834
2 0.062556 0.009576 0.017251
3 0.100022 0.041069 0.048891
4 0.163121 0.113737 0.099474
5 0.269600 0.245077 0.165003
6 0.429320 0.445240 0.234872
7 0.660026 0.711372 0.294462
8 1.038621 1.026431 0.331127
9 1.348201 1.364166 0.339716
10 1.699189 1.697561 0.323568
11 2.026516 2.005844 0.291056
12 2.340039 2.277226 0.251123
13 2.440604 2.507908 0.210547
14 2.706803 2.699458 0.173308

Glycoform Concentrations

# Pre-allocate zero-filled result tables (one column per glycoform, one row per
# grid time point) for the fitted concentrations and for their derivatives;
# the per-glycoform cells below fill them in column by column.
conc_columns = smoothed_data['Concentrations'].columns
for table_name in ('Fit_Concentrations', 'Diff_Concentrations'):
    smoothed_data[table_name] = (
        pd.DataFrame(0, index=retrieve_timepoints, columns=conc_columns)
        .rename_axis(index='Time (WD)')
    )

# One fitter is shared by all glycoform concentration columns.
conc_input = expt_data['Concentrations'].reset_index()
fit_fracs = curve_fitter()
fit_fracs.ingest_data(conc_input, x_col='Time (WD)')
Choose a model from below keys or "Custom:"
['Polynomial', 'Exponential', 'Power', 'Logarithmic', 'Fourier', 'Gaussian', 'Weibull', 'Hill-type', 'Sigmoidal']

Man7

# Fit the Man7 concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'Man7'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
8.77715414835226e-8*exp(-1.04979682849682*x)/(2.51606683299321e-5 + exp(-1.04979682849682*x))**2

Man6

# Fit the Man6 concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'Man6'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
1.53674532320441e-5/(x**3.63015880132957*(x**(-2.63015880132957) + 0.000730149475676811)**2)

Man5

# Fit the Man5 concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'Man5'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
7.82512472492164e-5/(x**3.57898807718615*(x**(-2.57898807718615) + 1.48099279331054e-5)**2)

G0-GlcNAc

# Fit the G0-GlcNAc concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G0-GlcNAc'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
3.20970296762035e-6/(x**3.9857986207357*(x**(-2.9857986207357) - 8.91079207537524e-5)**2)

G0F

# Fit the G0F concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G0F'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
0.00106965824298514/(x**4.9728043624615*(x**(-3.9728043624615) + 9.42588445419789e-5)**2)

G1a/b

# Fit the G1a/b concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G1a/b'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
1.81526914872763e-6*(17.2297568100532*exp(-0.514316349820139*x) + 0.0374374429383844*exp(-0.0374374429383844*x))/(exp(-0.514316349820139*x) + 0.0298504706415847*exp(-0.0374374429383844*x))**2

G1Fa/b

# Fit the G1Fa/b concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G1Fa/b'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
0.00238522708675479/(x**4.03582275597052*(x**(-3.03582275597052) + 0.00176704968125774)**2)

G0

# Fit the G0 concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G0'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
9.41441888399936e-5/(x**4.140653237447*(x**(-3.140653237447) + 0.000437837528509634)**2)

G0F-GlcNAc

# Fit the G0F-GlcNAc concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G0F-GlcNAc'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
1.38019843466825e-5/(x**4.28619872306324*(x**(-3.28619872306324) + 4.59783223097008e-5)**2)

G0F+GlcNAc

# Fit the G0F+GlcNAc concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G0F+GlcNAc'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
2.51813290846332e-5*exp(-0.504806299908199*x)/(0.00380914878275068 + exp(-0.504806299908199*x))**2

G2F

# Fit the G2F concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'G2F'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
0.000585155264255064/(x**3.59120437541022*(x**(-2.59120437541022) + 0.00854648647909327)**2)

A1G1F

# Fit the A1G1F concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'A1G1F'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
1.04061189355436e-5/(x**4.70728372947352*(x**(-3.70728372947352) + 0.000396022275776027)**2)

A1G2F

# Fit the A1G2F concentration, then evaluate the fitted model and its
# derivative model on the evaluation grid using the parameters just found.
frac_col = 'A1G2F'
fit_fracs.fit_jupyter(frac_col)
fit_params = fit_fracs.current_stats['params']
smoothed_data['Fit_Concentrations'][frac_col] = fit_fracs.get_fitmodel(fit_params)(retrieve_timepoints)
smoothed_data['Diff_Concentrations'][frac_col] = fit_fracs.get_diffmodel(fit_params)(retrieve_timepoints)
0.000116773396805464/(x**3.22003059056402*(x**(-2.22003059056402) + 0.00621613842908323)**2)

Summary

# Show the VCD table: raw column plus its 'fit_' and 'diff_' columns.
display(smoothed_data['VCD'])
VCD (1E6 VC/mL) fit_VCD (1E6 VC/mL) diff_VCD (1E6 VC/mL)
Time (WD)
1 0.784138 0.812027 0.435732
2 1.265309 1.374260 0.705857
3 2.100869 2.256844 1.073020
4 3.601440 3.532264 1.472161
5 5.214883 5.156431 1.735699
6 7.192178 6.893591 1.675505
7 8.165785 8.391412 1.275581
8 9.280572 9.396539 0.731072
9 9.786835 9.877230 0.252563
10 9.869698 9.953180 -0.074785
11 9.952553 9.773722 -0.264768
12 9.787621 9.454708 -0.361261
13 9.111461 9.069461 -0.402502
14 8.353707 8.659642 -0.413579
# Show the titer table: raw column plus its 'fit_' and 'diff_' columns.
display(smoothed_data['Titer'])
Titer (g/L) fit_Titer (g/L) diff_Titer (g/L)
Time (WD)
1 0.038894 0.000785 0.002834
2 0.062556 0.009576 0.017251
3 0.100022 0.041069 0.048891
4 0.163121 0.113737 0.099474
5 0.269600 0.245077 0.165003
6 0.429320 0.445240 0.234872
7 0.660026 0.711372 0.294462
8 1.038621 1.026431 0.331127
9 1.348201 1.364166 0.339716
10 1.699189 1.697561 0.323568
11 2.026516 2.005844 0.291056
12 2.340039 2.277226 0.251123
13 2.440604 2.507908 0.210547
14 2.706803 2.699458 0.173308
# Show the glycoform fractions as loaded (renamed columns, indexed by 'Time (WD)').
display(smoothed_data['Fractions'])
A1G1F A1G2F G0 G0F G0F+GlcNAc G0F-GlcNAc G0-GlcNAc G1a/b G2F Man5 Man6 Man7 G1Fa/b
Time (WD)
5 0.003273 0.006889 0.017714 0.596477 0.001061 0.003714 0.000694 0.002173 0.035255 0.008395 0.001183 0.000395 0.319791
8 0.003273 0.002323 0.014989 0.737378 0.002281 0.003429 0.000488 0.001561 0.016277 0.006052 0.001187 -0.000018 0.208716
10 0.002805 0.002952 0.015032 0.778474 0.002989 0.004449 0.000684 0.001322 0.012044 0.006597 0.001141 0.001089 0.170732
12 0.002338 0.002234 0.015394 0.799511 0.003371 0.005429 0.000900 0.001249 0.009708 0.007928 0.001109 0.001096 0.149998
14 0.002338 0.002062 0.015970 0.809785 0.003985 0.007143 0.001373 0.001215 0.008540 0.009961 0.001285 0.001295 0.136380
# Show the unsmoothed glycoform concentrations (fraction * measured titer).
display(smoothed_data['Concentrations'])
A1G1F A1G2F G0 G0F G0F+GlcNAc G0F-GlcNAc G0-GlcNAc G1a/b G2F Man5 Man6 Man7 G1Fa/b
Time (WD)
5 0.000882 0.001857 0.004776 0.160811 0.000286 0.001001 0.000187 0.000586 0.009505 0.002263 0.000319 0.000106 0.086216
8 0.003399 0.002413 0.015568 0.765856 0.002369 0.003561 0.000507 0.001621 0.016906 0.006285 0.001233 -0.000019 0.216777
10 0.004767 0.005016 0.025542 1.322774 0.005079 0.007560 0.001162 0.002246 0.020465 0.011210 0.001940 0.001850 0.290106
12 0.005470 0.005227 0.036023 1.870887 0.007887 0.012703 0.002105 0.002923 0.022717 0.018551 0.002594 0.002564 0.351002
14 0.006328 0.005583 0.043227 2.191928 0.010787 0.019334 0.003716 0.003288 0.023116 0.026962 0.003477 0.003504 0.369154
# Show the fitted glycoform concentrations evaluated on retrieve_timepoints.
display(smoothed_data['Fit_Concentrations'])
A1G1F A1G2F G0 G0F G0F+GlcNAc G0F-GlcNAc G0-GlcNAc G1a/b G2F Man5 Man6 Man7 G1Fa/b
1 0.000003 0.000052 0.000030 0.000269 0.000082 0.000004 0.000001 0.000097 0.000224 0.000030 0.000006 2.388571e-07 0.000784
2 0.000036 0.000238 0.000263 0.004221 0.000135 0.000041 0.000009 0.000158 0.001294 0.000181 0.000036 6.823406e-07 0.006352
3 0.000161 0.000563 0.000932 0.021011 0.000223 0.000155 0.000029 0.000253 0.003392 0.000516 0.000104 1.948752e-06 0.021022
4 0.000449 0.001006 0.002255 0.064869 0.000365 0.000398 0.000068 0.000396 0.006258 0.001083 0.000218 5.561665e-06 0.047231
5 0.000949 0.001534 0.004397 0.152474 0.000594 0.000825 0.000133 0.000601 0.009412 0.001924 0.000383 1.584083e-05 0.084312
6 0.001651 0.002109 0.007427 0.297708 0.000956 0.001490 0.000231 0.000875 0.012424 0.003078 0.000602 4.486123e-05 0.128615
7 0.002479 0.002695 0.011289 0.504784 0.001511 0.002447 0.000370 0.001209 0.015049 0.004577 0.000870 1.250369e-04 0.175136
8 0.003323 0.003266 0.015813 0.763591 0.002327 0.003740 0.000559 0.001581 0.017217 0.006453 0.001182 3.339065e-04 0.219469
9 0.004092 0.003804 0.020746 1.051493 0.003453 0.005403 0.000811 0.001957 0.018954 0.008733 0.001529 8.039687e-04 0.258881
10 0.004740 0.004297 0.025815 1.341389 0.004876 0.007455 0.001139 0.002306 0.020327 0.011445 0.001901 1.584894e-03 0.292310
11 0.005258 0.004742 0.030774 1.610677 0.006491 0.009900 0.001562 0.002614 0.021407 0.014611 0.002288 2.401272e-03 0.319840
12 0.005661 0.005139 0.035439 1.845968 0.008112 0.012721 0.002106 0.002877 0.022260 0.018254 0.002679 2.929413e-03 0.342132
13 0.005970 0.005490 0.039695 2.042813 0.009553 0.015883 0.002807 0.003103 0.022937 0.022393 0.003066 3.173732e-03 0.360037
14 0.006205 0.005799 0.043490 2.202836 0.010701 0.019336 0.003717 0.003301 0.023478 0.027047 0.003442 3.269164e-03 0.374388
# Show the derivative-model values of the glycoform fits on retrieve_timepoints.
display(smoothed_data['Diff_Concentrations'])
A1G1F A1G2F G0 G0F G0F+GlcNAc G0F-GlcNAc G0-GlcNAc G1a/b G2F Man5 Man6 Man7 G1Fa/b
1 0.000010 0.000115 0.000094 0.001069 0.000041 0.000014 0.000003 0.000048 0.000575 0.000078 0.000015 2.507334e-07 0.002377
2 0.000067 0.000257 0.000412 0.008373 0.000068 0.000067 0.000013 0.000076 0.001595 0.000234 0.000047 7.161719e-07 0.009503
3 0.000195 0.000389 0.000962 0.027620 0.000111 0.000170 0.000029 0.000117 0.002554 0.000443 0.000090 2.044594e-06 0.020267
4 0.000389 0.000492 0.001712 0.062965 0.000179 0.000326 0.000051 0.000172 0.003094 0.000698 0.000139 5.828847e-06 0.032039
5 0.000609 0.000558 0.002585 0.114683 0.000286 0.000537 0.000080 0.000239 0.003140 0.000992 0.000192 1.655038e-05 0.041484
6 0.000783 0.000586 0.003466 0.176578 0.000447 0.000803 0.000117 0.000307 0.002843 0.001321 0.000244 4.645938e-05 0.046252
7 0.000854 0.000583 0.004230 0.235860 0.000675 0.001118 0.000163 0.000359 0.002398 0.001682 0.000291 1.263242e-04 0.046037
8 0.000818 0.000557 0.004774 0.277831 0.000966 0.001473 0.000218 0.000379 0.001943 0.002074 0.000331 3.153108e-04 0.042175
9 0.000712 0.000516 0.005046 0.293292 0.001283 0.001856 0.000287 0.000366 0.001543 0.002492 0.000361 6.398031e-04 0.036481
10 0.000582 0.000470 0.005051 0.282654 0.001545 0.002250 0.000372 0.000330 0.001215 0.002935 0.000381 8.702574e-04 0.030401
11 0.000457 0.000421 0.004837 0.253702 0.001653 0.002637 0.000479 0.000285 0.000957 0.003401 0.000391 6.992117e-04 0.024775
12 0.000352 0.000373 0.004474 0.216192 0.001558 0.002999 0.000616 0.000243 0.000757 0.003888 0.000391 3.642221e-04 0.019954
13 0.000269 0.000329 0.004030 0.177821 0.001304 0.003317 0.000795 0.000210 0.000603 0.004394 0.000383 1.496321e-04 0.015997
14 0.000205 0.000289 0.003559 0.143035 0.000988 0.003578 0.001037 0.000187 0.000484 0.004917 0.000369 5.556953e-05 0.012826

Write to Excel

# Write every table in smoothed_data to its own sheet (sheet name = dict key)
# of a single workbook.
output_path = os.path.join(inputDir, 'Lee et al', 'medpH_processed.xlsx')
# Create the output subdirectory if needed so the export cannot fail on a
# missing folder after all fits have been done.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with pd.ExcelWriter(output_path) as writer:
    for k, v in smoothed_data.items():
        v.to_excel(writer, sheet_name=k)