Estimation

First, let’s select the cells that undergo a switch (i.e. the first frame occured before a switch to lactose, and the last frame after (or at) the switch). We “prolonge” each cell by summing the variables (length and total GFP) of its two daughters, and discard cells for which the acquisition stopped less than 30’ after the switch (10 points; it is difficult to distinguish growth from lag on shorter traces).

Based on the log of the length traces, the growth lag is computed by fitting a flat lag followed by a linear increase to the subset of each trace where the length is smaller than 3 times the length at the switch (note that the cells do more than doubling because daughters’ lengths are summed).

Estimating the induction lag is more cumbersome because GFP traces do not follow a general shape. We compute two lag estimates:

  • the GFP lag is computed by fitting a flat lag followed by a linear increase to the subset of each GFP trace where it is lower than the GFP at the switch plus 1000 GFP molecules (NB: the typical value at the switch is 100 molecules while fully induced cells have ca. 6000 molecules in average).
  • the lag_200 is computed by finding the delay until the cell increases its GFP by 200 molecules after the switch.
myframes_switching <- myframes %>% 
  ungroup %>% 
  filter(!(condition %in% c('glucose', 'lactose', 'mg1655'))) %>% 
  filter(!discard_start, !discard_top) %>%
  select(condition, date, pos, gl, id, genealogy, start_time, end_time, end_type, dt,
         time_sec, medium, m_start, m_end, gfp_nb, length_um, m_cycle, cell_num_in_lane, total_cell_in_lane) %>% 
  # prolonge traces with daughters' data
  partition(date, pos, gl, cluster=mycluster) %>%
  # group_by(date, pos, gl) %>%
  do((function(.dfgl) {
    # browser()
    .dfgl %>% 
      group_by(id) %>% 
      mutate(is_lac=str_detect(medium, 'lactose') | str_detect(medium, 'lactulose') | 
               str_detect(medium, fixed('+TMG')) | str_detect(medium, 'lacCM'), 
             t_lac_switch=min(m_start[is_lac]), t_lac_end=min(m_end[is_lac])) %>% 
      filter(first(time_sec)<=t_lac_switch & last(time_sec)>=unique(t_lac_switch)) %>% 
      do((function(.dfc) bind_rows(
        mutate(.dfc, type='cell'),
        .dfc %>% 
          ungroup %>% slice(1L) %>% # head(1) %>% 
          (function(.df)
            # find the genealogy of the daughters and extract the corresponding rows from myframes
           bind_rows(mutate(.df, join_gen=paste0(genealogy, 'B')),
                     mutate(.df, join_gen=paste0(genealogy, 'T')) ) %>% 
            semi_join(.dfgl %>% mutate(join_gen=genealogy),
                      ., by='join_gen') ) %>%
         ungroup %>% mutate(genealogy=get_parent_cid(genealogy)) %>% 
         select(condition, date, pos, gl, id, genealogy, dt,
                time_sec, medium, m_cycle, m_start, m_end, gfp_nb, length_um) %>% 
         # group by time, compute the stat of both daughters' and keep only the first row
         # compared to summarise, this keeps all other variables
         group_by(genealogy, time_sec) %>% 
         mutate(gfp_nb=ifelse(n()==2, sum(gfp_nb), 2*gfp_nb), 
                length_um=ifelse(n()==2, sum(length_um), 2*length_um), 
                # medium=unique(medium), m_cycle=unique(m_cycle),
                type=paste0(n(), "dg")) %>%
         slice(1L) 
     ) )(.))
   })(.) ) %>% 
  collect() %>% 
# myframes_switching <- myframes_switching %>% 
  # fill in missing values
  group_by(date, pos, gl, genealogy) %>%
  fill(start_time, end_time, t_lac_switch, t_lac_end) %>% 
  mutate(ugen=paste(date, pos, gl, genealogy, sep='.'),
         pre_switch=time_sec>t_lac_switch+60 & time_sec<t_lac_switch+10*60) %>% 
  # select data used for lag estimation
  group_by(condition, ugen) %>%
  mutate(switch_idx=first(m_cycle),
         fit_lag=ifelse(
    type!='1dg' & # discard traces after one daughter is lost (because of discontinuity)
      tail(time_sec[type!='1dg']-t_lac_switch[type!='1dg'], 1) > first(4 * dt) & # require at least 4 frames after the switch (one point after the gfp_ini window)
      m_start==t_lac_switch # focus on the medium after the switch
    , TRUE, FALSE) )

Let’s visualise the data used to estimate the lags:

myframes_switching %>% 
  filter(fit_lag) %>% 
  filter(condition!='switch_long_lac_hiExpr') %>%
  ggplot(aes(time_sec - 2*3600, gfp_nb, col=ugen, alpha=type)) +
  facet_wrap(~condition+date, dir="v", ncol=2, labeller=labeller(.cols=label_value, .multi_line=FALSE)) +
  # geom_rect(aes(xmin=t_start - 2*3600, xmax=t_end - 2*3600, ymin=-Inf, ymax=Inf, x=NaN, y=NaN, group=1), fill=rgb(1, 0, 0, .1), col='red', data=filter(condition_ts, condition=='switch', medium=='lactose')) +
  geom_path() + # geom_point() +
  scale_colour_periodic(guide='none') +
  scale_alpha_discrete(range=c(0.1, 0.6)) +
  scale_x_hours() + #, limits=c(19*3600, 23*3600)) +
  ylim(-100, 1e4) +   expand_limits(x=0, y=0) +
  labs(y='total fluorescence (GFP molecules)') +
  theme(legend.position="top")

# myframes_switching %>% 
#   filter(condition=='switch_long_lac_hiExpr') %>% 
#   filter(fit_lag) %>% 
#   # group_by(date, pos, gl, id) %>% summarise(n=n()) %>% qplot(n, data=., binwidth=10)
# ggplot(aes(time_sec - 2*3600, length_um, col=ugen)) +
#   facet_wrap(~condition, ncol=1) +
#   # geom_rect(aes(xmin=t_start - 2*3600, xmax=t_end - 2*3600, ymin=-Inf, ymax=Inf, x=NaN, y=NaN, group=1), fill=rgb(1, 0, 0, .1), col='red', data=filter(condition_ts, condition=='switch', medium=='lactose')) +
#   # geom_path() + # geom_point() +
#   stat_smooth(se=FALSE, geom='line', alpha=.2) +
#   scale_colour_periodic(guide='none') +
#   # scale_alpha_discrete(range=c(0.1, 0.6)) +
#   scale_x_hours(limits=c(3*3600, 12*3600)) +
#   scale_y_continuous(trans='log2')
mycells_switching <- myframes_switching %>%
  filter(fit_lag) %>% 
  # group_by(condition, switch_idx, date, pos, gl, ugen) %>%
  partition(condition, switch_idx, date, pos, gl, ugen, cluster=mycluster %>% cluster_assign_func(compute_lag) ) %>%
  do((function(.df){
    # browser()
    # print(unique(.df$ugen))
    # if (unique(.df$ugen)=="20150703.0.11.0:BBBBTBB") browser()
    # if (unique(.df$ugen)=="20150703.3.11.0:BBTBBB") browser()
    
    .cond <- head(.df$condition)
    .preswitch_gfp <- mean(filter(.df, pre_switch)$gfp_nb)
    .preswitch_length <- mean(filter(.df, pre_switch)$length_um)
    # compute growth lag
    .growth_lag <- .df %>% filter(time_sec>=t_lac_switch, length_um<3*.preswitch_length) %>%
      pull(length_um) %>% log %>% compute_lag(.scaling_factor=1) #,.plot=TRUE)
    if (length(.growth_lag) == 0) { warning('growth lag estimation failed')
      .growth_lag <- list(tau_idx=NaN, x_lag=NaN, slope_after=NaN)
    }

    if (.df$condition %in% c('switch_long_lac_hiExpr')) {
      .gfp_lag <- list(tau_idx=NaN, x_lag=NaN, slope_after=NaN)
      .t_200 <- NaN
      .t_200_fail <- NaN
      .t_1000 <- NaN
      .t_1000_fail <- NaN
    } else {
    .gfp_lag <- .df %>% filter(time_sec>=t_lac_switch, gfp_nb<.preswitch_gfp+ifelse(.cond!='switch_iptg', 1000, 2000)) %>%
      pull(gfp_nb) %>% compute_lag #(.plot=TRUE)
    if (length(.gfp_lag) == 0)
      .gfp_lag <- list(tau_idx=NaN, x_lag=NaN, slope_after=NaN)

    # compute induction lag
    .tmin <- filter(.df, pre_switch)$time_sec %>% max
    .t_200 <- .df %>% filter(time_sec>.tmin, gfp_nb>.preswitch_gfp+200) %>% pull(time_sec) %>% first() # returns NA if empty
    .t_200_fail <- .df %>% filter(time_sec>.t_200, gfp_nb<.preswitch_gfp+200) %>% pull(time_sec) %>% first()
    .t_1000 <- .df %>% filter(time_sec>.tmin, gfp_nb>.preswitch_gfp+1000) %>% pull(time_sec) %>% first() # returns NA if empty
    .t_1000_fail <- .df %>% filter(time_sec>.t_1000, gfp_nb<.preswitch_gfp+1000) %>% pull(time_sec) %>% first()
    }
    
    .dt <- first(.df$dt) # don't use unique as some values are NA
    return(data.frame(dt=.dt, n_fit=nrow(.df), genealogy=gsub(":", "", unique(.df$genealogy)), 
                      t_lac_switch=first(.df$t_lac_switch), t_lac_end=first(.df$t_lac_end),
                      tmax_lag_fit=max(.df$time_sec), is_tmax_end=max(.df$time_sec)==first(.df$t_lac_end),
                      cell_num=filter(.df, time_sec==t_lac_switch)$cell_num_in_lane,
                      cell_num_from_top=filter(.df, time_sec==t_lac_switch)$cell_num_in_lane, 
                      cell_num_from_bottom=filter(.df, time_sec==t_lac_switch)$total_cell_in_lane-filter(.df, time_sec==t_lac_switch)$cell_num_in_lane,
                      # switch_idx=filter(.df, is_lac)$m_cycle %>% first, 
                      time_birth=unique(.df$start_time), time_div=unique(.df$end_time),
                      growth_lag=(.growth_lag$tau_idx-1)*.dt, growth_lag_length=exp(.growth_lag$x_lag), growth_lag_rate_after=.growth_lag$slope_after / .dt,
                      gfp_lag=(.gfp_lag$tau_idx-1)*.dt, gfp_lag_length=.gfp_lag$x_lag, gfp_lag_slope_after=.gfp_lag$slope_after / .dt,
                      gfp_ini=.preswitch_gfp, length_ini=.preswitch_length, 
                      lac_200=.t_200, lac_200_fail=.t_200_fail,
                      lac_1000=.t_1000, lac_1000_fail=.t_1000_fail))
    })(.)) %>%
  collect() %>% 
  arrange(condition, ugen) %>%  # sort data after `partition()`
  # mycells_switching <- mycells_switching %>% 
  mutate(lag_200=ifelse(is.na(lac_200) & !is.nan(lac_200), Inf, lac_200-t_lac_switch), # set lag_200 to Inf if no restart (is `& !is.nan(lac_200)` really needed?)
         lag_1000=ifelse(is.na(lac_1000) & !is.nan(lac_1000), Inf, lac_1000-t_lac_switch)) %>%  
  # # more complicated heuristic (dropped during MoMA_Methods revisions)
  # mutate(lac_200=ifelse(is.na(lac_200), Inf, lac_200), # set lag_200 to Inf if no restart
  #        lag_200 = ifelse(is.na(lac_200_fail) | lac_200_fail-lac_200 > 20*60, lac_200-t_lac_switch, NaN) ) %>%
# discard lag fit with slope too close to 0
  mutate(gfp_lag=ifelse(!is.na(gfp_lag_slope_after) & between(gfp_lag_slope_after, 0.12, Inf), gfp_lag, Inf),
         gfp_lag_slope_after=ifelse(!is.na(gfp_lag_slope_after) & between(gfp_lag_slope_after, 0.12, NaN), gfp_lag_slope_after, NaN),
         growth_lag=ifelse(!is.na(growth_lag_rate_after) & between(growth_lag_rate_after, 4e-5, 2.5e-4), growth_lag, Inf),
         growth_lag_rate_after=ifelse(!is.na(growth_lag_rate_after) & between(growth_lag_rate_after, 4e-5, 2.5e-4), growth_lag_rate_after, NaN)) %>% 
  # set to NA lags of uninduced cells with traces shorter than the lactose episode
  mutate(reach_lac_end = ifelse(!is_tmax_end, near(tmax_lag_fit, t_lac_end-dt, to=1e-3), 
                                near(tmax_lag_fit, t_lac_end, to=1e-3)) ) %>% 
  mutate(lag_200=ifelse(is.infinite(lag_200) & !reach_lac_end, NA, lag_200), 
             lag_1000=ifelse(is.infinite(lag_1000) & !reach_lac_end, NA, lag_1000), 
             gfp_lag=ifelse(is.infinite(gfp_lag) & !reach_lac_end, NA, gfp_lag),
         growth_lag=ifelse(is.infinite(growth_lag) & !reach_lac_end, NA, growth_lag) )

# ggplot(mycells_switching) +
#   facet_grid(condition~., margins=TRUE, scales='free') +
#   geom_vline(xintercept=0.12, col='red') +
#   geom_histogram(aes(gfp_lag_slope_after), binwidth=0.025)
# 
# ggplot(mycells_switching) +
#   facet_grid(condition~., margins=TRUE, scales='free') +
#   geom_vline(xintercept=4e-5, col='red') +
#   geom_histogram(aes(growth_lag_rate_after), binwidth=2e-6)

  
# add fit prediction and residual to myframes_switching
myframes_switching <- myframes_switching %>% 
  group_by(ugen) %>% 
  mutate(switch_idx=m_cycle[is_lac] %>% first) %>% 
  ungroup %>% 
  left_join(mycells_switching %>% ungroup %>% 
              select(ugen, growth_lag, growth_lag_length, growth_lag_rate_after, 
                     gfp_lag, gfp_lag_length, gfp_lag_slope_after,
                     gfp_ini, lag_200, lac_200, lag_1000, lac_1000)) %>%
  mutate(length_predict=ifelse(time_sec<t_lac_switch+growth_lag, growth_lag_length,
    growth_lag_length * exp(growth_lag_rate_after*(time_sec-t_lac_switch-growth_lag) )),
         length_predict=ifelse(time_sec>=t_lac_switch, length_predict, NA),
         llength_residual=log(length_um)-log(length_predict))

# # visual control
# for(i in 1:1000) {
# filter(myframes_switching, cell==nth(unique(ugen), i)) %>%
# filter(myframes_switching, ugen==unique(ugen)[i]) %>%
#   (function(.x) ggplot(.x)+
#   geom_vline(aes(xintercept = unique(t_lac_switch-start_time+growth_lag)), lty='dashed') +
#   geom_point(aes(time_sec-start_time, log(length_um), col=medium)) +
#   geom_line(aes(time_sec-start_time, log(length_predict))) ) %>%
#     print
# scan("")
# }

We use the following convention:

  • NaN: lag estimation failed (or no lac-GFP data)
  • NA: lag longer than the gfp (resp. length) time series (when time series is shorter than the lactose episode)
  • Inf: lag longer than the lactose episode
mycells_switching <- 
  # extract relevant frames
  # don't discard NA lags otherwise filter(!discard_arrested) also discard NA lags
  bind_rows(
    # switching cells
    mycells_switching %>% ungroup %>% 
      # filter(!is.na(lag_200)) %>% 
      mutate(type='pre', switch_ugen=ugen) %>% 
      select(type, ugen, switch_ugen, switch_idx, t_lac_switch) %>% 
      left_join(myframes) %>% 
      filter(time_sec <= t_lac_switch),
    # parent cells
    mycells_switching %>% ungroup %>% 
      # filter(!is.na(lag_200)) %>% 
      left_join(select(myframes, date, gl, pos, id, parent_id, ugen) %>% group_by(ugen) %>% slice(1)) %>% 
      mutate(type='parent', switch_ugen=ugen, 
             # cell=paste(date, pos, gl, parent_id, sep='.')) %>% 
             ugen=get_parent_cid(ugen) ) %>% 
      select(type, ugen, switch_ugen, switch_idx, t_lac_switch) %>% 
      left_join(myframes) %>% 
      filter(time_sec <= t_lac_switch)
  ) %>% 
  # compute elongation rate for these cells
  group_by(type, ugen, switch_ugen) %>% 
  do((function(.df) {
    # browser()
    if (dim(.df)[1] < 5) return(data.frame())
    
    .mod <- lm( log(length_um)~time_sec, .df)
    data.frame(logl_time_slope=.mod$coefficients[2],
               logl_time_r2=cor(.df$time_sec, log(.df$length_um))^2)
  })(.)) %>% 
  # reshape and join to mycells_switching
  ungroup %>% 
  mutate(ugen=switch_ugen, switch_ugen=NULL) %>% 
  gather(var, val, -type, -ugen) %>% 
  mutate(var=paste(var, type, sep='_'), type=NULL) %>% 
  spread(var, val) %>% 
  right_join(mycells_switching) %>% 
  mutate(n_preswitch=(t_lac_switch-time_birth)/dt,
         logl_time_slope_before=ifelse(n_preswitch<5, 
                                       logl_time_slope_parent, logl_time_slope_pre),
         discard_arrested=ifelse(condition=='switch_gly_lac', NA, logl_time_slope_before <= min_growth_rate),
         discard_arrested_=discard_arrested,
         discard_arrested=FALSE, # disable this filtering to keep analysis description simple
         )
  

# ggplot(mycells_switching) +
#   geom_abline() +
#   geom_point(aes(logl_time_slope_parent, logl_time_slope_pre, size=n_preswitch), alpha=0.2) +
#   coord_cartesian(ylim=c(0, 3e-4))

# ggplot(mycells_switching) +
#   geom_point(aes(n_preswitch, 1-logl_time_r2_pre), size=0.2, alpha=0.2) +
#   scale_y_log10() +
#   xlim(0, 50)

Validation of the lag estimation

Let’s look at the proportion of cells for which a lag can be estimated and identify the different types of indetermination (based on the interaction between the different estimates):

options(knitr.kable.NA = '')
mycells_switching %>% 
  filter(! condition %in% c('switch_long_lac_hiExpr')) %>%  # lag_200 is NaN
  mutate(growth_lag=as.character(growth_lag), growth_lag=ifelse(is.na(growth_lag), 'NA', growth_lag),
         growth_lag=ifelse(growth_lag %in% c('NaN', 'NA', 'Inf'), growth_lag, 'numeric'),
         gfp_lag=as.character(gfp_lag), gfp_lag=ifelse(is.na(gfp_lag), 'NA', gfp_lag),
         gfp_lag=ifelse(gfp_lag %in% c('NaN', 'NA', 'Inf'), gfp_lag, 'numeric'),
         lag_200=as.character(lag_200), lag_200=ifelse(is.na(lag_200), 'NA', lag_200),
         lag_200=ifelse(lag_200 %in% c('NaN', 'NA', 'Inf'), lag_200, 'numeric')) %>% 
  group_by(growth_lag, gfp_lag, lag_200) %>% 
  summarise(n=n()) %>% 
  ungroup %>% mutate(p=n/sum(n)) %>% 
  left_join(tribble(
    ~growth_lag, ~gfp_lag , ~lag_200 , ~comment,
    #----------,----------,----------,----------------------,
    "Inf"      , "Inf"    , "Inf"    , "no restart",
    "Inf"      , "Inf"    , "NA"     , "no restart but wiggly fluo",
    "Inf"      , "Inf"    , "numeric", "no restart (with minor fluo increase)",
    "Inf"      , "numeric", "Inf"    , "fluo only restart but lower than 200",
    "Inf"      , "numeric", "NA"     , "fluo only restart but wiggly",
    "Inf"      , "numeric", "numeric", "fluo restart but no growth",
    "NA"       , "numeric", "numeric", "fluo restart but no growth",
    "NA"       , "NA"     , "NA"     , "aborted time series (cell exit)",
    "numeric"  , "NA"     , "NA"     , "growth restart but not fluo",
    "numeric"  , "Inf"    , "Inf"    , "growth restart but not fluo",
    "numeric"  , "Inf"    , "numeric", "(missed gfp_lag?)",
    "numeric"  , "numeric", "NA"     , "(fluo either lower than 200 or wiggly)",
    "numeric"  , "numeric", "numeric", "success: growth and induction lags estimated"
  )) %>% 
  knitr::kable(digits=2) %>% 
  kableExtra::kable_styling() %>%
  identity()
growth_lag gfp_lag lag_200 n p comment
Inf Inf Inf 140 0.02 no restart
Inf Inf numeric 169 0.02 no restart (with minor fluo increase)
Inf numeric Inf 3 0.00 fluo only restart but lower than 200
Inf numeric numeric 112 0.01 fluo restart but no growth
NA NA NA 341 0.04 aborted time series (cell exit)
NA NA numeric 68 0.01
NA numeric NA 83 0.01
NA numeric numeric 332 0.04 fluo restart but no growth
numeric Inf Inf 21 0.00 growth restart but not fluo
numeric Inf numeric 9 0.00 (missed gfp_lag?)
numeric NA NA 27 0.00 growth restart but not fluo
numeric NA numeric 24 0.00
numeric numeric NA 15 0.00 (fluo either lower than 200 or wiggly)
numeric numeric numeric 7524 0.85 success: growth and induction lags estimated
options(knitr.kable.NA = NULL)

For the growth lag, the residuals around the switch show less than 2% deviation from our model in average. In order to understand the distribution of residuals, it must be stratified by time: the residuals are the largest at the switch and decrease monotonically until the end of the lag exponential fit. During the post-lag exponential growth, the residuals continue to decrease.

myframes_switching %>% 
  filter(is.finite(growth_lag)) %>% 
  mutate(t_switch=time_sec-t_lac_switch-growth_lag, rel_resid=llength_residual/log(length_predict)) %>% 
  group_by(t_switch) %>% 
  summarise(mean=mean(rel_resid, na.rm=TRUE), n=n(), sem=sd(rel_resid, na.rm=TRUE)/sqrt(n)) %>%
  ungroup() %>% filter(n>max(n)/20) %>% 
ggplot(aes(t_switch/60, mean, alpha=n)) +
  geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem)) +
  geom_line() +
  coord_cartesian(xlim=c(-100, 150)) +
  labs(x='time after switch (min)', y='scaled residuals average')

Comparison of lag types

ggplot(data=mycells_switching, aes(lag_200/60, gfp_lag/60)) +
  # facet_grid(switch_idx~condition, scales='free', margins=TRUE) +
  facet_wrap(~condition+switch_idx, scales='free') +
  geom_abline(col='red', alpha=0.5) +
  geom_point(alpha=.2, size=.2) +
  labs(x='induction lag (+200 GFP molecules; min)', y='GFP lag (min)') +
  expand_limits(x=0, y=0)

ggplot(data=mycells_switching, aes(growth_lag/60, gfp_lag/60)) +
  # facet_grid(switch_idx~condition, scales='free') + #, margins=TRUE) +
  facet_wrap(~condition+switch_idx, scales='free') +
  geom_abline(col='red', alpha=0.5) +
  geom_point(alpha=.2, size=.2) +
  labs(x='growth lag (min)', y='GFP lag (min)') +
  expand_limits(x=0, y=0) 

ggplot(data=mycells_switching, aes(growth_lag/60, lag_200/60)) +
  # facet_grid(switch_idx~condition, scales='free', margins=TRUE) +
  facet_wrap(~condition+switch_idx, scales='free') +
  geom_abline(col='red', alpha=0.5) +
  geom_point(alpha=.2, size=.2) +
  expand_limits(x=0, y=0) 

NB: the lower bound of the GFP lags probably comes from GFP maturation time.

(myplots[['lags_types_correl']] <- function(.min_psize=.5, .max_psize=3)
  mycells_switching %>% ungroup() %>% 
   filter(!date %in% discarded_dates) %>%
   filter(!discard_arrested) %>% 
   filter(str_detect(condition, '^switch_[0-9]+h$')) %>%
   filter(switch_idx==1) %>%
   mutate(growth_lag=ifelse(growth_lag>240*60, Inf, growth_lag) ) %>% 
   group_by(growth_lag, lag_200) %>% 
   summarise(n=n()) %>% ungroup %>% 
   mutate(infinite=ifelse(is.infinite(growth_lag) | is.infinite(lag_200), TRUE, FALSE),
          growth_lag=ifelse(is.infinite(growth_lag), 240*60, growth_lag),
          lag_200=ifelse(is.infinite(lag_200), 240*60, lag_200)) %>% 
   ( function(.df)
     ggplot(.df, aes(lag_200/60, growth_lag/60, size=n)) +
       geom_abline(alpha=0.9, size=.25) +
       geom_point(stroke=0) +
       labs(x=lac_lags_label, y='growth lag (min)') +
       scale_size_continuous(name='nb of obs.', range=c(.min_psize, .max_psize), breaks=c(5, 20, 80)) +
       # scale_shape_manual(breaks=c(TRUE, FALSE), values=c(2, 1)) +
       expand_limits(x=0, y=0) +
       scale_x_continuous(breaks=seq(0, 200, 50)) + scale_y_continuous(breaks=seq(0, 200, 50)) +
       theme(legend.position=c(1, .01), legend.justification=c(0.99, 0), legend.direction='horizontal') +
       NULL
   )
)()

 # ggsave('plots/BZSposter2017/gfp_growth_lags.pdf', width=180/25.4/2, height=100/25.4/2)

mycells_switching %>% ungroup() %>%
  filter(!date %in% discarded_dates) %>%
  filter(!discard_arrested) %>%
  filter(str_detect(condition, '^switch_[0-9]+h$')) %>%
  filter(switch_idx==1) %>%
  mutate(growth_lag=ifelse(growth_lag>240*60, Inf, growth_lag) ) %>%
  # group_by(growth_lag, lag_200) %>%
  # summarise(n=n()) %>% ungroup %>%
  
  # filter(is.infinite(growth_lag) & is.finite(lag_200)) %>% 
  # pull(lag_200) %>% (function(.x) .x/60) %>% table()
  
  summarise(n=n(), n_na=sum(is.na(growth_lag)), n_inf=sum(is.infinite(growth_lag)),
            n_induce_arrested = sum( is.infinite(growth_lag) & is.finite(lag_200) ),
  )
## # A tibble: 1 x 4
##       n  n_na n_inf n_induce_arrested
##   <int> <int> <int>             <int>
## 1  1633   152    97                45
    # with(sum( is.infinite(growth_lag) & is.finite(lag_200) ) /
    #      sum( !is.na(growth_lag) & !is.na(lag_200)) ) 

  # filter(is.finite(growth_lag), is.finite(lag_200)) %>%
  # with(cor(growth_lag, lag_200)^2)

Overview of lag distributions

mycells_switching %>% ungroup %>% 
  gather(variable, lag, growth_lag, gfp_lag, lag_200, factor_key=TRUE) %>% 
  mutate(lag=ifelse(!is.infinite(lag), lag, max(lag[is.finite(lag)])+60),
         lag=ifelse(is.na(lag), -1, lag)) %>% 
  ggplot(aes(lag/60)) +
  facet_grid(condition~switch_idx, labeller=as_labeller(rename_conds)) +
  stat_ecdf(aes(y=1-..y.., col=variable)) +
  coord_cartesian(xlim=c(0, 240)) +
  labs(x='lag after the switch (min)', y='rev. cumulative proba.') +
  theme(legend.position="top")

mycells_switching %>% ungroup %>% 
  gather(variable, lag, growth_lag, gfp_lag, lag_200, factor_key=TRUE) %>% 
  mutate(lag=ifelse(!is.infinite(lag), lag, max(lag[is.finite(lag)])+60),
         lag=ifelse(is.na(lag), -1, lag)) %>% 
  ggplot(aes(lag/60)) +
  facet_grid(condition~variable, labeller=as_labeller(rename_conds)) +
  stat_ecdf(aes(y=1-..y.., col=factor(switch_idx))) +
  coord_cartesian(xlim=c(0, 240)) +
  scale_y_continuous(breaks=seq(0, 1, by=0.5)) +
  labs(x='lag after the switch (min)', y='rev. cumulative proba.', col='switch') +
  theme(legend.position="top")

# ggsave('Rplot.pdf', width=7.5, height=4.5)


# mycells_switching %>% ungroup %>% 
#   # filter(!condition %in% c('switch_08h')) %>% 
#   gather(variable, lag, growth_lag, gfp_lag, lag_200, factor_key=TRUE) %>% 
#   mutate(lag=ifelse(!is.infinite(lag), lag, max(lag[is.finite(lag)])+60),
#          lag=ifelse(is.na(lag), -1, lag)) %>% 
#   ggplot(aes(lag/60)) +
#   facet_grid(variable~switch_idx, labeller=as_labeller(rename_conds)) +
#   stat_ecdf(aes(y=1-..y.., col=condition, group=date)) +
#   coord_cartesian(xlim=c(0, 240)) +
#   scale_y_continuous(breaks=seq(0, 1, by=0.5)) +
#   ggplot2::scale_color_discrete() +
#   labs(x='lag after the switch (min)', y='rev. cumulative proba.') +
#   theme(legend.position="top")

Focus on the first switch:

mycells_switching %>% 
  ungroup() %>% 
  mutate(type=as.character(lag_200),
         type=ifelse(is.finite(lag_200) & lag_200<48*60, 'short', type),
         type=ifelse(is.finite(lag_200) & lag_200>=48*60, 'long', type)) %>% 
  group_by(condition, date, switch_idx) %>%
  do((function(.df){
    # if (first(.df$date)=="20170926") 
    # browser()
    # print(first(.df$date))
    table(.df$type, exclude=NULL, useNA="ifany") %>% data.frame() %>% setNames(c("var", "value")) %>% 
      mutate(var=as.character(var), value=value/sum(value)) %>%
      bind_rows(tibble(var="n", value=nrow(.df)))
    })(.)) %>% 
  spread("var", "value", fill=0) %>% 
  select(condition, date, switch_idx, n, short, long, "Inf", everything()) %>% 
  knitr::kable(digits = 3) %>%
  kableExtra::kable_styling() %>% 
  identity()
condition date switch_idx n short long Inf <NA>
switch_04h 20150703 1 155 0.271 0.645 0.013 0.071
switch_04h 20150703 2 155 0.961 0.006 0.000 0.032
switch_04h 20150703 3 152 0.993 0.000 0.000 0.007
switch_04h 20150708 1 171 0.205 0.655 0.064 0.076
switch_04h 20150708 2 172 0.878 0.076 0.000 0.047
switch_04h 20150708 3 193 0.964 0.005 0.010 0.021
switch_06h 20151204 1 191 0.251 0.618 0.063 0.068
switch_06h 20151204 2 187 0.936 0.027 0.016 0.021
switch_06h 20151204 3 170 0.959 0.000 0.000 0.041
switch_08h 20151218 1 196 0.265 0.638 0.020 0.077
switch_08h 20151218 2 181 0.956 0.028 0.000 0.017
switch_08h 20151218 3 176 0.977 0.011 0.000 0.011
switch_08h 20180206 1 170 0.306 0.624 0.006 0.065
switch_08h 20180206 2 162 0.938 0.006 0.000 0.056
switch_12h 20180207 1 175 0.166 0.794 0.000 0.040
switch_12h 20180207 2 178 0.860 0.107 0.000 0.034
switch_12h 20180216 1 170 0.235 0.694 0.000 0.071
switch_12h 20180216 2 174 0.879 0.092 0.011 0.017
switch_12h_old 20160526 1 164 0.189 0.726 0.043 0.043
switch_12h_old 20160526 2 171 0.731 0.222 0.018 0.029
switch_16h 20160912 1 225 0.258 0.667 0.009 0.067
switch_16h 20160912 2 219 0.694 0.251 0.009 0.046
switch_20h 20161014 1 193 0.171 0.653 0.093 0.083
switch_20h 20161014 2 189 0.302 0.635 0.005 0.058
switch_24h 20161007 1 183 0.262 0.661 0.033 0.044
switch_24h 20161007 2 167 0.323 0.629 0.000 0.048
switch_24h 20180313 1 59 0.203 0.593 0.136 0.068
switch_glcLac_lac 20171114 1 180 0.400 0.550 0.000 0.050
switch_glcLac_lac 20180108 1 174 0.448 0.517 0.011 0.023
switch_glcLac_lac 20180606 1 53 0.604 0.396 0.000 0.000
switch_gly_lac 20170919 1 137 0.518 0.358 0.022 0.102
switch_gly_lac 20170920 1 142 0.458 0.437 0.035 0.070
switch_lac001_10h 20190614 1 115 0.357 0.530 0.026 0.087
switch_lac001_6h 20190605 1 193 0.435 0.394 0.093 0.078
switch_lacIoe 20180116 1 177 0.040 0.853 0.017 0.090
switch_lacIoe 20180123 1 163 0.025 0.902 0.018 0.055
switch_lacIoe 20180214 1 157 0.045 0.860 0.000 0.096
switch_lacIoe_preIPTG10uM 20180604 1 174 0.454 0.466 0.000 0.080
switch_lacIoe_withIPTG10uM 20180119 1 162 0.296 0.679 0.000 0.025
switch_lacIPTG5uM 20180316 1 182 0.489 0.456 0.000 0.055
switch_lactose_priming 20161212 1 230 0.222 0.709 0.000 0.070
switch_late 20161021 1 130 0.177 0.492 0.231 0.100
switch_late 20170108 1 158 0.285 0.652 0.019 0.044
switch_late 20180516 1 166 0.211 0.723 0.000 0.066
switch_late 20180615 1 55 0.836 0.091 0.000 0.073
switch_preIPTG5uM 20180514 1 178 0.601 0.337 0.000 0.062
switch_preIPTG5uM 20180531 1 170 0.529 0.406 0.012 0.053
switch_ramp15min 20170901 1 139 0.540 0.367 0.050 0.043
switch_ramp40min 20171121 1 164 0.244 0.695 0.000 0.061
switch_ramp40min 20180319 1 195 0.277 0.672 0.000 0.051
switch_withIPTG1uM 20161130 1 225 0.382 0.560 0.000 0.058
switch_withIPTG5uM 20161207 1 178 0.646 0.331 0.006 0.017
switch_withIPTG5uM 20180122 1 173 0.913 0.058 0.000 0.029
mycells_switching %>% ungroup %>% 
  filter(str_detect(condition, '^switch_[0-9]+h') | condition=='switch_long_lac' ) %>%
  filter(switch_idx==1) %>% 
  gather(variable, lag, growth_lag, gfp_lag, lag_200, factor_key=TRUE) %>% 
  mutate(lag=ifelse(!is.infinite(lag), lag, max(lag[is.finite(lag)])+60),
         lag=ifelse(is.na(lag), -1, lag)) %>% 
  ggplot(aes(lag/60)) +
  facet_grid(condition~variable, scales='free_y', labeller=as_labeller(rename_conds)) +
  geom_histogram(aes(fill=factor(date)), alpha=.2, position='identity', binwidth=6) +
  geom_step_hist(aes(col=factor(date)), position='identity', binwidth=6) +
  xlim(0, 240) +
  ggplot2::scale_color_discrete() + ggplot2::scale_fill_discrete() +
  labs(x='lag after the switch (min)')

mycells_switching %>% ungroup %>% 
  filter(!date %in% discarded_dates) %>% 
  filter(str_detect(condition, '^switch_[0-9]+h') | condition=='switch_long_lac' | str_detect(condition, 'IPTG') ) %>%
  mutate(condition=ifelse(str_detect(condition, '^switch_[0-9]+h$') | condition=='switch_long_lac',
                          'switch', condition)) %>%
  filter(switch_idx==1) %>% 
  gather(variable, lag, growth_lag, gfp_lag, lag_200, factor_key=TRUE) %>% 
  mutate(lag=ifelse(!is.infinite(lag), lag, max(lag[is.finite(lag)])+60),
         lag=ifelse(is.na(lag), -1, lag)) %>% 
  ggplot(aes(lag/60)) +
  facet_grid(condition~variable, scales='free_y', labeller=as_labeller(rename_conds)) +
  geom_histogram(aes(fill=factor(date), group=date), alpha=.2, position='identity', binwidth=6) +
  geom_step_hist(aes(col=factor(date), group=date), position='identity', binwidth=6) +
  xlim(0, 240) +
  ggplot2::scale_color_discrete() + ggplot2::scale_fill_discrete() +
  labs(x='lag after the switch (min)')

# mycells_switching %>% ungroup %>%
#   # filter(!condition %in% c('switch_08h')) %>%
#   filter(switch_idx==1) %>%
#   mutate(condition=ifelse(str_detect(condition, '^switch_[0-9]+h$') | condition=='switch_long_lac',
#                           'switch', condition)) %>%
#   gather(variable, lag, growth_lag, gfp_lag, lag_200, factor_key=TRUE) %>%
#   mutate(lag=ifelse(!is.infinite(lag), lag, max(lag[is.finite(lag)])+60),
#          lag=ifelse(is.na(lag), -1, lag)) %>%
#   ggplot(aes(lag/60)) +
#   facet_grid(variable~., scales='free_y', labeller=as_labeller(rename_conds)) +
#   stat_ecdf(aes(y=1-..y.., col=condition), alpha=.8) + # , group=date
#   xlim(0, 240) +
#   ggplot2::scale_color_discrete() +
#   labs(x='lag after the switch (min)')