Esta es una pregunta de seguimiento de otra que ya publiqué aquí: «Contar las ocurrencias de ID dentro de los últimos x días en R». Estoy tratando de hacer otro conteo móvil. Tengo los siguientes datos: date = c("2014-04-...

1
statman123 15 mar. 2021 a las 02:22

3 respuestas

La mejor respuesta

Una opción que usa non-equi join:

# Lower bound of each row's look-back window: 30 days before its own date.
DT[, onemthago := date - 30L]

# Non-equi self-join: each row of DT (passed in as .SD) is matched against the
# rows of the same group whose date falls between that row's onemthago and its
# own date. by = .EACHI evaluates the count once per row of .SD, so $V1 holds
# one distinct-ID count per original row.
DT[, count := DT[
  .SD,
  uniqueN(ID),
  on = .(group, date >= onemthago, date <= date),
  by = .EACHI
]$V1]

Salida:

    group       date ID  onemthago count
 1:     G 2014-04-01  2 2014-03-02     1
 2:     G 2014-04-12  3 2014-03-13     2
 3:     F 2014-04-07  4 2014-03-08     1
 4:     G 2014-05-03  2 2014-04-03     2
 5:     E 2014-04-14  3 2014-03-15     1
 6:     E 2014-05-04  1 2014-04-04     2
 7:     H 2014-03-31  2 2014-03-01     1
 8:     H 2014-04-18  4 2014-03-19     2
 9:     H 2014-04-23  2 2014-03-24     2
10:     A 2014-04-01  1 2014-03-02     1

Datos:

library(data.table)

# Example data: one observation per element, aligned across the three vectors.
date <- as.Date(c(
  "2014-04-01", "2014-04-12", "2014-04-07", "2014-05-03", "2014-04-14",
  "2014-05-04", "2014-03-31", "2014-04-18", "2014-04-23", "2014-04-01"
))
group <- c("G", "G", "F", "G", "E", "E", "H", "H", "H", "A")
ID <- c(2, 3, 4, 2, 3, 1, 2, 4, 2, 1)

# Columns are named explicitly; the order (group, date, ID) is what the
# printed output shows.
DT <- data.table(group = group, date = date, ID = ID)
1
chinsoon12 15 mar. 2021 a las 01:58

Creo que tu código a mí me funciona.



library(data.table)

date <- c(
  "2014-04-01", "2014-04-12", "2014-04-07", "2014-05-03", "2014-04-14",
  "2014-05-04", "2014-03-31", "2014-04-18", "2014-04-23", "2014-04-01"
)
group <- c("G", "G", "F", "G", "E", "E", "H", "H", "H", "A")
ID <- c(2, 3, 4, 2, 3, 1, 2, 4, 2, 1)

# The character dates are converted to Date so that `+ 30` means 30 days.
dt <- data.table(date = as.Date(date), group, ID)

# Count distinct IDs per group among the rows dated no later than 30 days
# after the first date in the table. `i` is evaluated on the whole table
# before grouping, so the cut-off is the same for every group; rows past the
# cut-off are not assigned and keep NA.
dt[date <= first(date) + 30, count := uniqueN(ID), by = group]

# Fill the NA rows with the first non-NA count of their own group. This has to
# run per group: without `by`, a single value (the first non-NA count of the
# whole column) would be recycled over every row.
dt[, count := count[!is.na(count)][1L], by = group]

dt

Eso sí, antes convertí las fechas a la clase Date.

Produce esto:

          date group ID count
 1: 2014-04-01     A  1     1
 2: 2014-04-14     E  3     1
 3: 2014-05-04     E  1     1
 4: 2014-04-07     F  4     1
 5: 2014-04-01     G  2     2
 6: 2014-04-12     G  3     2
 7: 2014-05-03     G  2     2
 8: 2014-03-31     H  2     2
 9: 2014-04-18     H  4     2
10: 2014-04-23     H  2     2
1
Sirius 14 mar. 2021 a las 23:48

Si he entendido correctamente su problema, una forma alternativa dentro del tidyverse sería esta:

library(tidyverse)

# Example data; `date` is deliberately kept as character here.
tb <- dplyr::tibble(
  date = c(
    "2014-04-01", "2014-04-12", "2014-04-07", "2014-05-03", "2014-04-14",
    "2014-05-04", "2014-03-31", "2014-04-18", "2014-04-23", "2014-04-01"
  ),
  group = c("G", "G", "F", "G", "E", "E", "H", "H", "H", "A"),
  ID = c(2, 3, 4, 2, 3, 1, 2, 4, 2, 1)
)

# Number of distinct IDs per group, attached to every original row.
# No date filter is applied: the count covers all rows of the group.
tb %>%
  # One row per (group, ID) pair, so count() yields distinct IDs per group.
  dplyr::distinct(group, ID) %>%
  dplyr::count(group) %>%
  # Explicit key: bring the per-group count back onto every row of tb.
  dplyr::right_join(tb, by = "group") %>%
  dplyr::select(group, date, ID, Count = n)

   group date          ID Count
    <chr> <chr>      <dbl> <int>
 1 A     2014-04-01     1     1
 2 E     2014-04-14     3     2
 3 E     2014-05-04     1     2
 4 F     2014-04-07     4     1
 5 G     2014-04-01     2     2
 6 G     2014-04-12     3     2
 7 G     2014-05-03     2     2
 8 H     2014-03-31     2     2
 9 H     2014-04-18     4     2
10 H     2014-04-23     2     2

Para un cálculo de tipo ventana móvil (rolling window), esto debería servir como solución:

# Rolling count: for each row, the number of distinct IDs seen in the same
# group on that row's date or in the 30 days before it.
tb %>%
  # Self-join: pair every row (.x) with every row of the same group (.y).
  dplyr::full_join(tb, by = "group") %>%
  # Gap in whole days, computed from Date values. Calling difftime() on the raw
  # character dates would convert them to local-time POSIXct, where a DST
  # change makes the gap fractional (31 days can become 30.96).
  dplyr::mutate(
    days_back = as.numeric(
      difftime(as.Date(date.x), as.Date(date.y), units = "days")
    )
  ) %>%
  # Keep pairs where .y is on the same day as .x or up to 30 days earlier.
  dplyr::filter(days_back >= 0, days_back < 31) %>%
  dplyr::distinct(group, date.x, ID.y) %>%
  dplyr::count(group, date.x) %>%
  # To inspect the per-(group, date) counts on their own, stop the pipe after
  # count() above; the steps below only attach them back to the rows of tb.
  dplyr::right_join(tb, by = c("group", "date.x" = "date")) %>%
  dplyr::select(group, date = date.x, ID, count = n)

   group date          ID count
   <chr> <chr>      <dbl> <int>
 1 A     2014-04-01     1     1
 2 E     2014-04-14     3     1
 3 E     2014-05-04     1     2
 4 F     2014-04-07     4     1
 5 G     2014-04-01     2     1
 6 G     2014-04-12     3     2
 7 G     2014-05-03     2     2
 8 H     2014-03-31     2     1
 9 H     2014-04-18     4     2
10 H     2014-04-23     2     2
1
DPH 15 mar. 2021 a las 00:19