1

我在 O'Reilly 的一份演示文稿中找到了下图(http://cdn.oreillystatic.com/en/assets/1/event/85/Case%20Study_%20What_s%20a%20Customer%20Worth_%20Presentation.pdf):

在此处输入图像描述

是否可以在 R 中使用基础图形(base graphics)、lattice 或 ggplot2 重新绘制此图?

这是一些玩具数据:

# Build a reproducible toy purchase log: one row per purchase, with the
# customer id, the purchase date and the purchase amount.
set.seed(123)

number_of_observations <- 20
number_of_customers <- 5
day_start <- "2013/01/01"
day_end <- "2013/12/31"

# Every calendar day in the observation window, both ends included.
all_days <- seq.Date(as.Date(day_start), as.Date(day_end), by = "day")

# The three draws below must stay in this order (id, date, amount) so the
# random number stream is consumed exactly as in a single data.frame() call.
customer_index <- sample(
  number_of_customers,
  size = number_of_observations,
  replace = TRUE
)
purchase_date <- sample(
  all_days,
  size = number_of_observations,
  replace = TRUE
)
purchase_amount <- sample(
  1000,
  size = number_of_observations,
  replace = TRUE
)

d <- data.frame(
  id = letters[customer_index],
  date = purchase_date,
  amount = purchase_amount
)

# Sort by customer, then chronologically within each customer.
row_order <- order(d$id, d$date)
d_sorted <- d[row_order, ]

d_sorted

# id        date amount
# a   2013-01-09    561
# a   2013-03-20    754
# a   2013-09-16    139
# b   2013-04-27    896
# b   2013-10-04    128
# b   2013-11-21    143
# c   2013-02-23    858
# c   2013-04-16    266
# c   2013-07-18    234
# c   2013-08-22    414
# c   2013-10-18    122
# c   2013-11-26    443
# d   2013-09-10    415
# d   2013-09-10    799
# e   2013-03-26    375
# e   2013-06-24    207
# e   2013-08-05    466
# e   2013-08-28    153
# e   2013-12-18     46
# e   2013-12-29    369

谢谢你的帮助。

4

2 回答 2

2

我认为以下是尽可能接近的。

编辑:使用 data.table 可以写得更简单:

# Draw one horizontal timeline per customer with base graphics:
# a circle marks the first purchase, a cross marks every later purchase,
# and an arrow runs from the first purchase to the right edge of the plot.
# `d` is the toy data.frame from the question (columns: id, date, amount).
library(data.table)

D <- data.table(d)
# Since R 4.0 data.frame() keeps strings as character, so convert id to a
# factor explicitly: the plot needs a numeric y position per customer.
D[, id := factor(id)]

MaxDate <- D[, max(date)] + 10  # right end of the arrows, past the last sale
MinDate <- D[, min(date)]

# One row per purchase, tagged with that customer's first purchase date.
D2 <- D[, list(date, firstdate = min(date)), by = id]
D2[, ypos := as.integer(id)]

first_purchase <- D2[date == firstdate]
repeat_purchase <- D2[date != firstdate]

# Empty canvas; xlim is set explicitly so the arrows ending at MaxDate are
# always inside the plotting region.
plot(
  D2$date, D2$ypos,
  type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "",
  xlim = c(MinDate, MaxDate)
)
points(repeat_purchase$date, repeat_purchase$ypos, pch = 4)
points(first_purchase$date, first_purchase$ypos)
# y1 is left at its default, so each arrow is horizontal.
arrows(
  x0 = first_purchase$date, y0 = first_purchase$ypos,
  x1 = MaxDate, length = 0.1
)

# Vertical guides: one 5 days before the arrow tips, one at the first sale.
abline(v = MaxDate - 5)
abline(v = MinDate)

# Label each timeline with its customer id and both ends of the time axis.
id_levels <- levels(D$id)
axis(
  side = 2, tick = FALSE, at = seq_along(id_levels),
  labels = paste0("ID = ", id_levels), las = 2
)
axis(
  side = 1, tick = FALSE, at = c(MinDate, MaxDate),
  labels = strftime(c(MinDate, MaxDate), format = "Week %W, %Y")
)

在此处输入图像描述

于 2013-10-29T10:26:08.253 回答
0

是的,这是可以做到的(在一定程度上)。下面的代码可以作为一个起点。

# Load packages. library() stops with an error if a package is missing,
# whereas require() only returns FALSE and lets the script fail later.
library(data.table)
library(ggplot2)

# Prepare data for plotting.
# `d_sorted`, `day_start` and `day_end` come from the question's toy data.
dt <- data.table(d_sorted, key = "id")
# first = 1 on each customer's earliest purchase date, 0 on later purchases.
dt[, first := as.numeric(min(date) == date), by = id]
# Append one artificial row per customer (first = 2), placed 5 days after
# day_end, so every timeline extends to the right edge of the plot.
dt <- rbindlist(list(
  dt,
  data.table(
    id = unique(dt$id),
    date = as.Date(day_end) + 5,
    amount = NA_integer_,
    first = 2
  )
))

# Plot (here you can change many things, depending on what is important to
#       reproduce exactly as in the original and where you are willing to
#       have some differences).
# Shapes: 5 (diamond) = first purchase, 4 (cross) = later purchase,
#         17 (filled triangle) = artificial end-of-line marker.
ggplot(dt, aes(date, id)) +
  geom_point(aes(shape = factor(first)), size = 3) +
  # guide = "none" replaces guide = FALSE, deprecated in ggplot2 >= 3.3.4.
  scale_shape_manual(
    values = c("1" = 5, "0" = 4, "2" = 17),
    guide = "none"
  ) +
  scale_x_date(limits = c(as.Date(day_start), as.Date(day_end) + 5)) +
  # One line per customer; the grouping is stated explicitly.
  geom_line(aes(group = id))
于 2013-10-29T10:26:19.203 回答