Estoy tratando de usar ggplot para trazar una serie de gráficos.

El código es:

# One point per decile at ave_Networth, with error bars spanning
# ave_Networth - sd_Networth to ave_Networth + sd_Networth, drawn in a
# separate panel for every AGE_bin value.
ggplot(data = df, mapping = aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    mapping = aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth
    )
  ) +
  # scales = "free" lets both axes vary from panel to panel.
  facet_wrap(facets = ~AGE_bin, scales = "free")

Sin embargo, el "decil 10" distorsiona todos los demás deciles. ¿Cómo puedo normalizar cada panel de facet_wrap de modo que el "decil 10" pueda compararse mejor con los otros deciles?

Datos:

df <- structure(list(AGE_bin = c("bin_18_24", "bin_18_24", "bin_18_24", 
"bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", 
"bin_18_24", "bin_18_24", "bin_25_29", "bin_25_29", "bin_25_29", 
"bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", 
"bin_25_29", "bin_25_29", "bin_30_34", "bin_30_34", "bin_30_34", 
"bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", 
"bin_30_34", "bin_30_34", "bin_35_39", "bin_35_39", "bin_35_39", 
"bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", 
"bin_35_39", "bin_35_39", "bin_40_44", "bin_40_44", "bin_40_44", 
"bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", 
"bin_40_44", "bin_40_44", "bin_45_49", "bin_45_49", "bin_45_49", 
"bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", 
"bin_45_49", "bin_45_49", "bin_50_54", "bin_50_54", "bin_50_54", 
"bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", 
"bin_50_54", "bin_50_54", "bin_55_59", "bin_55_59", "bin_55_59", 
"bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", 
"bin_55_59", "bin_55_59", "bin_60_64", "bin_60_64", "bin_60_64", 
"bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", 
"bin_60_64", "bin_60_64", "bin_65_90", "bin_65_90", "bin_65_90", 
"bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", 
"bin_65_90", "bin_65_90"), decile = c(1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L), ave_Networth = c(-42799.9514563107, -13326.7549019608, 
-3804.56310679612, 182.656862745098, 2367.56310679612, 5490.49019607843, 
10219.3786407767, 15573.6666666667, 32942.0873786408, 1215195, 
-88543.938547486, -24661.6629213483, -6073.77653631285, 1068.8595505618, 
4448.57541899441, 9035.94943820225, 18357.6983240223, 38919.7191011236, 
75015.1117318436, 360729.747191011, -83359.7058823529, -10225.6590909091, 
645.52036199095, 6519.20454545455, 16950.520361991, 32910.3090909091, 
59947, 99614.9363636364, 193918.529411765, 4559636.43636364, 
-43682.3646341463, -2316.69918699187, 3812.26016260163, 14740.2073170732, 
31149.2520325203, 61549.8536585366, 111223.390243902, 220390.162601626, 
431319.044715447, 39707858.5813008, -47304.6305970149, 908.816479400749, 
9145.00746268657, 31857.7191011236, 76431.6940298507, 136961.548689139, 
239810.029850746, 409516.632958802, 799214.123134328, 23437733.3146067, 
-23534.0347222222, 5107.85069444444, 19872.9652777778, 62279.3229166667, 
141796.600694444, 254556.736111111, 481769.951388889, 928658.868055556, 
2675194.20138889, 43276345.59375, -18186.3404255319, 7353.51671732523, 
40047.4984802432, 106741.513677812, 201771.617021277, 349972.689969605, 
632600.917933131, 1395636.24620061, 7467362.75075988, 100107189.018237, 
-12872.2715053763, 12844.7601078167, 64099.5698924731, 162562.291105121, 
324875.215053763, 672549.58490566, 1464271.26344086, 3975236.26684636, 
15764846.3172043, 133524703.185984, -2200.25284090909, 29720.5982905983, 
108347.6875, 241293.168091168, 464230.673295455, 805069.43019943, 
1655039.94318182, 4463594.18803419, 15625284.53125, 129427645.128205, 
-12766.4538361508, 46987.2743823147, 140018.637191157, 279912.055006502, 
506132.146423927, 860558.538361508, 1646383.63328999, 4331098.02275683, 
20196055.0429129, 222960808.747984), sd_Networth = c(19952.4205187352, 
4009.59002234056, 1755.86187802571, 354.750993275092, 875.657161288449, 
983.36347182754, 1154.42818471179, 2673.13307234081, 9814.53960254566, 
3229068.69348881, 80763.4718472531, 6641.16873426075, 3948.88255496786, 
823.703039739828, 1163.67857190816, 1530.58811957344, 4659.8855411689, 
7753.20657773506, 14988.8062067764, 391183.089014281, 79403.7440792276, 
6771.38822139849, 1076.50097149467, 2085.51169306169, 3538.12985729517, 
6773.87617091665, 8136.08653692693, 15575.370906716, 43499.5408140372, 
16251487.9059923, 35314.6097298394, 2791.20649192616, 2213.05829515479, 
3292.91147796933, 6575.35932388955, 11179.6713837163, 22475.5169477255, 
39758.4468521584, 119334.223663411, 174371376.396929, 49949.5306903766, 
1799.05664503879, 4185.55767385215, 9485.99298648255, 15351.1273951797, 
23735.7079084032, 38061.3561426131, 60915.0623003272, 222652.345949324, 
94889492.5724926, 40634.4838428703, 3486.55103511871, 6022.01536051466, 
18357.8033065045, 30008.4145616776, 43065.4085235003, 91012.8666376759, 
203097.385703473, 1053542.62119673, 58091928.9133239, 31388.6889295018, 
5191.21573011365, 14192.8835953361, 22709.198055496, 33034.8868226208, 
54945.0489348437, 119298.977766417, 450266.641660294, 4096090.77500322, 
156293273.663792, 33679.4592685038, 7624.74535501237, 24662.3647632881, 
29814.2874815741, 66391.9192226496, 123491.617620793, 406935.703862311, 
1212704.00461397, 7023794.80821185, 141166857.287318, 16746.6945744379, 
14991.3779599531, 26718.8686094867, 49599.5165232508, 69555.903370777, 
142279.335735688, 350387.632009764, 1529856.10479949, 6307011.85646724, 
166820992.513686, 165289.391214998, 21461.3316797954, 33730.6952915096, 
51158.8410213337, 78696.8069684297, 138373.125085833, 394345.528508884, 
1597491.31445124, 10026567.8512041, 269598766.17565)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    AGE_bin = c("bin_18_24", "bin_25_29", "bin_30_34", "bin_35_39", 
    "bin_40_44", "bin_45_49", "bin_50_54", "bin_55_59", "bin_60_64", 
    "bin_65_90"), .rows = list(1:10, 11:20, 21:30, 31:40, 41:50, 
        51:60, 61:70, 71:80, 81:90, 91:100)), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
1
user113156 1 jul. 2019 a las 23:29

1 respuesta

La mejor respuesta

Editado para incluir la transformación pseudo_log, que es más apropiada que log10 para representar números negativos.

¿Tal vez ayudaría usar una transformación logarítmica?

# Same points and error bars as the question's plot, but on a log10 y axis.
# Non-positive values cannot be placed on a log10 axis, so negative
# ave_Networth values and negative error-bar ends are not shown.
ggplot(data = df, mapping = aes(x = decile, y = ave_Networth)) +
  geom_point() +
  # Axis labels are formatted with scales::comma.
  scale_y_log10(labels = scales::comma) +
  geom_errorbar(
    mapping = aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  # Two rows of panels, each with its own x and y ranges.
  facet_wrap(facets = ~AGE_bin, scales = "free", nrow = 2)

enter image description here


EDITADO: Un problema de la transformación logarítmica estándar es que excluye los números negativos, lo que distorsiona la visualización de dos maneras: al excluir algunos de los deciles bajos y al omitir los grandes valores negativos posibles en el decil 10.

Por lo tanto, otro enfoque relacionado que puede ser más apropiado aquí es scales::pseudo_log_trans, que combina una transformación logarítmica con signo (capaz de representar números negativos) con una transformación lineal cerca de cero. Controlando el primer argumento, `sigma`, podemos cambiar cuánto espacio se trata de forma lineal y cuánto de forma logarítmica. En este caso, sigma ≈ 1000 parecía repartir mejor el espacio visual para este conjunto de datos, aunque eso es bastante subjetivo. A costa de cierta distorsión por el espaciado desigual, la transformación pseudo-logarítmica ofrece un buen equilibrio entre precisión y amplitud.

# Same points and error bars, drawn on a pseudo-log y axis
# (scales::pseudo_log_trans with sigma = 1000) so that negative values stay
# visible.
# NOTE(review): newer ggplot2 releases may prefer `transform =` over
# `trans =` -- confirm against the installed version before changing it.
ggplot(data = df, mapping = aes(x = decile, y = ave_Networth)) +
  geom_point() +
  scale_y_continuous(
    trans = scales::pseudo_log_trans(sigma = 1000),
    labels = scales::comma,
    # Powers of ten from 1e3 to 1e8 on both sides of zero; no break at zero.
    breaks = c(
      -1e8, -1e7, -1e6, -1e5, -1e4, -1e3,
      1e3, 1e4, 1e5, 1e6, 1e7, 1e8
    ),
    minor_breaks = NULL
  ) +
  # One labelled tick per decile, with no minor grid lines.
  scale_x_continuous(breaks = seq_len(10), minor_breaks = NULL) +
  geom_errorbar(
    mapping = aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  # Only the y axis varies between panels; the x axis is shared.
  facet_wrap(facets = ~AGE_bin, scales = "free_y", nrow = 2)

enter image description here

2
Jon Spring 1 jul. 2019 a las 23:52