У меня есть фрейм данных с категориальными и числовыми переменными. Я хочу избавиться от сильно коррелированных переменных.
Итак, сначала я избавился от всех своих категориальных столбцов, составил матрицу и выяснил, какие числовые столбцы стоит исключить. Теперь я хочу вернуться, а также иметь столбцы с категориальными переменными.
И я запутался в самом простом способе сделать это... Возможно, объединение выбирает, какие столбцы я хочу добавить из второй таблицы...
library(caret)
training_2 <- subset(training, select = -c(user_name ,timestamp, etc))
corr_matrix <- cor(training_2)
highCorr <- findCorrelation(corr_matrix, .75)
training_2<- training_2[,-highCorr]
РЕДАКТИРОВАТЬ: ДАННЫЕ
structure(list(X = 1:15, yaw_belt = c(-94.4, -94.4, -94.4, -94.4,
-94.4, -94.4, -94.4, -94.4, -94.4, -94.4, -94.4, -94.4, -94.4,
-94.4, -94.4), gyros_belt_x = c(0, 0.02, 0, 0.02, 0.02, 0.02,
0.02, 0.02, 0.02, 0.03, 0.03, 0.02, 0.02, 0.02, 0), gyros_belt_y = c(0,
0, 0, 0, 0.02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), gyros_belt_z = c(-0.02,
-0.02, -0.02, -0.03, -0.02, -0.02, -0.02, -0.02, -0.02, 0, -0.02,
-0.02, 0, -0.02, 0), magnet_belt_x = c(-3L, -7L, -2L, -6L, -6L,
0L, -4L, -2L, 1L, -3L, -5L, -2L, -3L, -8L, -1L), magnet_belt_y = c(599L,
608L, 600L, 604L, 600L, 603L, 599L, 603L, 602L, 609L, 596L, 602L,
606L, 598L, 597L), roll_arm = c(-128, -128, -128, -128, -128,
-128, -128, -128, -128, -128, -128, -128, -128, -128, -129),
pitch_arm = c(22.5, 22.5, 22.5, 22.1, 22.1, 22, 21.9, 21.8,
21.7, 21.6, 21.5, 21.5, 21.4, 21.4, 21.4), yaw_arm = c(-161,
-161, -161, -161, -161, -161, -161, -161, -161, -161, -161,
-161, -161, -161, -161), total_accel_arm = c(34L, 34L, 34L,
34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 34L
), gyros_arm_y = c(0, -0.02, -0.02, -0.03, -0.03, -0.03,
-0.03, -0.02, -0.03, -0.03, -0.03, -0.03, -0.02, 0, 0), gyros_arm_z = c(-0.02,
-0.02, -0.02, 0.02, 0, 0, 0, 0, -0.02, -0.02, 0, 0, -0.02,
-0.03, -0.03), magnet_arm_x = c(-368L, -369L, -368L, -372L,
-374L, -369L, -373L, -372L, -369L, -376L, -366L, -363L, -372L,
-371L, -374L), magnet_arm_z = c(516L, 513L, 513L, 512L, 506L,
513L, 509L, 510L, 518L, 516L, 509L, 520L, 509L, 523L, 510L
), roll_dumbbell = c(13.05217456, 13.13073959, 12.85074981,
13.43119971, 13.37871611, 13.38245941, 13.12694911, 12.75083041,
13.15463353, 13.33033728, 13.13073959, 13.10320545, 13.38245941,
13.41047767, 13.07948887), pitch_dumbbell = c(-70.49400371,
-70.63750507, -70.27811982, -70.39379464, -70.42855971, -70.81758832,
-70.24756905, -70.34768359, -70.42520377, -70.85058796, -70.63750507,
-70.45974712, -70.81758832, -70.99594236, -70.67116245),
yaw_dumbbell = c(-84.87393888, -84.71064711, -85.14078134,
-84.87362553, -84.85305745, -84.46500278, -85.09961258, -85.09708174,
-84.91563379, -84.44601983, -84.71064711, -84.89472246, -84.46500278,
-84.28004856, -84.69053461), total_accel_dumbbell = c(37L,
37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L, 37L,
37L, 37L), gyros_dumbbell_y = c(-0.02, -0.02, -0.02, -0.02,
-0.02, -0.02, -0.02, -0.02, -0.02, -0.02, -0.02, -0.02, -0.02,
-0.02, -0.02), magnet_dumbbell_z = c(-65, -64, -63, -60,
-68, -66, -70, -74, -65, -69, -64, -65, -69, -68, -63), roll_forearm = c(28.4,
28.3, 28.3, 28.1, 28, 27.9, 27.9, 27.8, 27.7, 27.7, 27.6,
27.5, 27.2, 27.2, 27.2), pitch_forearm = c(-63.9, -63.9,
-63.9, -63.9, -63.9, -63.9, -63.9, -63.8, -63.8, -63.8, -63.8,
-63.8, -63.9, -63.9, -63.9), yaw_forearm = c(-153, -153,
-152, -152, -152, -152, -152, -152, -152, -152, -152, -152,
-151, -151, -151), total_accel_forearm = c(36L, 36L, 36L,
36L, 36L, 36L, 36L, 36L, 36L, 36L, 36L, 36L, 36L, 36L, 36L
), gyros_forearm_x = c(0.03, 0.02, 0.03, 0.02, 0.02, 0.02,
0.02, 0.02, 0.03, 0.02, 0.02, 0.02, 0, 0, 0), gyros_forearm_z = c(-0.02,
-0.02, 0, 0, -0.02, -0.03, -0.02, 0, -0.02, -0.02, -0.02,
-0.03, -0.03, -0.03, -0.02), accel_forearm_x = c(192L, 192L,
196L, 189L, 189L, 193L, 195L, 193L, 193L, 190L, 193L, 191L,
193L, 193L, 192L), accel_forearm_z = c(-215L, -216L, -213L,
-214L, -214L, -215L, -215L, -213L, -214L, -215L, -214L, -215L,
-215L, -214L, -214L), magnet_forearm_x = c(-17L, -18L, -18L,
-16L, -17L, -9L, -18L, -9L, -16L, -22L, -17L, -11L, -15L,
-14L, -16L), magnet_forearm_y = c(654, 661, 658, 658, 655,
660, 659, 660, 653, 656, 657, 657, 655, 659, 656), magnet_forearm_z = c(476,
473, 469, 469, 473, 478, 470, 474, 476, 473, 465, 478, 472,
478, 472)), .Names = c("X", "yaw_belt", "gyros_belt_x", "gyros_belt_y",
"gyros_belt_z", "magnet_belt_x", "magnet_belt_y", "roll_arm",
"pitch_arm", "yaw_arm", "total_accel_arm", "gyros_arm_y", "gyros_arm_z",
"magnet_arm_x", "magnet_arm_z", "roll_dumbbell", "pitch_dumbbell",
"yaw_dumbbell", "total_accel_dumbbell", "gyros_dumbbell_y", "magnet_dumbbell_z",
"roll_forearm", "pitch_forearm", "yaw_forearm", "total_accel_forearm",
"gyros_forearm_x", "gyros_forearm_z", "accel_forearm_x", "accel_forearm_z",
"magnet_forearm_x", "magnet_forearm_y", "magnet_forearm_z"), row.names = c(NA,
15L), class = "data.frame")
Спасибо
training_2
? Используйтеdput(training_2)
, чтобы отредактировать свой вопрос (илиdput(head(training_2,15))
, если данные большие). - person LyzandeR   schedule 19.11.2015