R 기초 2
파생변수 생성
변수이름 확인
# Inspect the column names of the built-in `cars` data set, then replace
# them with two Korean labels.
names(cars)
names(cars) <- c("속도", "거리")

# Show the first and last rows to confirm the rename.
head(cars)
tail(cars)
파생변수 생성
# Derived column: element-wise product of the two existing columns.
cars[["속도X거리"]] <- cars[["속도"]] * cars[["거리"]]
head(cars)
조건에 따라 변수의 값을 새로 생성
# Label each row "빠름" when its 속도 value exceeds 7, otherwise "느림".
cars[["속도정도"]] <- ifelse(cars[["속도"]] > 7, "빠름", "느림")
head(cars)
hflights 패키지 설치
# Install hflights only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("hflights", quietly = TRUE)) {
  install.packages("hflights")
}

# library() stops with an error when the package cannot be attached, whereas
# require() only returns FALSE and lets the script fail later on.
library(hflights)

# Copy the packaged data set into the workspace and summarise every column.
hflights <- hflights
summary(hflights)
Select
# Attach dplyr. library() raises an error if the package is unavailable,
# instead of returning FALSE the way require() does, so a missing dependency
# is reported here rather than at the first dplyr call.
library(dplyr)
str(hflights)

# Keep only the Origin, Distance and TailNum columns.
newhflights <- select(hflights, Origin, Distance, TailNum)
Filter
# Rows with Month equal to 2: store the subset, preview it, count its rows.
hflights1 <- hflights %>% filter(Month == 2)
head(hflights1)
nrow(hflights1)

# Rows with Month equal to 8.
hflights2 <- hflights %>% filter(Month == 8)
head(hflights2)
nrow(hflights2)

# Rows where Month is either 2 or 8.
hflights3 <- hflights %>% filter(Month == 2 | Month == 8)
nrow(hflights3)

# Rows where Month is 2 and Dest is "DFW" (both conditions must hold).
hflights4 <- hflights %>% filter(Month == 2 & Dest == "DFW")
nrow(hflights4)
정렬 Arrange
# Sort ascending by Month and look at both ends of the result.
hflights5 <- hflights %>% arrange(Month)
head(hflights5)
tail(hflights5)

# Sort descending by Month.
hflights6 <- hflights %>% arrange(desc(Month))
head(hflights6)

# Sort descending by Month, breaking ties by ascending DayOfWeek.
hflights7 <- hflights %>% arrange(desc(Month), DayOfWeek)
head(hflights7)
열의 조작
# Preview two derived columns (the result is printed, not assigned):
#   gain          - ArrDelay minus DepDelay
#   gain_per_hour - gain divided by AirTime / 60
hflights %>%
  mutate(
    gain = ArrDelay - DepDelay,
    gain_per_hour = gain / (AirTime / 60)
  ) %>%
  head()
Summary
# Mean of DepDelay over all rows, ignoring missing values.
hflights %>%
  summarise(delay = mean(DepDelay, na.rm = TRUE))
그룹별 요약 %>%
# Per-TailNum averages of Month, DayofMonth and DayOfWeek.
# TailNum is listed in select() explicitly: leaving the grouping column out
# makes dplyr add it back implicitly and emit an
# "Adding missing grouping variables" message on every run.
hflights %>%
  group_by(TailNum) %>%
  select(TailNum, Month, DayofMonth, DayOfWeek) %>%
  summarise(
    AveMonth = mean(Month, na.rm = TRUE),
    AveDayofMonth = mean(DayofMonth, na.rm = TRUE),
    AveDayofWeek = mean(DayOfWeek, na.rm = TRUE)
  )
연습문제 1
1
# Exercise 1.1: keep five columns of the flight data and preview them.
hflights8 <- hflights %>%
  select(FlightNum, TailNum, AirTime, Distance, Origin)
head(hflights8)
2
# Exercise 1.2: rows with Origin "HOU" and Distance greater than 1000.
hflights9 <- hflights8 %>%
  filter(Origin == "HOU" & Distance > 1000)
head(hflights9)
3
# Exercise 1.3: per-TailNum averages of AirTime and Distance.
# TailNum is kept in select() explicitly so dplyr does not have to re-add the
# grouping column and print an "Adding missing grouping variables" message.
hflights9 %>%
  group_by(TailNum) %>%
  select(TailNum, AirTime, Distance) %>%
  summarise(
    AveAirTime = mean(AirTime, na.rm = TRUE),
    AveDistance = mean(Distance, na.rm = TRUE)
  )
ggplot2
패키지 설치
# Install ggplot2 only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# library() fails loudly if the package cannot be attached; require() would
# only return FALSE.
library(ggplot2)
데이터 불러오기
# Load the midwest data set from ggplot2 and summarise its columns.
data("midwest", package = "ggplot2")
summary(midwest)

# Heavily penalise scientific notation so large numbers print in fixed form.
options(scipen = 999)

# Base layer only: area on x, poptotal on y, no geoms added yet.
ggplot(data = midwest, mapping = aes(x = area, y = poptotal))
Scatter Plot 생성
# Scatter plot of poptotal against area.
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point()
회귀선(regression line) 추가
# Same scatter plot with a linear-model smoother drawn on top.
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")
그래프의 범위 조정
# Keep the scatter + linear-fit plot in `g` so later steps can extend it.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")

# Restrict both axes through scale limits.
g + xlim(0, 0.1) + ylim(0, 1000000)
줌
# Zoom into a region of `g` via the coordinate system and display it.
g1 <- g +
  coord_cartesian(xlim = c(0, 0.75), ylim = c(0, 500000))
g1

# Reassign g1, this time limiting the axes through scale limits instead.
g1 <- g + xlim(0, 0.1) + ylim(0, 1000000)
제목과 축 이름
# Show g1 as it is, then again with title, subtitle, axis labels and caption.
g1
g1 +
  labs(
    x = "Area",
    y = "Population",
    title = "Area VS Population",
    subtitle = "midwest",
    caption = "Midwest Population"
  )
색변경
# Rebuild `g` with small red points (a fixed colour, not mapped to data).
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(colour = "red", size = 1) +
  geom_smooth(method = "lm")
g + xlim(0, 0.1) + ylim(0, 1000000)
색에 데이터 정보 반영
색변경
# Rebuild `g` with the point colour mapped to the state column.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(colour = state), size = 1) +
  geom_smooth(method = "lm")
g + xlim(0, 0.1) + ylim(0, 1000000)
그래프 분할
# Plain scatter + linear fit, split into one panel per state on two rows.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point() +
  geom_smooth(method = "lm")
g + facet_wrap(~ state, nrow = 2)
# Print the data and look at the distribution of popdensity.
midwest
hist(midwest$popdensity)

# Two-level label: "high" when popdensity is at least 10000, else "low".
midwest[["degreeofdensity"]] <- ifelse(
  midwest[["popdensity"]] >= 10000,
  "high",
  "low"
)
table(midwest$degreeofdensity)

# Colour the points by the new label and facet by state on two rows.
g <- ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(colour = degreeofdensity)) +
  geom_smooth(method = "lm")
g + facet_wrap(~ state, nrow = 2)
ggplot 실습
# Copy mtcars into the workspace, summarise each column, and draw the
# default plot for the whole data frame.
mtcars <- mtcars
summary(object = mtcars)
plot(x = mtcars)
연습문제 2
mtcars 데이터를 활용해서 두 개의 연속형 변수를 선택한 후 scatter plot 그려보기
# Scatter plot of hp against mpg.
ggplot(mtcars, aes(x = mpg, y = hp)) +
  geom_point()
범주형(categorical) 변수의 정보를 점의 색깔에 반영하기
# hp against mpg, with point colour taken from cyl converted to a factor.
g <- ggplot(mtcars, aes(x = mpg, y = hp)) +
  geom_point(aes(colour = as.factor(cyl)))
g

# Copy the row names into a regular column so they can serve as text labels,
# then list the column names.
mtcars[["names"]] <- rownames(mtcars)
colnames(mtcars)

# mpg against wt, coloured by vs, with a linear fit and one text label per row.
g <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(aes(colour = as.factor(vs))) +
  geom_smooth(method = "lm") +
  geom_text(aes(label = names), size = 3)
g
Comments