# Basic arithmetic; each expression's value is printed when run interactively.
3 + 4

4 - 2

17 %% 3 # remainder (modulo)

6 * 3

7 / 2

7 %/% 2 # integer quotient

# Scalar literals: a number, a character string, and the two logical constants.
1

"String"

TRUE

FALSE

# c() combines its arguments into a vector.
c(1,2,3)

# A vector holds a single type: mixing a string with numbers coerces
# every element to character.
c("a",2,3)

# Mixing a logical with numbers coerces TRUE to 1.
c(TRUE,1,2)

c("a","b")

c(1, 2, 3) + c(5, 6, 7) # element-wise addition

c(1, 2, 3) + 5 # adds 5 to every element

# R also emits a warning here because length 3 is not a multiple of length 2.
c(1, 2, 3) + c(3, 4) # lengths differ: c(3, 4) is recycled in order (1 + 3, 2 + 4, 3 + 3)

Warning message in c(1, 2, 3) + c(3, 4):
“두 객체의 길이가 서로 배수관계에 있지 않습니다”

# Lengths 4 and 2: the shorter vector c(3, 4) is recycled twice.
c(1, 2, 3,4) + c(3, 4) # 1 + 3, 2 + 4, 3 + 3, 4 + 4

# Assign a value to a name with `<-`.
people <- 40 # think of a variable as a named cell (A1, B1, ...) in Excel
people

# Assigning again replaces the previous value.
people <- c(1,2,3)
people

ls() # prints the names of the variables currently held in memory

# Delete a single variable.
rm(people)

# Delete every object that ls() reports.
rm(list=ls())

people <- c(1,2,3)

people * 3

people 
# The value is unchanged: a computation on a variable is one-off unless its
# result is assigned back to a variable.

people <- people * 2
people # reassigned

# Vector indexing starts at 1.
people[1] # first element only

people[1:2] # elements 1 through 2

# Built-in rounding helpers.
# floor(): round down to a whole number.
floor(3.4)

# ceiling(): round up to a whole number.
ceiling(2.1)

# round(): round to the nearest whole number.
c(round(3.4),round(3.5))

# Example of a user-defined function.
# Returns a + a + b + b, i.e. each argument is counted twice (note that,
# despite the name, this is not a plain a + b). The arithmetic is
# element-wise, so numeric vectors are accepted as well as single numbers.
user_add <- function(a, b) {
    # The value of the last evaluated expression is the function's result.
    a + a + b + b
}

# Call the function: 1 + 1 + 2 + 2.
user_add(1,2)

# An unnamed list; its elements may have different types.
a <- list(1,2,3,'a',TRUE)
a

# A named list.
b <- list(a=1,b=3,c=TRUE,d='a')
b

# Extract one element by name with `$`.
b$a

# Load the example data set from a CSV file in the working directory.
car <- read.csv('automobile.csv')
# read.csv() takes various arguments that control, for example, whether a
# header line is read and whether new row names are generated.

head(car) # shows the first 6 rows (the default for head())

car2 <- read.csv(file.choose()) # opens a file picker; the author suggests running this in RStudio

Error in file.choose(): 파일선택이 취소되었습니다
Traceback:

1. read.csv(file.choose())
2. read.table(file = file, header = header, sep = sep, quote = quote, 
 .     dec = dec, fill = fill, comment.char = comment.char, ...)
3. file.choose()

View(car) 
# The author suggests running this in RStudio as well: it opens a new window
# in which the whole data set can be browsed.

Error in View(car): ‘View()’ not yet supported in the Jupyter R kernel
Traceback:

1. View(car)
2. stop(sQuote("View()"), " not yet supported in the Jupyter R kernel")

dim(car) # rows x columns

# Row indexing: leave the column position empty to keep every column.
car[1,]

car[1:5,]

# Column indexing by name.
head(car['maker'])

head(car[3]) # a column can also be selected by its position

# Several columns at once, by name or by position.
head(car[c("maker","body")])

head(car[c(3,7)])

# The same columns using the [row, column] form.
head(car[,c(3,7)])

# Rows 1-2 of columns 3-7.
car[1:2,3:7]

# subset() keeps the rows for which the condition is TRUE.
# Comparison operators: >, <, <=, ==, !=
subset(car, horsepower > 145)

subset(car, horsepower < 60)

subset(car, horsepower <= 60)

subset(car, horsepower == 60)

head(subset(car, body != 'sedan'))

# Logical operators: & (and), | (or), ! (not)
subset(car, horsepower > 100 & fuel == 'diesel')

head(subset(car, body == 'wagon' | wheels == '4wd'))

head(subset(car, !(body == 'wagon' | wheels == '4wd')))

# Small example data frame used to demonstrate filtering on a numeric
# condition and on string prefixes/suffixes.
dat <- data.frame(
    num = c(36925, 36941, 12345),
    name = c('abc', 'def', 'abd')
)
dat

subset(dat, num %/% 100 == 369) # rows whose integer quotient by 100 is 369

# as.character() keeps these working when `name` is stored as a factor.
subset(dat, startsWith(as.character(name), 'ab')) # name starts with 'ab'

subset(dat, endsWith(as.character(name), 'bc')) # name ends with 'bc'

# Keep only the wagons and save them.
wagon <- subset(car, body=='wagon')
head(wagon)

# Write the subset to a CSV file in the working directory.
write.csv(wagon,'wagon.csv')

# Copy the data to the clipboard as tab-separated text. The error recorded
# below shows this failing on Unix, where the clipboard can only be read.
write.table(wagon,'clipboard',sep = '\t') # the author notes this works in RStudio

Error in file(file, ifelse(append, "a", "w")): Unix에서 클립보드에 대한 'mode'는 반드시 'r'이어야 합니다
Traceback:

1. write.table(wagon, "clipboard", sep = "\t")
2. file(file, ifelse(append, "a", "w"))

# Descriptive statistics of a single column: maximum, minimum, variance,
# and standard deviation.
max(car$length)

min(car$length)

var(car$length)

sd(car$length)

summary(car$length) # drop `$length` to summarise every variable in car

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  141.1   165.7   172.4   172.4   177.8   202.6

# is.na() tests whether a value is missing (NA).
is.na(NA)

# Install the dplyr package; this only needs to be done once.
install.packages('dplyr')

# Load the package into the current session.
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

# Load the data again for the dplyr examples.
car <- read.csv('automobile.csv')
head(car)

# summarise() collapses the data into a single row of summary values.
summarise(car, mean(length))

# The result column can be given a name (here `age`).
summarise(car, age=mean(length))

# Several summaries can be computed in one call.
summarise(car, mean(length), mean(width))

summarise(car, mean_car = mean(price))

# The pipe %>% passes the left-hand side as the first argument of the
# function on the right, so this is the same as summarise(car, mean(length)).
car %>% summarise(mean(length))

# filter() keeps the rows that satisfy the condition.
car %>% filter(body == 'sedan') %>% head()

# Mean length of the sedans.
car %>% 
    filter(body=='sedan') %>% 
    summarise(mean(length))

# Mean engine size of the hatchbacks priced above 10000.
car %>% 
    filter(body=='hatchback' & price > 10000) %>% 
    summarise(mean(engine_size))

# group_by() makes the following summarise() run once per group:
# mean city mpg for each fuel type.
car %>% 
    group_by(fuel) %>%
    summarise(mean(city_mpg))

# The same summary, restricted to cars with horsepower below 100.
car %>% filter(horsepower < 100)%>% 
    group_by(fuel) %>%
    summarise(mean(city_mpg))

# mutate() adds a column computed from existing columns.
car %>% mutate(diff_mpg = highway_mpg - city_mpg) %>% head()

# select() picks columns, here the first column by position.
car %>% select(1) %>% head()

# slice() picks rows by position, here rows 1 through 10.
car %>% slice(1:10)

# arrange() sorts the rows in ascending order of the given column.
car %>% arrange(symboling) %>% head()

# Wrapping the column in desc() sorts in descending order instead.
car %>% arrange(desc(symboling)) %>% head()

# Two small data frames sharing the key column `name`; they are the inputs
# to the join examples. 'a' appears only in x, 'd' only in y, and 'b'
# appears twice in y.
x <- data.frame(name = c('a','b','c'), v1 = 1:3)
y <- data.frame(name = c('b', 'b', 'c', 'd'), v2 = 1:4)

x

y

left_join(x,y) # keeps every row of x, matching on the shared column `name`

Joining, by = "name"
Warning message:
“Column `name` joining factors with different levels, coercing to character vector”

right_join(x,y) # keeps every row of y instead

Joining, by = "name"
Warning message:
“Column `name` joining factors with different levels, coercing to character vector”

inner_join(x,y) # only rows whose `name` value exists in both x and y

Joining, by = "name"
Warning message:
“Column `name` joining factors with different levels, coercing to character vector”

full_join(x,y) # full outer join: keeps all rows of both x and y (not a Cartesian product)

Joining, by = "name"
Warning message:
“Column `name` joining factors with different levels, coercing to character vector”

# semi_join() is a filtering join: it returns the rows of x that have a
# match in y, keeping only x's columns.
semi_join(x,y)

Joining, by = "name"
Warning message:
“Column `name` joining factors with different levels, coercing to character vector”

# Open the help page for semi_join().
?semi_join

symboling	normalized_losses	maker	fuel	aspiration	doors	body	wheels	engine_location	wheel_base	⋯	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
2	164	audi	gas	std	four	sedan	fwd	front	99.8	⋯	109	mpfi	3.19	3.4	10.0	102	5500	24	30	13950
2	164	audi	gas	std	four	sedan	4wd	front	99.4	⋯	136	mpfi	3.19	3.4	8.0	115	5500	18	22	17450
1	158	audi	gas	std	four	sedan	fwd	front	105.8	⋯	136	mpfi	3.19	3.4	8.5	110	5500	19	25	17710
1	158	audi	gas	turbo	four	sedan	fwd	front	105.8	⋯	131	mpfi	3.13	3.4	8.3	140	5500	17	20	23875
2	192	bmw	gas	std	two	sedan	rwd	front	101.2	⋯	108	mpfi	3.50	2.8	8.8	101	5800	23	29	16430
0	192	bmw	gas	std	four	sedan	rwd	front	101.2	⋯	108	mpfi	3.50	2.8	8.8	101	5800	23	29	16925

symboling	normalized_losses	maker	fuel	aspiration	doors	body	wheels	engine_location	wheel_base	⋯	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
2	164	audi	gas	std	four	sedan	fwd	front	99.8	⋯	109	mpfi	3.19	3.4	10.0	102	5500	24	30	13950
2	164	audi	gas	std	four	sedan	4wd	front	99.4	⋯	136	mpfi	3.19	3.4	8.0	115	5500	18	22	17450
1	158	audi	gas	std	four	sedan	fwd	front	105.8	⋯	136	mpfi	3.19	3.4	8.5	110	5500	19	25	17710
1	158	audi	gas	turbo	four	sedan	fwd	front	105.8	⋯	131	mpfi	3.13	3.4	8.3	140	5500	17	20	23875
2	192	bmw	gas	std	two	sedan	rwd	front	101.2	⋯	108	mpfi	3.50	2.8	8.8	101	5800	23	29	16430

maker	fuel	aspiration	doors	body
audi	gas	std	four	sedan
audi	gas	std	four	sedan

	symboling	normalized_losses	maker	fuel	aspiration	doors	body	wheels	engine_location	wheel_base	⋯	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
33	0	145	jaguar	gas	std	four	sedan	rwd	front	113.0	⋯	258	mpfi	3.63	4.17	8.1	176	4750	15	19	32250
49	3	142	mercedes-benz	gas	std	two	convertible	rwd	front	96.6	⋯	234	mpfi	3.46	3.10	8.3	155	4750	16	18	35056
72	0	128	nissan	gas	std	four	sedan	fwd	front	100.4	⋯	181	mpfi	3.43	3.27	9.0	152	5200	17	22	13499
73	0	108	nissan	gas	std	four	wagon	fwd	front	100.4	⋯	181	mpfi	3.43	3.27	9.0	152	5200	17	22	14399
74	0	108	nissan	gas	std	four	sedan	fwd	front	100.4	⋯	181	mpfi	3.43	3.27	9.0	152	5200	19	25	13499
75	3	194	nissan	gas	std	two	hatchback	rwd	front	91.3	⋯	181	mpfi	3.43	3.27	9.0	160	5200	19	25	17199
76	3	194	nissan	gas	turbo	two	hatchback	rwd	front	91.3	⋯	181	mpfi	3.43	3.27	7.8	200	5200	17	23	19699
77	1	231	nissan	gas	std	two	hatchback	rwd	front	99.2	⋯	181	mpfi	3.43	3.27	9.0	160	5200	19	25	18399
96	3	150	saab	gas	turbo	two	hatchback	fwd	front	99.1	⋯	121	mpfi	3.54	3.07	9.0	160	5500	19	26	18150
97	2	104	saab	gas	turbo	four	sedan	fwd	front	99.1	⋯	121	mpfi	3.54	3.07	9.0	160	5500	19	26	18620
138	3	197	toyota	gas	std	two	hatchback	rwd	front	102.9	⋯	171	mpfi	3.27	3.35	9.3	161	5200	20	24	16558
139	3	197	toyota	gas	std	two	hatchback	rwd	front	102.9	⋯	171	mpfi	3.27	3.35	9.3	161	5200	19	24	15998
140	-1	90	toyota	gas	std	four	sedan	rwd	front	104.5	⋯	171	mpfi	3.27	3.35	9.2	156	5200	20	24	15690
153	-2	103	volvo	gas	turbo	four	sedan	rwd	front	104.3	⋯	130	mpfi	3.62	3.15	7.5	162	5100	17	22	18420
154	-1	74	volvo	gas	turbo	four	wagon	rwd	front	104.3	⋯	130	mpfi	3.62	3.15	7.5	162	5100	17	22	18950
156	-1	95	volvo	gas	turbo	four	sedan	rwd	front	109.1	⋯	141	mpfi	3.78	3.15	8.7	160	5300	19	25	19045

	symboling	normalized_losses	maker	fuel	aspiration	doors	body	wheels	engine_location	wheel_base	⋯	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
9	2	121	chevrolet	gas	std	two	hatchback	fwd	front	88.4	⋯	61	2bbl	2.91	3.03	9.5	48	5100	47	53	5151
20	2	137	honda	gas	std	two	hatchback	fwd	front	86.6	⋯	92	1bbl	2.91	3.41	9.6	58	4800	49	54	6479
61	1	128	nissan	diesel	std	two	sedan	fwd	front	94.5	⋯	103	idi	2.99	3.47	21.9	55	4800	45	50	7099
118	0	91	toyota	diesel	std	four	sedan	fwd	front	95.7	⋯	110	idi	3.27	3.35	22.5	56	4500	34	36	7898
119	0	91	toyota	diesel	std	four	hatchback	fwd	front	95.7	⋯	110	idi	3.27	3.35	22.5	56	4500	38	47	7788
141	2	122	volkswagen	diesel	std	two	sedan	fwd	front	97.3	⋯	97	idi	3.01	3.40	23.0	52	4800	37	46	7775
143	2	94	volkswagen	diesel	std	four	sedan	fwd	front	97.3	⋯	97	idi	3.01	3.40	23.0	52	4800	37	46	7995

	symboling	normalized_losses	maker	fuel	aspiration	doors	body	wheels	engine_location	wheel_base	⋯	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
18	-1	110	dodge	gas	std	four	wagon	fwd	front	103.3	⋯	122	2bbl	3.34	3.46	8.5	88	5000	24	30	8921
26	0	78	honda	gas	std	four	wagon	fwd	front	96.5	⋯	92	1bbl	2.92	3.41	9.2	76	6000	30	34	7295
46	-1	93	mercedes-benz	diesel	turbo	four	wagon	rwd	front	110.0	⋯	183	idi	3.58	3.64	21.5	123	4350	22	25	28248
64	1	103	nissan	gas	std	four	wagon	fwd	front	94.5	⋯	97	2bbl	3.15	3.29	9.4	69	5200	31	37	7349
68	1	103	nissan	gas	std	four	wagon	fwd	front	94.5	⋯	97	2bbl	3.15	3.29	9.4	69	5200	31	37	7999
73	0	108	nissan	gas	std	four	wagon	fwd	front	100.4	⋯	181	mpfi	3.43	3.27	9.0	152	5200	17	22	14399

[DataMining] 3. 데이터 전처리 (0)	2017.09.25
[DataMining] 2. 시각화를 통한 탐색적 데이터 분석(EDA) (0)	2017.09.16
#12. 고급 시각화 (0)	2016.07.13
#11. 기초 시각화 [ R 내장 함수 ] (0)	2016.07.12
#10 .기술통계 [ 예제 ] (0)	2016.07.12

num	name
36925	abc
36941	def
12345	abd

num	name
36925	abc
36941	def

	num	name
1	36925	abc
3	12345	abd

fuel	mean(city_mpg)
diesel	30.40000
gas	26.11806

R Programming install & basic¶

R & R-Studio install¶

1. Basic Calculation¶

2. Scalar(스칼라)¶

3. Vector(벡터)¶

4. Variable(변수)¶

4-1 Assign Variable¶

4-2 Delete Variable¶

4-3 Calculate Variable¶

5. Indexing¶

6. Function (함수)¶

7. List (리스트)¶

8. DataFrame(데이터프레임)¶

Size of data¶

Row indexing¶

Columns indexing¶

Compare values in data¶

논리¶

데이터 저장¶

클립보드에 복사¶

Statistics¶

Handling data¶

NA¶

자주 쓰이는 데이터 다루기 관련 함수¶

문자열 관련 함수¶

dplyr¶

Install Package¶

Load Package¶

Load data¶

Summarise¶

Chain ( %>% )¶

Filter¶

Grouping¶

컬럼더하기(mutate)¶

컬럼 선택¶

행 선택¶

정렬¶

Join¶

semi_join()¶

'BIGDATA > R' 카테고리의 다른 글

티스토리툴바