library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Import the Trumpton data set (columns: LastName, FirstName, Age, Weight,
# Height). The file is tab-separated; the delimiter is stated explicitly so
# the import does not depend on readr guessing it from the file contents.
trumpton <- read_delim("trumpton.txt", delim = "\t")
## Rows: 7 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (2): LastName, FirstName
## dbl (3): Age, Weight, Height
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the imported table to check it was read as expected.
trumpton
# Scatter plot of Weight against Age, one point per row of trumpton.
ggplot(data = trumpton, mapping = aes(x = Age, y = Weight)) +
  geom_point()
Generally, the older people in the trumpton dataset are heavier.
Find the person who weighs more than 100 kg.
# Keep the rows with Weight above 100, then show only the two name columns
# (first name first).
select(
  filter(trumpton, Weight > 100),
  FirstName,
  LastName
)
# Bar chart of Age for each LastName: magenta bars with a black outline.
ggplot(data = trumpton, mapping = aes(x = LastName, y = Age)) +
  geom_col(colour = "black", fill = "magenta2")
# Import the child variant calls. The file is comma-separated; the delimiter
# is stated explicitly so the import does not depend on readr guessing it
# from the file contents.
child <- read_delim("Child_Variants.csv", delim = ",")
## Rows: 25822 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (6): CHR, dbSNP, REF, ALT, GENE, ENST
## dbl (5): POS, QUAL, MutantReads, COVERAGE, MutantReadPercent
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the imported variant table to check it was read as expected.
child
Select all of the rows (variants) which occur in the first 5 Mbp of chromosome X.
# Variants on chromosome X at position 5,000,000 or lower. Both conditions
# are passed to a single filter() call, which keeps rows satisfying all of them.
x_filtered <- child %>%
  filter(CHR == "X", POS <= 5e6)

# Auto-print the subset for inspection.
x_filtered
# Scatter plot of COVERAGE against MutantReads for the chromosome X subset,
# with points coloured by QUAL.
ggplot(
  data = x_filtered,
  mapping = aes(x = MutantReads, y = COVERAGE, colour = QUAL)
) +
  geom_point()
The low-quality calls have low coverage and a small number of mutant reads.
# Line plot of COVERAGE along POS, restricted to variants whose dbSNP field
# is something other than the "." placeholder.
# NOTE(review): `size` is kept for the line width because the attached
# ggplot2 is 3.3.5; newer ggplot2 releases prefer `linewidth` for lines.
child %>%
  filter(dbSNP != ".") %>%
  ggplot(mapping = aes(x = POS, y = COVERAGE)) +
  geom_line(size = 1, colour = "grey")
Remove any variants with a coverage greater than 200.
# Same coverage-along-position line plot, additionally dropping variants
# whose COVERAGE is above 200. Both conditions are passed to one filter()
# call, which keeps rows satisfying all of them.
# NOTE(review): `size` is kept for the line width because the attached
# ggplot2 is 3.3.5; newer ggplot2 releases prefer `linewidth` for lines.
child %>%
  filter(dbSNP != ".", COVERAGE <= 200) %>%
  ggplot(mapping = aes(x = POS, y = COVERAGE)) +
  geom_line(size = 1, colour = "grey")