## ----echo=FALSE---------------------------------------------------------------
# Chunk defaults for the rendered document: prefix printed output with "#>"
# and collapse each chunk's source and output into a single block.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
## -----------------------------------------------------------------------------
library(rvest)
## -----------------------------------------------------------------------------
# Download and parse a live page. The site is served over HTTPS, so request
# it directly instead of going through a plaintext HTTP redirect.
html <- read_html("https://rvest.tidyverse.org/")
# Inspect the class of the parsed document object.
class(html)
## -----------------------------------------------------------------------------
# Build a small in-memory document from an inline string with minimal_html(),
# then auto-print it to show the parsed structure.
html <- minimal_html("
This is a paragraph
")
html
## -----------------------------------------------------------------------------
# Sample document: one heading and two paragraphs. One paragraph carries an
# id and the other a class, so every CSS selector below has a match.
html <- minimal_html("
  <h1>This is a heading</h1>
  <p id='first'>This is a paragraph</p>
  <p class='important'>This is an important paragraph</p>
")
## -----------------------------------------------------------------------------
# html_element() returns the first match; html_elements() returns all matches.
# Select by tag name:
html |> html_element("h1")
html |> html_elements("p")
# Select by class (".") and by id ("#"):
html |> html_elements(".important")
html |> html_elements("#first")
## -----------------------------------------------------------------------------
# Sample document: a three-item list. The ampersand is written as the
# entity "&amp;" so the markup is valid HTML.
html <- minimal_html("
  <ul>
    <li>apple &amp; pear</li>
    <li>banana</li>
    <li>pineapple</li>
  </ul>
")
# Text of every list item, formatted the way a browser would display it.
html |>
  html_elements("li") |>
  html_text2()
## -----------------------------------------------------------------------------
# Same items via html_text(), which returns the raw underlying text.
html |>
  html_elements("li") |>
  html_text()
## -----------------------------------------------------------------------------
# Sample document: two paragraphs whose source text is broken across lines.
# The text is deliberately left unindented because html_text() reports
# source whitespace verbatim.
html <- minimal_html("
<p>This is
a
paragraph.</p>
<p>This is another paragraph.
It has two sentences.</p>
")
## -----------------------------------------------------------------------------
# html_text2() collapses in-paragraph line breaks and separates the two
# paragraphs, approximating what a browser shows.
html |>
  html_element("body") |>
  html_text2() |>
  cat()
## -----------------------------------------------------------------------------
# html_text() keeps the whitespace exactly as written in the source.
html |>
  html_element("body") |>
  html_text() |>
  cat()
## -----------------------------------------------------------------------------
# Sample document: a link and an image carrying the attributes that are
# read below (href on <a>; src and width on <img>).
html <- minimal_html("
  <p><a href='https://en.wikipedia.org/wiki/Cat'>cats</a></p>
  <img src='https://cataas.com/cat' width='100'>
")
## -----------------------------------------------------------------------------
# html_attr() extracts one named attribute from each selected element.
html |>
  html_elements("a") |>
  html_attr("href")
html |>
  html_elements("img") |>
  html_attr("src")
## -----------------------------------------------------------------------------
# Attribute values always come back as character strings ...
html |>
  html_elements("img") |>
  html_attr("width")
# ... so convert explicitly when a number is needed.
html |>
  html_elements("img") |>
  html_attr("width") |>
  as.integer()
## -----------------------------------------------------------------------------
# Sample document: a table with a header row (x, y) and three data rows.
html <- minimal_html("
  <table>
    <tr><th>x</th><th>y</th></tr>
    <tr><td>1.5</td><td>2.7</td></tr>
    <tr><td>4.9</td><td>1.3</td></tr>
    <tr><td>7.2</td><td>8.1</td></tr>
  </table>
")
## -----------------------------------------------------------------------------
# Select the table and convert it to a data frame. html_element() is used
# instead of the superseded html_node(), matching the rest of this script.
html |>
  html_element("table") |>
  html_table()
## -----------------------------------------------------------------------------
# Sample document: one list item per character. The name is in <b>, the
# species in <i>, and the weight in a span with class "weight". Some items
# lack a species or a weight.
html <- minimal_html("
  <ul>
    <li><b>C-3PO</b> is a <i>droid</i> that weighs <span class='weight'>167 kg</span></li>
    <li><b>R2-D2</b> is a <i>droid</i> that weighs <span class='weight'>96 kg</span></li>
    <li><b>Yoda</b> weighs <span class='weight'>66 kg</span></li>
    <li><b>R4-P17</b> is a <i>droid</i></li>
  </ul>
")
## -----------------------------------------------------------------------------
# html_elements() on the whole document returns only the matches that exist,
# so the three results have different lengths and cannot be lined up.
html |>
  html_elements("b") |>
  html_text2()
html |>
  html_elements("i") |>
  html_text2()
html |>
  html_elements(".weight") |>
  html_text2()
## -----------------------------------------------------------------------------
# Select each item first, then use html_element() within it: this yields
# exactly one result per item, with NA where the piece is missing.
characters <- html |> html_elements("li")
characters |>
  html_element("b") |>
  html_text2()
characters |>
  html_element("i") |>
  html_text2()
characters |>
  html_element(".weight") |>
  html_text2()
## -----------------------------------------------------------------------------
# The per-item vectors all have the same length, so they combine into a
# data frame with one row per character.
data.frame(
  name = characters |> html_element("b") |> html_text2(),
  species = characters |> html_element("i") |> html_text2(),
  weight = characters |> html_element(".weight") |> html_text2()
)