# Walk-through of basic rvest usage: parse HTML, select elements, then pull
# out text, attributes and tables. Each `## ----` line below is a knitr chunk
# separator and is kept on its own line so the code that follows it is
# executed rather than being swallowed by the comment.
#
# NOTE(review): none of the strings passed to minimal_html() below contain any
# HTML tags, although the selectors used afterwards ("h1", ".important",
# "#first", "li", "a", "img", "table", "b", "i", ".weight") imply that markup
# was intended. The tags were presumably lost before this script was
# generated; the strings are left exactly as found. TODO: restore the markup
# from the original document.

## ----echo=FALSE---------------------------------------------------------------
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## -----------------------------------------------------------------------------
library(rvest)

## -----------------------------------------------------------------------------
# Parse a live page (requires network access) and inspect the result's class.
html <- read_html("http://rvest.tidyverse.org/")
class(html)

## -----------------------------------------------------------------------------
# Build a document from an inline string and print it.
html <- minimal_html("

This is a paragraph

")
html

## -----------------------------------------------------------------------------
html <- minimal_html("

This is a heading

This is a paragraph

This is an important paragraph

")

## -----------------------------------------------------------------------------
# Select by tag name, by class (".important") and by id ("#first").
html |> html_element("h1")
html |> html_elements("p")
html |> html_elements(".important")
html |> html_elements("#first")

## -----------------------------------------------------------------------------
html <- minimal_html("
  1. apple & pear
  2. banana
  3. pineapple
")
# Text extraction with html_text2() ...
html |>
  html_elements("li") |>
  html_text2()

## -----------------------------------------------------------------------------
# ... and the same selection with html_text(), for comparison.
html |>
  html_elements("li") |>
  html_text()

## -----------------------------------------------------------------------------
html <- minimal_html("

This is a paragraph.

This is another paragraph. It has two sentences.

")

## -----------------------------------------------------------------------------
# cat() prints the extracted text so that line breaks are shown literally.
html |>
  html_element("body") |>
  html_text2() |>
  cat()

## -----------------------------------------------------------------------------
html |>
  html_element("body") |>
  html_text() |>
  cat()

## -----------------------------------------------------------------------------
html <- minimal_html("

cats

")

## -----------------------------------------------------------------------------
# Read attribute values from the selected elements.
html |>
  html_elements("a") |>
  html_attr("href")
html |>
  html_elements("img") |>
  html_attr("src")

## -----------------------------------------------------------------------------
# The first call shows the raw attribute value; the second converts it with
# as.integer().
html |>
  html_elements("img") |>
  html_attr("width")
html |>
  html_elements("img") |>
  html_attr("width") |>
  as.integer()

## -----------------------------------------------------------------------------
html <- minimal_html("
x y
1.5 2.7
4.9 1.3
7.2 8.1
")

## -----------------------------------------------------------------------------
# html_element() replaces the superseded html_node() so this call matches the
# naming used everywhere else in the script.
html |>
  html_element("table") |>
  html_table()

## -----------------------------------------------------------------------------
html <- minimal_html(" ")

## -----------------------------------------------------------------------------
# Selecting across the whole document: each call returns only the matches it
# finds, independently of the other two.
html |>
  html_elements("b") |>
  html_text2()
html |>
  html_elements("i") |>
  html_text2()
html |>
  html_elements(".weight") |>
  html_text2()

## -----------------------------------------------------------------------------
# Selecting per list item instead: first collect the "li" elements, then call
# html_element() on that set for each field.
characters <- html |> html_elements("li")

characters |>
  html_element("b") |>
  html_text2()
characters |>
  html_element("i") |>
  html_text2()
characters |>
  html_element(".weight") |>
  html_text2()

## -----------------------------------------------------------------------------
# Assemble the three per-item fields into one data frame.
data.frame(
  name = characters |> html_element("b") |> html_text2(),
  species = characters |> html_element("i") |> html_text2(),
  weight = characters |> html_element(".weight") |> html_text2()
)