# store passwords
library(config)
# data wrangling
library(plyr)
library(tidyverse)
library(purrr)
library(glue)
# google APIs
library(googleway)
library(googledrive)
# webscraping
library(rvest)
# send emails
library(mailR)
library(XML)
library(RCurl)
# html widgets
library(DT)
library(leaflet)
How to Organise a Wedding Using R: Google Search API, Google Drive, Web Scraping, and Automated Emails
This post first appeared on my {blogdown} site, now archived. After a recent request from someone looking to use the same approach, I decided to bring it back on my current Quarto site, unchanged from the original.
See Section 1.4 for my latest notes.
Planning a wedding is a challenge. For R users, we have one advantage: automation. One of the trickiest parts is finding a venue. There are plenty, but many will already be booked for your date. Here is how I created a list of potential venues with the Google Search API, stored it on Google Drive, scraped emails, and sent messages, all with R.
Setup
# Credentials live in a config.yml read by {config}, keeping secrets out of
# the script: a Gmail account dedicated to the wedding, and a Google Cloud
# API key for the Places API.
gmail_wedding <- config::get("gmail_wedding")
google_key <- config::get("google_cloud")
Creating a venue list with the Google Places API
Since R offers a package for almost everything, I used {googleway} to pull venue data from Google Places. This API includes several services like Directions, Geolocation and Places. To use it, you must register a card on Google Cloud to get an API key. Used moderately, this is free. I found this Stack Overflow answer helpful when learning googleway.
Targeted cities
I wanted my wedding in the Auvergne-Rhone-Alpes region of France. A single search term like “Auvergne-Rhone-Alpes” might not catch all options, so I built a loop that searches by city. My list of cities comes from their department codes (e.g. the department codes 01, 07, 26, 38, 69, 73 and 74 correspond to Ain, Ardèche, Drôme, Isère, Rhône, Savoie and Haute-Savoie in France).
# Target departments of the Auvergne-Rhone-Alpes region.
# NOTE(review): leading zeros are dropped (c(01, ...) is numeric 1, ...);
# this matches the CSV, where read.csv() also parses "01" as numeric 1 —
# TODO confirm against the source file if the schema changes.
dept_target <- c(01, 07, 26, 38, 69, 73, 74)
#
# Build the list of city names: read the French cities CSV, keep the
# department / city / 2010-population columns, then filter to the target
# departments and to cities of more than 5000 inhabitants.
list_city <- read.csv(
  file = url("https://sql.sh/ressources/sql-villes-france/villes_france.csv"),
  header = FALSE
) %>%
  dplyr::select(dept = V2, city = V5, pop2010 = V15) %>%
  dplyr::mutate(city = as.character(city)) %>%
  dplyr::filter(dept %in% dept_target) %>% # filter by target departments
  dplyr::filter(pop2010 > 5000) %>% # filter by city population size
  magrittr::use_series(city) # extract the city column as a character vector
Querying Google Places
Once the cities are ready, I run a loop querying Google Places for each one. If a next page token is found, the script fetches results from subsequent pages until all results are retrieved.
# Query Google Places once per target city and accumulate every page of
# results into df_places_final. Each results page keeps only the identifying
# columns plus the lat/lng coordinates.
df_places_final <- NULL
for (city in list_city) {
  # print(city)
  # First page of results for "mariage <city> france"
  df_places <- googleway::google_places(
    search_string = paste("mariage", city, "france"),
    key = google_key$key # replace by your Google API key
  )
  # Skip cities that return no results at all
  if (length(df_places$results) == 0) next
  df_places_results <- df_places$results
  # geometry$location holds the lat/lng columns returned by the API
  geometry <- df_places_results$geometry$location
  df_places_results <- df_places_results[, c("name", "formatted_address", "place_id", "types")]
  df_places_results <- cbind(df_places_results, geometry)

  # Follow next_page_token until every page has been retrieved
  while (!is.null(df_places$next_page_token)) {
    df_places <- googleway::google_places(
      search_string = paste("mariage", city, "france"),
      page_token = df_places$next_page_token,
      key = google_key$key
    )
    df_places_next <- df_places$results
    if (length(df_places_next) > 0) {
      geometry <- df_places_next$geometry$location
      df_places_next <- df_places_next[, c("name", "formatted_address", "place_id", "types")]
      df_places_next <- cbind(df_places_next, geometry)
      df_places_results <- rbind(df_places_results, df_places_next)
    }
    Sys.sleep(2) # time to not overload the google API
  }
  df_places_final <- rbind(df_places_final, df_places_results)
}
The raw results include caterers, photographers and shops. I filtered them to keep only venues such as castles, mansions and estates. Duplicates are also removed.
# Keep only plausible wedding venues (castles, estates, manors, farms...)
# by keyword in the name, and drop duplicate places that overlapping
# city searches returned more than once.
df_places_filtered <- df_places_final %>%
  dplyr::filter(grepl("castle|chateau|domaine|manoir|ferme", name, ignore.case = TRUE)) %>%
  dplyr::distinct(place_id, .keep_all = TRUE)
With {leaflet}, I visualised the locations on a map.
# Plot the filtered venues on an interactive map: default OpenStreetMap
# tiles, one marker per venue with its name as the popup.
venue_map <- leaflet()
venue_map <- addTiles(venue_map) # Add default OpenStreetMap map tiles
addMarkers(
  venue_map,
  lng = df_places_filtered$lng,
  lat = df_places_filtered$lat,
  popup = df_places_filtered$name
)
Getting venue websites
The first API call does not return website URLs, but google_place_details()
does. Using {purrr}, I applied a small function to fetch them.
# Fetch the website URL for one place_id via the Place Details API.
# Returns the sentinel string "no_website" when the place has no
# website field, so downstream filtering stays vectorised.
get_website <- function(place_id) {
  # print(place_id)
  place_id <- as.character(place_id)
  dat <- googleway::google_place_details(place_id = place_id, key = google_key$key)
  # if/else instead of ifelse(): the condition is scalar, and ifelse()
  # would mis-handle a non-scalar website value
  if (is.null(dat$result$website)) "no_website" else dat$result$website
}

# Look up the website for every filtered venue and attach it as a column
website_list <- df_places_filtered$place_id %>%
  purrr::map(get_website) %>%
  unlist()
df_places_filtered$website <- website_list
I removed venues without websites and cleaned up the remaining URLs for later use in web scraping.
# Drop venues without a website and normalise the remaining URLs:
# - keep only the first URL when several are comma-separated
# - strip the "/fr" language path so the contact-page URL builds cleanly
# NOTE(review): paste0(website, "contact") assumes each URL ends with "/"
# — TODO confirm; otherwise the contact URL is malformed.
df_places_filtered <- df_places_filtered %>%
  dplyr::filter(website != "no_website") %>%
  dplyr::mutate(website = gsub("\\,.*", "", website)) %>%
  dplyr::mutate(website = gsub("com/fr", "com", website)) %>%
  dplyr::mutate(website_contact = paste0(website, "contact"))
The list of venues is now “clean”, so we can start the web scraping to obtain the venues’ emails.
Scraping websites for emails
Google does not provide emails, so I scraped the websites using {rvest}. Most venues list emails on their home or contact page. A simple function handles this, with tryCatch()
to skip broken URLs.
# Scrape one page and return the first email address found, or NA when the
# URL cannot be read or no email appears in the page text.
extract_email <- function(website) {
  # print(website)
  # Read the page ONCE; on failure return NULL as the sentinel.
  # (The original read the page a second time on success, doubling the
  # network traffic, and compared an xml_document to a string with `==`.)
  page <- tryCatch(xml2::read_html(website), error = function(e) NULL)
  if (is.null(page)) {
    return(NA)
  }
  text_web <- rvest::html_text(page)
  # Extract anything shaped like an email address from the page text
  email_text <- unlist(regmatches(text_web, gregexpr("([_a-z0-9-]+(\\.[_a-z0-9-]+)*@[a-z0-9-]+(\\.[a-z0-9-]+)*(\\.[a-z]{2,4}))", text_web)))
  email_text <- gsub("\n", "", email_text)
  email_text <- gsub(" ", "", email_text)
  # character(0)[1] is NA, so "no match" also yields NA
  return(email_text[1])
}
# web scraping home page
email_list <- df_places_filtered$website %>%
  purrr::map(extract_email) %>%
  unlist()
df_places_filtered$email <- email_list
# web scraping contact page
email_list <- df_places_filtered$website_contact %>%
  purrr::map(extract_email) %>%
  unlist()
df_places_filtered$email_contact <- email_list
# merge email and email_contact: prefer the home-page email, fall back to
# the contact-page one, then drop venues where neither was found
df_places_filtered <- df_places_filtered %>%
  dplyr::mutate(email = ifelse(is.na(email), email_contact, email)) %>%
  dplyr::filter(!is.na(email)) %>%
  dplyr::select(-email_contact, -types)
# Show a paginated table (5 rows per page) of venue names and websites.
df_places_filtered %>%
  dplyr::select(name, website) %>%
  DT::datatable(options = list(pageLength = 5))
We now have a list of venues to contact.
Google Drive and automated emails
It helps to create a separate email account just for wedding planning. Google makes this easy and also offers Google Drive for storing documents. With the {googledrive} package, sharing and updating files with your partner is straightforward (see https://googledrive.tidyverse.org/index.html for some information about {googledrive}).
Uploading the list to Google Drive
First, save the data frame locally, then upload.
# Save the venue list locally, then push it to Google Drive as a spreadsheet.
write.csv(x = df_places_filtered, file = "list_venues.csv", row.names = FALSE)
drive_upload(
  media = "list_venues.csv",
  name = "list_venues",
  type = "spreadsheet"
)
Downloading from Google Drive
You can then download and reload the file when needed.
# select file id from google drive
list_venues_id <- drive_find() %>%
  dplyr::filter(name == "list_venues") %>%
  magrittr::use_series(id)
# download list of venues locally
drive_download(as_id(list_venues_id), overwrite = TRUE, type = "csv")
# read local list of venues file; convert any factor columns back to
# character (read.csv defaulted to stringsAsFactors = TRUE before R 4.0)
list_venues <- read.csv("list_venues.csv", row.names = NULL) %>%
  dplyr::mutate_if(is.factor, as.character)
Sending emails
With the list ready, I sent emails in a simple loop. The script extracts each venue name and email and sends a standard message asking about availability. Make sure to allow less secure apps in Gmail settings.
email_to_send <- list_venues
#
# Email body template: {name} placeholders are filled per venue by
# glue::glue() inside the loop. Fixed from the original: typo "plac" ->
# "place" and a stray unmatched "</b>" tag after the second date.
email_text <- "<p>Dear owner/manager of '{name}', <br><br>We are contacting you because we would like to organise our wedding <b>Sunday 9 of June 2019</b> and your place would be amazing for it.<br><br>That's why we would like to know if your venue '{name}' is available <b>Sunday 9 of June 2019</b>?<br><br>Best regards,<br><br>YOUR NAMES</p>"
#
# Send one HTML email per venue through Gmail's SMTP server.
# seq_len() instead of 1:nrow() so an empty data frame sends nothing.
for (i in seq_len(nrow(email_to_send))) {
  df <- email_to_send[i, ]
  # `name` is referenced by the {name} placeholders in email_text
  name <- as.character(df$name)
  ################################
  send.mail(
    from = gmail_wedding$email,
    to = as.character(df$email),
    subject = "Availability for a wedding on the 09/06/2019",
    body = glue::glue(email_text),
    smtp = list(
      host.name = "smtp.gmail.com", port = 465,
      user.name = gmail_wedding$email,
      passwd = gmail_wedding$passwd, ssl = TRUE
    ),
    authenticate = TRUE,
    send = TRUE,
    html = TRUE
  )
}
After sending, I updated the contact date in the data to avoid duplicates.
# Stamp each contacted venue with today's date and the contact type.
email_to_send <- email_to_send %>%
  dplyr::mutate(date_contact = as.character(Sys.Date())) %>%
  dplyr::mutate(type_contact = "automatic email")
# Checks in case of different batch of email sending: match the master
# list's rows against the batch just contacted (nomatch = 0L so unmatched
# rows are left untouched by the id != 0 masks below).
id <- match(list_venues$name, email_to_send$name, nomatch = 0L)
list_venues$date_contact[id != 0] <- email_to_send$date_contact[id]
list_venues$type_contact[id != 0] <- email_to_send$type_contact[id]
# Write data locally and upload from local to Google Drive.
# Fixed: the file name was misspelled "ist_venues.csv", so drive_update()
# re-uploaded a stale "list_venues.csv" without the contact dates.
write.csv(list_venues, "list_venues.csv", row.names = FALSE)
drive_update(file = "list_venues", media = "list_venues.csv")
I hope these scripts help you find the perfect venue. Best of luck with your planning.
Updates and Comments
Since I wrote this code in 2018, things have changed. I would rely less on for loops and use {purrr} map or walk functions instead.
To send batch emails with Gmail, I now use {blastula} instead of {mailR}.