Launch an R script using GitHub Actions

sitemap_scraping.R
main.yml

Last updated


Last updated
# Load libraries
library(tidyverse)
library(rvest)

# Count the URLs listed in an XML sitemap and append today's count to a CSV,
# so that repeated (e.g. daily) runs build up a time series.

# URL of the XML sitemap to inspect
url <- "https://www.rforseo.com/sitemap.xml"

# CSV file that accumulates one row per run
output_path <- "data/xml_url_count.csv"

# Download and parse the sitemap
url_html <- read_html(url)

# Select all the <loc> nodes (one per listed URL) and count them
nbr_url <- url_html %>%
  html_nodes("loc") %>%
  length()

# Create a new row of data with today's date and the number of URLs
row <- data.frame(date = Sys.Date(), nbr_url = nbr_url)

# Make sure the output directory exists; otherwise the first run would fail
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)

# Append the new row at the end of the CSV (no header is written when appending)
write_csv(row, output_path, append = TRUE)
name: sitemap_scraping
# Controls when the action will run: every day at 13:00 (cron schedule).
on:
  schedule:
    - cron: '0 13 * * *'

jobs:
  autoscrape:
    # The type of runner that the job will run on
    runs-on: macos-latest

    # The commit step pushes to the repository, so the job token needs
    # write access to the repository contents.
    permissions:
      contents: write

    steps:
      # Load repo and install R.
      # Pinned to release tags rather than the mutable `master` branches.
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2

      # Set-up R: install the packages the script loads
      - name: Install packages
        run: |
          R -e 'install.packages("tidyverse")'
          R -e 'install.packages("rvest")'

      # Run R script
      - name: Scrape
        run: Rscript sitemap_scraping.R

      # Add new files in data folder, commit along with other modified files, push
      - name: Commit files
        # NOTE(review): the user.email value below looks like an e-mail
        # obfuscation placeholder left by the page this was copied from;
        # replace it with the intended committer address.
        run: |
          git config --local user.name actions-user
          git config --local user.email "[email protected]"
          git add data/*
          # `git commit` exits non-zero when there is nothing to commit,
          # which would fail the job; only commit and push on real changes.
          if git diff --quiet HEAD; then
            echo "No changes to commit"
          else
            git commit -am "GH ACTION Headlines $(date)"
            git push origin main
          fi
        env:
          REPO_KEY: ${{secrets.GITHUB_TOKEN}}
          username: github-actions