Grab keyword search volume from the DataForSEO API using R

What is DataForSeo?

DataForSEO is an all-in-one paid API that provides SEO data.

It allows you, for example, to retrieve keyword rankings and keyword search volume directly from Google Ads or Bing Ads.

How to use the DataForSeo API?

>> Create an account <<

When you create your account, you are given a few dollars to test the service.

API Authentication

Your credentials can be found inside your personal dashboard.

# We need to load a few packages to run this script 
#
# If you don't have them installed yet, 
# you need to run the dedicated instruction: install.packages('demoPackageName')

# Package for working with HTTP requests
library(httr)

# Package for working with JSON files
library(jsonlite)


# Replace these values with your own credentials.
# They can be found here: https://app.dataforseo.com/api-dashboard

username <- "APILOGIN"
password <- "APIPWD"

# Build the header vector used to authenticate every API call.
# DataForSEO uses HTTP Basic auth: base64("login:password").
# Keep this block private, as anyone with these credentials could use your credits.

headers <- c(
  `Authorization` = paste("Basic", base64_enc(paste0(username, ":", password))),
  `Content-Type` = "application/json"
)

Request Google Ads Search Volume for a keyword

Before making the script run over a list, we'll run it for a single keyword and break down each step.

# Build the JSON request body for a single keyword.
keyword <- "R for SEO"
data <- sprintf(
  '[{"device":"all", "search_partners":false, "keywords":["%s"], "location_code":2840, "language_code":"en", "sort_by":"search_volume"}]',
  keyword
)


# keyword: R for SEO
# device: all,  we want desktop, tablet and mobile search volume
# search_partners:false, because If we are doing SEO, we don't care about Google Ads being displayed outside of Google Search.
# location_code":2840 is for the united states
# all the location codes can be downloaded from this page https://docs.dataforseo.com/v3/serp/google/locations/?bash
# language_code:en, we want English
# sort_by: search_volume, this only matters when requesting several keywords at the same time.

# Send the request to the Google Ads search-volume live endpoint.
# The httr:: prefix makes sure we call the function from the httr package.
res <- httr::POST(url = 'https://api.dataforseo.com/v3/keywords_data/google_ads/search_volume/live', httr::add_headers(.headers = headers), body = data)

# Extract the raw response body as text.
# Passing encoding = "UTF-8" avoids httr's "No encoding supplied" message
# and stops it from guessing the encoding from the locale.
res_text <- httr::content(res, "text", encoding = "UTF-8")

# Parse the JSON reply into an easy-to-read R object.
res_json <- jsonlite::fromJSON(res_text, flatten = TRUE)

The search volume can be displayed by running this command in the R console.

# Peek at the parsed reply: the search volume for our keyword.
res_json[["tasks"]][["result"]][[1]][["search_volume"]]

If the reply is NULL, something is wrong and you might want to explore the full API response by displaying it like this:

# Inspect the full parsed response in the data viewer.
View(res_json)

If you are happy with the results you can now save the value.

# Store the search volume for later use.
search_volume <- res_json[["tasks"]][["result"]][[1]][["search_volume"]]

I would suggest to also store the 'competition', 'competition_index', 'low_top_of_page_bid', 'high_top_of_page_bid'.

It's better to have more data; it's up to you whether to use it later.

# Store the extra Google Ads metrics alongside the search volume.
competition <- res_json[["tasks"]][["result"]][[1]][["competition"]]
competition_index <- res_json[["tasks"]][["result"]][[1]][["competition_index"]]
# Fixed typo: this was previously assigned to 'klow_top_of_page_bid' (stray 'k'),
# which is inconsistent with the name used everywhere else in the script.
low_top_of_page_bid <- res_json[["tasks"]][["result"]][[1]][["low_top_of_page_bid"]]
high_top_of_page_bid <- res_json[["tasks"]][["result"]][[1]][["high_top_of_page_bid"]]

Now that the script is validated, we can run it over the full list.

Request Google Ads Search Volume for a batch of keywords

The first step is to load our keywords list

# Prompt a file selector; the file can be plain text or CSV,
# as long as the keywords are in the first column.
kwds <- read.csv(file.choose())

# Drop duplicate rows so each keyword is only requested once.
kwds <- unique(kwds)

# Rename the first column for convenience
# and to make the rest of the script easier to read.
names(kwds)[1] <- "Kwd"

Then we run a keyword request through a loop

# Loop over every keyword row and request its search volume one at a time.
# seq_len() is safe when kwds has zero rows (1:nrow would yield c(1, 0)).
for (i in seq_len(nrow(kwds))) {

  # Skip keywords that already have a search volume.
  # kwds$search_volume is NULL while the column does not exist yet, and
  # NULL[i] is still NULL, so this check is safe on the very first run.
  # (kwds[i, "search_volume"] would error with "undefined columns selected".)
  if (is.null(kwds$search_volume) || is.na(kwds$search_volume[i])) {

    # JSON body for a single-keyword request.
    data <- paste0('[{"device":"all", "search_partners":false, "keywords":["', kwds[i, "Kwd"],
                   '"], "location_code":2840, "language_code":"en", "sort_by":"search_volume"}]')

    # We don't want the script to stop if one query fails,
    # so each request is wrapped in tryCatch.
    tryCatch(
      expr = {

        res <- httr::POST(url = 'https://api.dataforseo.com/v3/keywords_data/google_ads/search_volume/live', httr::add_headers(.headers = headers), body = data)
        # encoding = "UTF-8" avoids httr's "No encoding supplied" message.
        res_text <- httr::content(res, "text", encoding = "UTF-8")
        res_json <- jsonlite::fromJSON(res_text, flatten = TRUE)

        kwds[i, "search_volume"] <- res_json[["tasks"]][["result"]][[1]][["search_volume"]]
        kwds[i, "competition"] <- res_json[["tasks"]][["result"]][[1]][["competition"]]
        kwds[i, "competition_index"] <- res_json[["tasks"]][["result"]][[1]][["competition_index"]]
        kwds[i, "low_top_of_page_bid"] <- res_json[["tasks"]][["result"]][[1]][["low_top_of_page_bid"]]
        kwds[i, "high_top_of_page_bid"] <- res_json[["tasks"]][["result"]][[1]][["high_top_of_page_bid"]]

        message(i, " ", kwds[i, "Kwd"], " ok")

        # (Optional)
        # Sleep between requests so we stay under the API hit rate limit.
        Sys.sleep(2)

        # (Optional)
        # Persist intermediate results to disk.
        # row.names = FALSE keeps the keyword in the first column if the
        # file is ever re-loaded with read.csv at the top of this script.
        write.csv(kwds, "kwds.csv", row.names = FALSE)
      },
      error = function(e) {
        # Log the failure (with its message) and move on to the next keyword.
        message(i, " ", kwds[i, "Kwd"], " error: ", conditionMessage(e))
      },
      warning = function(w) {
        # Log warnings without stopping the loop.
        message(i, " ", kwds[i, "Kwd"], " warning: ", conditionMessage(w))
      }
    )

  }
}

Request Google Ads Search Volume for a big batch of keywords

⚠️ DataForSEO charges per request, so if you have lots of keywords to check, it will be much cheaper to group keywords into batches.

This is the script that will help you request queries in batches of 100.

# Size of each batch: number of keywords sent per API request.
pas <- 100
# (Removed the dead `i <- 1` assignment: the for-loop header below assigns i.)

# Pre-create the result columns so the data frame is ready to receive
# the values returned by the API; NA marks "not fetched yet".
kwds[, c("spell", "location_code", "language_code", "search_partners", "competition", "competition_index", "search_volume", "low_top_of_page_bid", "high_top_of_page_bid")] <- NA

# Loop over the keyword list in steps of `pas`, requesting one batch per call.
for (i in seq(from = 1, to = nrow(kwds), by = pas)) {

  # Skip batches whose first keyword already has a search volume.
  if (is.null(kwds$search_volume) || is.na(kwds$search_volume[i])) {

    # Build the JSON keyword array for this batch: the batch's first
    # keyword plus up to pas - 1 following ones.
    data <- paste0('[{"device":"all", "search_partners":false, "keywords":["', kwds[i, "Kwd"])
    for (idkwd in seq_len(pas - 1)) {
      # Rows indexed past the end of the data frame come back as NA, so skip them.
      if (!is.null(kwds[i + idkwd, "Kwd"]) && !is.na(kwds[i + idkwd, "Kwd"])) {
        data <- paste0(data, '", "', kwds[i + idkwd, "Kwd"])
      }
    }
    data <- paste0(data, '"], "location_code":2840, "language_code":"en", "sort_by":"search_volume"}]')

    # Keep the loop running even if one batch fails.
    tryCatch(
      expr = {

        res <- httr::POST(url = 'https://api.dataforseo.com/v3/keywords_data/google_ads/search_volume/live', httr::add_headers(.headers = headers), body = data)
        # encoding = "UTF-8" avoids httr's "No encoding supplied" message.
        res_text <- httr::content(res, "text", encoding = "UTF-8")
        res_json <- jsonlite::fromJSON(res_text, flatten = TRUE)

        # Reshape the result into a character data frame, one row per keyword.
        batch <- as.data.frame(do.call(cbind, res_json[["tasks"]][["result"]][[1]]))
        batch <- data.frame(lapply(batch, as.character), stringsAsFactors = FALSE)
        data.table::setnames(batch, "keyword", "Kwd")
        # monthly_searches is a nested list column we don't store.
        batch$monthly_searches <- NULL

        # Insert the batch results back into the main kwds data frame,
        # matching rows by keyword.
        kwds[match(batch$Kwd, kwds$Kwd), ] <- batch

        message(i, " ", kwds[i, "Kwd"], " OK")

        # (Optional)
        # Sleep between requests so we stay under the API hit rate limit.
        Sys.sleep(5)

        # (Optional)
        # Persist intermediate results to disk.
        # row.names = FALSE keeps the keyword in the first column if the
        # file is ever re-loaded with read.csv at the top of this script.
        write.csv(kwds, "kwds.csv", row.names = FALSE)
      },
      error = function(e) {
        # NOTE: the original handler called `break` here, which is invalid
        # inside a handler function ("no loop for break/next, jumping to top
        # level") — log the failure and let the loop continue instead.
        message(i, " ", kwds[i, "Kwd"], " error: ", conditionMessage(e))
      },
      warning = function(w) {
        # Log warnings without stopping the loop.
        message(i, " ", kwds[i, "Kwd"], " warning: ", conditionMessage(w))
      }
    )

  }
}

Last updated