Hourly percentage change for top 100 cryptos

In this post, I use the httr package to access Bitcoin’s price chart from CoinGecko’s crypto market API. The rvest package is used to scrape the hourly percentage change for the top 100 cryptos (by market cap) from CoinGecko’s website. The cron task scheduler (macOS) is then set to run the web scraping script every 15 minutes and write the data to a spreadsheet, giving me the percentage change at 15-minute intervals.

Once enough data has been collected, I plan to use it for predicting the percentage change for Bitcoin. In order for this to work, the task scheduler and web scraping script will need to run in the cloud. Ideally, this post will be turned into a Shiny app so that it can be used for real-time analysis and price prediction.

Currently, this post shows several ways to view the hourly percentage changes as a static snapshot. I’ve categorized each hourly percentage change as positive or negative, and also created a more nuanced scale with the following categories: ice age (< -6%), freezing (-6% to -4%), cold (-4% to -2%), chilly (-2% to 0%), neutral (0%), luke warm (0% to 2%), warm (2% to 4%), toasty (4% to 6%), hot (> 6%).

library(tidyverse)
library(tidyr)
library(plotly)
library(lubridate)
library(httr)
library(jsonlite)
library(viridis)
library(scales)
library(rvest)
library(cronR)
library(patchwork)
theme_set(theme_light())

# CoinGecko market overview pages; the second URL requests page 2 of the list.
url_cg1 <- "https://www.coingecko.com/en"
url_cg2 <- "https://www.coingecko.com/en?page=2"

# Download and parse each page exactly once. Coin names and hourly changes are
# later paired purely by row position, so both columns must come from the same
# snapshot of the page; fetching the page separately for each column could
# return two different orderings and halves the number of requests needed.
page_cg1 <- read_html(url_cg1)
page_cg2 <- read_html(url_cg2)

# Coin names, one row per matched node, in page order.
names <- page_cg1 %>%
  html_nodes('.font-bold.tw-justify-between') %>%
  html_text() %>%
  as_tibble()

names2 <- page_cg2 %>%
  html_nodes('.font-bold.tw-justify-between') %>%
  html_text() %>%
  as_tibble()

# Hourly percentage change as displayed text, in the same page order.
hourly <- page_cg1 %>%
  html_nodes('.change1h span') %>%
  html_text() %>%
  as_tibble()

hourly2 <- page_cg2 %>%
  html_nodes('.change1h span') %>%
  html_text() %>%
  as_tibble()

# Pair each scraped coin name with its hourly change by row position.
# bind_cols() repairs the two duplicate `value` columns to `value...1` and
# `value...2`, which are then given meaningful names.
hourly_df <- names %>%
  bind_cols(hourly) %>%
  rename(coin = `value...1`, hourly = `value...2`) %>%
  mutate(
    # drop the newlines embedded in the scraped name text
    coin = str_replace_all(coin, "\n", ""),
    # extract the number from the displayed percentage text
    hourly = parse_number(hourly)
  )

# hourly_df2 <- bind_cols(names2, hourly2) %>% 
#   rename("coin" = `value...1`, "hourly" = `value...2`) %>% 
#   mutate(coin = str_remove_all(coin, "\n"),
#          hourly = parse_number(hourly))
# 
# hourly_df200 <- bind_rows(hourly_df, hourly_df2)

# Classify each coin's hourly change two ways:
#   change   - coarse sign ("positive" / "negative"; exactly 0 counts as
#              "negative", unparsed values stay NA)
#   granular - nine-step temperature scale, stored as an ordered-by-level
#              factor running from coldest to hottest
# and stamp every row with the time of the snapshot.
#
# case_when() takes the first matching branch, so each branch only needs its
# lower bound. Bin edges mirror each other around zero: positive bins are
# (lower, upper], negative bins are [lower, upper), e.g. exactly 6 is "toasty"
# and exactly -6 is "freezing", matching the "< -6%" label of the last bin.
granular_hourly <- hourly_df %>%
  mutate(
    change = case_when(
      hourly > 0 ~ "positive",
      hourly <= 0 ~ "negative"
    ),
    granular = case_when(
      # Without this guard a value that failed to parse would fall through
      # every comparison and be reported as "ice age".
      is.na(hourly) ~ NA_character_,
      hourly > 6 ~ "hot (>6%)",
      hourly > 4 ~ "toasty (4-6%)",
      hourly > 2 ~ "warm (2-4%)",
      hourly > 0 ~ "luke warm (0-2%)",
      hourly == 0 ~ "neutral (0%)",
      hourly >= -2 ~ "chilly (-0-2%)",
      hourly >= -4 ~ "cold (-2-4%)",
      hourly >= -6 ~ "freezing (-4-6%)",
      TRUE ~ "ice age (< -6%)"
    ),
    granular = factor(
      granular,
      levels = c(
        "ice age (< -6%)", "freezing (-4-6%)", "cold (-2-4%)",
        "chilly (-0-2%)", "neutral (0%)", "luke warm (0-2%)", "warm (2-4%)",
        "toasty (4-6%)", "hot (>6%)"
      )
    ),
    time_stamp = as_datetime(Sys.time())
  )


# write_tsv(
#   granular_hourly,
#   'granular_15min.tsv',
#   na = "NA",
#   col_names = TRUE,
#   append = TRUE
# )
library(viridis)
# Stacked bar chart: number of coins with a positive vs. negative hourly
# change, each bar split by temperature category. The factor is reversed so
# the hottest category is listed first in the legend.
granular_hourly %>%
  mutate(granular = fct_rev(granular)) %>%
  count(change, granular) %>%
  ggplot(aes(x = change, y = n, fill = granular)) +
  geom_col() +
  #scale_fill_viridis_d(option = "viridis") +
  #scale_fill_discrete(guide = guide_legend(reverse = TRUE)) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  labs(
    title = paste0(
      "Hourly change for top 100 cryptos: ",
      format(Sys.time(), "%B %d, %Y @ %I:%M %p", tz = "PST8PDT")
    ),
    fill = "hourly % change",
    x = "",
    y = ""
  )

#ggsave("hrly-barchart.png")
library(hpackedbubble)

# Packed-bubble chart: one bubble per coin, grouped by temperature category
# and sized by the hourly percentage change.
# The tooltip previously labelled the value "Volume", but the value passed in
# is the hourly percent change, so the label now says so.
# NOTE(review): packedbubbleZMin is 0 while hourly changes can be negative;
# confirm how the widget sizes bubbles for negative values.
hpackedbubble(granular_hourly$granular,
              granular_hourly$coin, granular_hourly$hourly,
              title = "Hourly percentage change",
              titleColor = "#F0FFF0",
              pointFormat = "<br>{point.name} </br> Hourly change: {point.value}%",
              dataLabelsFilter = 100,
              packedbubbleMinSize = "50%",
              packedbubbleMaxSize = "250%",
              theme = "darkunica",
              packedbubbleZMin = 0,
              packedbubbleZmax = 10000, split = 0,
              gravitational = 0.02,
              parentNodeLimit = 1,
              dragBetweenSeries = 0,
              width = "100%",
              dataLabelsColor = "white")
# Bar chart of how many coins fall into each temperature category, titled
# with the current time rendered in the PST8PDT time zone.
pct_change_plot <- ggplot(
  data = count(granular_hourly, granular),
  mapping = aes(x = granular, y = n, fill = granular)
) +
  geom_col() +
  theme(legend.position = "none") +
  labs(
    title = paste0(
      "Hourly percent change for top 100 cryptos: ",
      format(Sys.time(), "%B %d @ %I:%M %p", tz = "PST8PDT")
    ),
    x = "",
    y = "Coin count"
  )

pct_change_plot

# Same category count as above, after dropping rows whose coin name is exactly
# "Bitcoin" or "Ethereum".
# NOTE(review): the filter needs an exact match on the scraped name text;
# confirm the scraped names carry no extra characters (e.g. ticker symbols).
pct_change_btc_eth_removed <- granular_hourly %>%
  filter(!(coin %in% c("Bitcoin", "Ethereum"))) %>%
  count(granular) %>%
  ggplot(aes(x = granular, y = n, fill = granular)) +
  geom_col() +
  theme(legend.position = "none") +
  labs(
    title = "Hourly percent change for top 98 cryptos",
    subtitle = "Bitcoin and Ethereum removed",
    x = "",
    y = "Coin count"
  )

pct_change_btc_eth_removed

#ggsave("pct_change_plot.png")
# CoinGecko OHLC endpoint for Bitcoin in USD over the last 30 days.
api_ohlc <- "https://api.coingecko.com/api/v3/coins/bitcoin/ohlc?vs_currency=usd&days=30"

# Request the data and fail immediately, with the HTTP status in the error, if
# the API rejects the request (e.g. rate limiting) instead of failing later on
# an unexpected JSON payload.
ohlc <- GET(api_ohlc)
stop_for_status(ohlc)
# State the encoding explicitly so httr does not have to guess it.
ohlc <- content(ohlc, as = "text", encoding = "UTF-8")

# Name the five columns before converting to a tibble: converting an unnamed
# matrix makes tibble fall back to compatibility name repair with a warning.
ohlc_df <- fromJSON(ohlc, flatten = TRUE)
colnames(ohlc_df) <- c("time", "open", "high", "low", "close")
ohlc_df <- as_tibble(ohlc_df)

# Line chart of the opening price over time. `time` is divided by 1000 before
# being converted to a date-time (epoch milliseconds to seconds).
ohlc_df %>%
  mutate(time = as_datetime(time / 1000)) %>%
  select(time, open) %>%
  arrange(time) %>%
  ggplot(aes(time, open, color = open)) +
  geom_line() +
  theme(legend.position = "none") +
  labs(title = "Bitcoin monthly price chart",
       x = "", y = "Value (USD)")

# CoinGecko market-chart endpoint for Bitcoin in USD over the last day.
btc_url <- "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart?vs_currency=usd&days=1"

# Fetch a JSON endpoint and return its parsed content.
#
# Args:
#   url: character URL of a JSON endpoint.
# Returns:
#   The parsed JSON as returned by jsonlite::fromJSON(flatten = TRUE).
# Raises:
#   An httr HTTP error if the server answers with a 4xx/5xx status, so that
#   failed requests are reported here rather than as a parsing failure later.
get_historical <- function(url) {
  url %>%
    GET() %>%
    stop_for_status() %>%
    # explicit encoding so httr does not have to guess it
    content(as = "text", encoding = "UTF-8") %>%
    fromJSON(flatten = TRUE)
}

library(hms)
#test function
# Flatten the market-chart response into one tibble. data.frame() expands each
# two-column element into `<name>.1` / `<name>.2`; the first column of
# `prices` is used as the timestamp and the second columns as the values.
btc_chart <- btc_url %>%
  get_historical() %>%
  do.call(what = data.frame, args = .) %>%
  as_tibble() %>%
  transmute(
    # timestamp is divided by 1000 before conversion (milliseconds to seconds)
    date = as.POSIXct(prices.1 / 1000, origin = "1970-01-01"),
    price = prices.2,
    market_cap = market_caps.2,
    volume = total_volumes.2,
    # clock time of each observation, rendered in the PST8PDT time zone
    time = as_hms(format(date, format = "%H:%M:%S", tz = "PST8PDT"))
  )

# Price line for the fetched period, coloured by volume, with the x axis
# labelled as clock time in the PST8PDT time zone.
ggplot(btc_chart, aes(x = date, y = price, colour = volume, group = 1)) +
  geom_line() +
  scale_x_datetime(
    name = "date",
    labels = label_date("%I:%M %p", tz = "PST8PDT")
  ) +
  ggtitle("24hr Bitcoin price chart")

# Keep only observations at or after a fixed cutoff.
# NOTE(review): the cutoff is a hard-coded 2021 timestamp while btc_chart is
# fetched with days=1, so this presumably keeps every row now - confirm
# whether it should track the start of the 15-minute data instead.
q_minute <- filter(btc_chart, date >= "2021-06-26 14:54:57")

# Price line coloured by volume, titled with today's date, with dollar labels
# on the y axis, no axis titles and no grid lines.
btc_minute_plot <- ggplot(
  data = q_minute,
  mapping = aes(x = date, y = price, colour = volume, group = 1)
) +
  geom_line() +
  scale_x_datetime(
    name = "",
    labels = label_date("%I:%M %p", tz = "PST8PDT")
  ) +
  scale_y_continuous(labels = label_dollar()) +
  labs(
    title = paste0("Bitcoin price chart for ", format(Sys.time(), "%B %d %Y")),
    x = "",
    y = ""
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

#ggsave("btc-hrly-pct.png", btc_minute_plot, bg = "transparent")

# Load the accumulated 15-minute snapshots.
granular_15min <- read_tsv("granular_15min.tsv") %>%
  # Drop rows that are repeated header lines (coin == "coin"). Doing this
  # first keeps those rows from being parsed as timestamps, which would only
  # produce a "failed to parse" warning.
  filter(coin != "coin") %>%
  mutate(
    # strip a trailing "Z" marker before parsing the timestamp
    time_stamp = ymd_hms(str_remove(time_stamp, "Z")),
    # header rows mixed into the column can make it load as text
    hourly = as.numeric(hourly),
    # A TSV round-trip turns the factor into plain text, which would order the
    # categories alphabetically in plots. Restore the coldest-to-hottest
    # order; any label not in the list is kept and placed after the known
    # ones rather than being turned into NA.
    granular = factor(
      granular,
      levels = union(
        c(
          "ice age (< -6%)", "freezing (-4-6%)", "cold (-2-4%)",
          "chilly (-0-2%)", "neutral (0%)", "luke warm (0-2%)", "warm (2-4%)",
          "toasty (4-6%)", "hot (>6%)"
        ),
        granular
      )
    )
  )

# Stacked bars per snapshot time: number of coins in each temperature
# category, with the x axis labelled as clock time in the PST8PDT time zone.
bar_plot <- granular_15min %>%
  count(time_stamp, granular, change) %>%
  ggplot(aes(x = time_stamp, y = n, fill = granular)) +
  geom_col() +
  scale_x_datetime(
    name = "time",
    labels = label_date("%I:%M %p", tz = "PST8PDT")
  ) +
  labs(y = NULL) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

#ggsave("alt100-15min-pct.png", bar_plot, bg = "transparent")

# patchwork: Bitcoin price chart stacked above the category bars
btc_minute_plot / bar_plot

Gabriel Mednick, PhD
Gabriel Mednick, PhD

Data scientist, biochemist, computational biologist, educator, life enthusiast