4 min read

Assignment A02: TIDYVERSE

Adidas Vs Nike

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)

# Load the Adidas vs Nike product listing exactly once, from a fixed relative
# path, so that AN always comes from the same file and the analysis can be
# re-run without manual input.
csv_path <- "Adidas Vs Nike.csv"
if (!file.exists(csv_path)) {
  if (!interactive()) {
    stop(
      "Cannot find '", csv_path, "' in ", getwd(),
      call. = FALSE
    )
  }
  # Interactive fallback only: let the user locate the file by hand.
  csv_path <- file.choose()
}
AN <- read_csv(csv_path)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   `Product Name` = col_character(),
##   `Product ID` = col_character(),
##   `Listing Price` = col_double(),
##   `Sale Price` = col_double(),
##   Discount = col_double(),
##   Brand = col_character(),
##   Description = col_character(),
##   Rating = col_double(),
##   Reviews = col_double(),
##   `Last Visited` = col_datetime(format = "")
## )
# Preview the first six rows (the default for head()) to sanity-check the load.
head(AN, n = 6L)
## # A tibble: 6 x 10
##   `Product Name`      `Product ID` `Listing Price` `Sale Price` Discount Brand  
##   <chr>               <chr>                  <dbl>        <dbl>    <dbl> <chr>  
## 1 Women's adidas Ori~ AH2430                 14999         7499       50 Adidas~
## 2 Women's adidas Ori~ G27341                  7599         3799       50 Adidas~
## 3 Women's adidas Swi~ CM0081                   999          599       40 Adidas~
## 4 Women's adidas Spo~ B44832                  6999         3499       50 Adidas~
## 5 Women's adidas Ori~ D98205                  7999         3999       50 Adidas~
## 6 Women's adidas Spo~ B75586                  4799         1920       60 Adidas~
## # ... with 4 more variables: Description <chr>, Rating <dbl>, Reviews <dbl>,
## #   Last Visited <dttm>
# List the column names of the product table.
colnames(AN)
##  [1] "Product Name"  "Product ID"    "Listing Price" "Sale Price"   
##  [5] "Discount"      "Brand"         "Description"   "Rating"       
##  [9] "Reviews"       "Last Visited"

Sale and discount

# List products from the largest to the smallest discount.
# arrange() ignores grouping unless .by_group = TRUE, so a preceding
# group_by(Brand) would not change the row order; it is omitted here and the
# result is a plain, ungrouped tibble. Use
# group_by(Brand) %>% arrange(desc(Discount), .by_group = TRUE)
# instead if a per-brand ordering is wanted.
AN %>%
  arrange(desc(Discount))
## # A tibble: 3,268 x 10
##    `Product Name`      `Product ID` `Listing Price` `Sale Price` Discount Brand 
##    <chr>               <chr>                  <dbl>        <dbl>    <dbl> <chr> 
##  1 Women's adidas Spo~ B75586                  4799         1920       60 Adida~
##  2 Men's adidas Runni~ CI9914                  4999         2000       60 Adida~
##  3 Women's adidas ORI~ S82260                 11999         4800       60 Adida~
##  4 Women's adidas ORI~ BB2344                  9999         4000       60 Adida~
##  5 WOMEN'S ADIDAS SPO~ B96563                  6599         2640       60 Adida~
##  6 WoMen's adidas TRA~ CP9514                  5999         2400       60 Adida~
##  7 Men's adidas RUNNI~ CI1741                  4999         2000       60 Adida~
##  8 MEN'S ADIDAS ORIGI~ G28940                 18999         7600       60 Adida~
##  9 Women's adidas TRA~ BB3293                  4799         1920       60 Adida~
## 10 Women's ADIDAS ORI~ BY2976                 10999         4400       60 Adida~
## # ... with 3,258 more rows, and 4 more variables: Description <chr>,
## #   Rating <dbl>, Reviews <dbl>, Last Visited <dttm>

Total discount

# Add up the Discount column within each Brand; one output row per brand.
summarise(
  group_by(AN, Brand),
  Total_discount = sum(Discount)
)
## # A tibble: 5 x 2
##   Brand                    Total_discount
##   <chr>                             <dbl>
## 1 Adidas Adidas ORIGINALS              50
## 2 Adidas CORE / NEO                 40330
## 3 Adidas ORIGINALS                  28220
## 4 Adidas SPORT PERFORMANCE          19230
## 5 Nike                                  0

Group & Count

# For each Brand, count how many products have a Rating of at least 4 (TRUE)
# and how many do not (FALSE). The result stays grouped by Brand.
AN %>%
  group_by(Brand, Rating >= 4) %>%
  tally()
## # A tibble: 9 x 3
## # Groups:   Brand [5]
##   Brand                    `Rating >= 4`     n
##   <chr>                    <lgl>         <int>
## 1 Adidas Adidas ORIGINALS  TRUE              1
## 2 Adidas CORE / NEO        FALSE           690
## 3 Adidas CORE / NEO        TRUE            421
## 4 Adidas ORIGINALS         FALSE           591
## 5 Adidas ORIGINALS         TRUE            316
## 6 Adidas SPORT PERFORMANCE FALSE           395
## 7 Adidas SPORT PERFORMANCE TRUE            211
## 8 Nike                     FALSE           335
## 9 Nike                     TRUE            308

Pivot

# Spread Discount into one column per Brand value. All remaining columns are
# kept as row identifiers, so the number of rows does not change.
pivot_wider(
  data = AN,
  names_from = Brand,
  values_from = Discount
)
## # A tibble: 3,268 x 13
##    `Product Name`  `Product ID` `Listing Price` `Sale Price` Description  Rating
##    <chr>           <chr>                  <dbl>        <dbl> <chr>         <dbl>
##  1 Women's adidas~ AH2430                 14999         7499 Channeling ~    4.8
##  2 Women's adidas~ G27341                  7599         3799 A modern ta~    3.3
##  3 Women's adidas~ CM0081                   999          599 These adida~    2.6
##  4 Women's adidas~ B44832                  6999         3499 Inspired by~    4.1
##  5 Women's adidas~ D98205                  7999         3999 This design~    3.5
##  6 Women's adidas~ B75586                  4799         1920 Refine your~    1  
##  7 Women's adidas~ CG4051                  4799         2399 Refine your~    4.4
##  8 Women's adidas~ CM0080                   999          599 These adida~    2.8
##  9 WOMEN'S ADIDAS~ B75990                  5599         2799 These women~    4.5
## 10 Men's adidas O~ EE5761                  6599         3959 The Forest ~    4  
## # ... with 3,258 more rows, and 7 more variables: Reviews <dbl>,
## #   Last Visited <dttm>, Adidas Adidas ORIGINALS <dbl>, Adidas ORIGINALS <dbl>,
## #   Adidas CORE / NEO <dbl>, Adidas SPORT PERFORMANCE <dbl>, Nike <dbl>

This data frame has 3,268 rows and 10 columns: Product Name, Product ID, Listing Price, Sale Price, Discount, Brand, Description, Rating, Reviews, and Last Visited. Based on the reviewers' ratings, the total number of products rated 4 or higher is greater for Adidas (949 across its sub-brands) than for Nike (308).