##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(DT)
library(knitr)
# With the recent API change that limits the overall result set size, we now look
# for databases (plural) where we used to use the singular database for this vignette.
# We also exclude patents without an assignee organization.
# We first need to write a query. Our query will look for "databases" in either
# the patent title or abstract...Note, this isn't a terribly good way to ID our
# patents, but it will work for the purpose of demonstration. Users who are
# interested in writing higher-quality queries could consult the large body of
# research that has been done in field of patent document retrieval.
query <- with_qfuns(
and(
neq("assignees_at_grant.organization" = ""),
or(
text_phrase(patent_abstract = "databases"),
contains(patent_title = "databases")
)
)
)
query
## {"_and":[{"_neq":{"assignees_at_grant.organization":""}},{"_or":[{"_text_phrase":{"patent_abstract":"databases"}},{"_contains":{"patent_title":"databases"}}]}]}
#> {"_and":[{"_neq":{"assignees_at_grant.organization":""}},{"_or":[{"_text_phrase":{"patent_abstract":"databases"}},{"_contains":{"patent_title":"databases"}}]}]}
# Create a list of the fields we'll need for the analysis
fields <- c(
"patent_number", "patent_date","patent_year",
"patent_num_us_patents_cited",
"assignees_at_grant.organization", # fulltext field now if used in q:
"assignees_at_grant.assignee_id" # the assignee fields come back in a nested object
)
# Send an HTTP request to the PatentsView API to get the data
pv_out <- search_pv(query, fields = fields, all_pages = TRUE, per_page = 1000)
knitr::opts_chunk$set(
delay = 30
)
# Unnest the data frames that are stored in the assignee list column
dl <- unnest_pv_data(pv_out$data, "patent_number")
dl
## List of 2
## $ assignees_at_grant:'data.frame': 7150 obs. of 3 variables:
## ..$ patent_number: chr [1:7150] "4751635" ...
## ..$ assignee : chr [1:7150] "https://search.patentsview.org/api/v1/assi"..
## ..$ organization : chr [1:7150] "Bell Communications Research, Inc." ...
## $ patents :'data.frame': 7598 obs. of 4 variables:
## ..$ patent_number : chr [1:7598] "4751635" ...
## ..$ patent_date : chr [1:7598] "1988-06-14" ...
## ..$ patent_year : int [1:7598] 1988 1988 ...
## ..$ patent_num_us_patents_cited: int [1:7598] 3 18 ...
#> List of 2
#> $ assignees_at_grant:'data.frame': 7150 obs. of 3 variables:
#> ..$ patent_number: chr [1:7150] "4751635" ...
#> ..$ assignee : chr [1:7150] "https://search.patentsview.org/api/v1/assi"..
#> ..$ organization : chr [1:7150] "Bell Communications Research, Inc." ...
#> $ patents :'data.frame': 7598 obs. of 4 variables:
#> ..$ patent_number : chr [1:7598] "4751635" ...
#> ..$ patent_date : chr [1:7598] "1988-06-14" ...
#> ..$ patent_year : int [1:7598] 1988 1988 ...
#> ..$ patent_num_us_patents_cited: int [1:7598] 3 18 ...
# We shouldn't need the filter (we queried for non empty string orginazations) but
# some are NA somehow
# We don't get the assignee_total_num_patents back from the patents endpoint any longer.
# We'll have to make a call to the assignee endpoint once we know who the top 75
# assignees are.
# Requested assignees_at_grant.assignee_id comes back in the assignees_at_grant object
# with key "assignee" and value like https://search.patentsview.org/api/v1/assignee/35/
# We want to parse out the assignee_ids (35 in the example value)
# Create a data frame with the top 75 assignees:
top_asgns <-
dl$assignees_at_grant %>%
filter(!is.na(organization)) %>% # some patents are assigned to an inventor (not an org)
mutate(assignee_id = gsub(".*/([0-9]+)/$", "\\1", assignee)) %>%
group_by(organization, assignee_id) %>%
summarise(db_pats = n()) %>%
ungroup() %>%
arrange(desc(db_pats)) %>%
slice(1:75)
## `summarise()` has grouped output by 'organization'. You can override using the
## `.groups` argument.
# Now that we have the assignee_id's we can call the assignee endpoint to get the
# total number of patents for each of our top_asgns
assignee_query = qry_funs$eq(assignee_id = as.integer(top_asgns$assignee_id))
assignee_fields <- c(
"assignee_id","organization", "num_patents"
)
# we'll post to the API, the query is a pretty large string (an "or" of 75 assignee_ids)
assignee_out <- search_pv(assignee_query , fields = assignee_fields, all_pages = TRUE,
per_page = 1000, endpoint = "assignees", method = "POST")
assignee_counts <- unnest_pv_data(assignee_out$data, "assignee_id")
assignee_counts
## List of 1
## $ assignees:'data.frame': 75 obs. of 3 variables:
## ..$ assignee_id : int [1:75] 3 6 ...
## ..$ organization: chr [1:75] "Xerox Corporation" ...
## ..$ num_patents : int [1:75] 23495 80931 ...
#> List of 1
#> $ assignees:'data.frame': 75 obs. of 3 variables:
#> ..$ assignee_id : int [1:75] 3 6 ...
#> ..$ organization: chr [1:75] "Xerox Corporation" ...
#> ..$ num_patents : int [1:75] 23495 80931 ...
# Here we redo top_asgns now that we have all the fields we need.
# We join in the total counts and mutate in the percentages
top_asgns <- dl$assignees_at_grant %>%
inner_join(assignee_counts$assignees) %>%
rename(ttl_pats = num_patents) %>%
group_by(organization, ttl_pats) %>%
summarise(db_pats = n()) %>%
mutate(frac_db_pats = round(db_pats / ttl_pats, 3)) %>%
select(c(1, 3, 2, 4)) %>%
arrange(desc(db_pats))
## Joining, by = "organization"
## `summarise()` has grouped output by 'organization'. You can override using the
## `.groups` argument.
# Create datatable
datatable(
data = top_asgns,
rownames = FALSE,
colnames = c(
"Assignee", "DB patents","Total patents", "DB patents / total patents"
),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left; font-style: italic;',
"Table 1: Top assignees in 'databases'"
),
options = list(pageLength = 10)
)