Programming Languages – Ted Laderas, PhD

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(skimr)
library(visdat)
# Read the TidyTuesday 2023-03-21 programming-languages CSV directly from
# GitHub. Column types are guessed by readr, which prints the column
# specification as a message.
languages <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-03-21/languages.csv"
)

Rows: 4303 Columns: 49
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (21): pldb_id, title, description, type, creators, website, domain_name,...
dbl (24): appeared, domain_name_registered, isbndb, book_count, semantic_sch...
lgl  (4): features_has_comments, features_has_semantic_indentation, features...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Skim the Dataset

# Per-column summary of `languages` (missingness, unique counts, quantiles),
# reported separately for character, logical and numeric variables.
languages |>
  skimr::skim()

Data summary
Name	languages
Number of rows	4303
Number of columns	49
_______________________
Column type frequency:
character	21
logical	4
numeric	24
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
pldb_id	0	1.00	1	52	4303
title	0	1.00	1	56	4267
description	3480	0.19	4	2273	811
type	0	1.00	2	27	40
creators	3203	0.26	2	253	985
website	2928	0.32	13	131	1368
domain_name	3588	0.17	6	32	700
reference	2314	0.46	15	251	1955
github_repo	3402	0.21	25	73	897
github_repo_description	3438	0.20	4	419	853
github_language	3829	0.11	1	30	474
github_language_tm_scope	3837	0.11	4	34	361
github_language_type	3837	0.11	4	11	4
github_language_ace_mode	3838	0.11	1	16	96
github_language_file_extensions	3833	0.11	1	606	466
wikipedia	2731	0.37	32	104	1566
wikipedia_summary	2884	0.33	17	6741	1407
wikipedia_related	3145	0.27	1	1761	1059
line_comment_token	3831	0.11	1	7	23
origin_community	1190	0.72	3	305	2232
file_type	3213	0.25	2	6	4

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
features_has_comments	3683	0.14	1.00	TRU: 617, FAL: 3
features_has_semantic_indentation	3722	0.14	0.11	FAL: 516, TRU: 65
features_has_line_comments	3765	0.13	0.96	TRU: 517, FAL: 21
is_open_source	3792	0.12	0.89	TRU: 453, FAL: 58

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
appeared	0	1.00	1991.11	111.44	-2000	1984.00	1997.0	2012.00	2023	▁▁▁▁▇
domain_name_registered	3801	0.12	2011.33	7.02	1990	2007.00	2013.0	2017.00	2023	▁▃▃▇▆
isbndb	3217	0.25	7.71	33.16	0	0.00	0.0	2.00	400	▇▁▁▁▁
book_count	0	1.00	2.08	17.34	0	0.00	0.0	0.00	401	▇▁▁▁▁
semantic_scholar	3545	0.18	3.79	8.32	0	0.00	0.0	3.00	52	▇▁▁▁▁
language_rank	0	1.00	2151.00	1242.31	0	1075.50	2151.0	3226.50	4302	▇▇▇▇▇
github_repo_stars	3414	0.21	2127.40	7554.02	0	29.00	194.0	1071.00	88526	▇▁▁▁▁
github_repo_forks	3417	0.21	261.29	1203.00	0	2.25	16.0	91.50	23732	▇▁▁▁▁
github_repo_updated	3418	0.21	2021.39	1.76	2012	2022.00	2022.0	2022.00	2023	▁▁▁▁▇
github_repo_subscribers	3418	0.21	62.34	200.88	0	4.00	13.0	44.00	2910	▇▁▁▁▁
github_repo_created	3425	0.20	2015.84	3.48	2006	2013.00	2016.0	2019.00	2022	▁▅▇▇▇
github_repo_issues	3518	0.18	123.03	546.26	0	1.00	9.0	61.00	9522	▇▁▁▁▁
github_repo_first_commit	3567	0.17	2014.74	4.99	1987	2012.00	2015.0	2018.00	2022	▁▁▁▆▇
github_language_repos	3833	0.11	197134.67	1226900.57	0	91.25	725.5	7900.25	16046489	▇▁▁▁▁
wikipedia_daily_page_views	2837	0.34	227.13	783.55	-1	9.00	24.0	99.00	13394	▇▁▁▁▁
wikipedia_backlinks_count	2877	0.33	318.55	1635.29	1	13.00	39.0	126.00	34348	▇▁▁▁▁
wikipedia_page_id	2893	0.33	9167847.21	13506832.90	928	375153.75	2114700.5	12321223.00	63063548	▇▁▁▁▁
wikipedia_appeared	2958	0.31	1991.14	17.03	1830	1980.00	1994.0	2005.00	2019	▁▁▁▃▇
wikipedia_created	3040	0.29	2005.75	3.77	2001	2003.00	2005.0	2007.00	2020	▇▇▂▁▁
wikipedia_revision_count	3130	0.27	330.43	813.26	1	35.00	84.0	242.00	10104	▇▁▁▁▁
last_activity	0	1.00	2000.62	84.60	-900	1992.00	2006.0	2021.00	2023	▁▁▁▁▇
number_of_users	0	1.00	13771.26	227712.95	0	0.00	20.0	230.00	7179119	▇▁▁▁▁
number_of_jobs	0	1.00	422.18	12572.99	0	0.00	0.0	0.00	771996	▇▁▁▁▁
central_package_repository_count	1482	0.66	0.00	0.00	0	0.00	0.0	0.00	0	▁▁▇▁▁

Visdat

# Draw visdat's overview plot of `languages`; any warnings raised while the
# plot is being built are suppressed.
languages |>
  vis_dat() |>
  suppressWarnings()

Looking at Data Science Languages

# Titles of the languages treated as "data science" languages in this table.
# NOTE(review): the rendered table has rows for Python, R, SAS and Julia only,
# so "Excel" does not match any `title` in the data. The rank plot further
# down uses "MATLAB" instead of "Excel" -- confirm which list is intended.
ds_langs <- c("R", "Julia", "Python", "SAS", "Excel")

# Keep only the rows for those languages, drop every wikipedia-related
# column, and render the result as a table.
languages |>
  filter(title %in% ds_langs) |>
  select(!contains("wikipedia")) |>
  knitr::kable()

Table 1: Data Science Languages

pldb_id	title	description	type	appeared	creators	website	domain_name	domain_name_registered	reference	isbndb	book_count	semantic_scholar	language_rank	github_repo	github_repo_stars	github_repo_forks	github_repo_updated	github_repo_subscribers	github_repo_created	github_repo_description	github_repo_issues	github_repo_first_commit	github_language	github_language_tm_scope	github_language_type	github_language_ace_mode	github_language_file_extensions	github_language_repos	features_has_comments	features_has_semantic_indentation	features_has_line_comments	line_comment_token	last_activity	number_of_users	number_of_jobs	origin_community	central_package_repository_count	file_type	is_open_source
python	Python	NA	pl	1991	Guido van Rossum	https://www.python.org/	python.org	1995	https://www.programiz.com/python-programming/keyword-list	339	342	52	3	NA	NA	NA	NA	NA	NA	NA	NA	NA	Python	source.python	programming	python	py cgi fcgi gyp gypi lmi py3 pyde pyi pyp pyt pyw rpy smk spec tac wsgi xpy	9300725	TRUE	TRUE	TRUE	#	2022	2818037	46976	Centrum Wiskunde & Informatica	NA	text	NA
r	R	NA	pl	1993	Ross Ihaka and Robert Gentleman	https://www.r-project.org	r-project.org	1999	NA	40	40	9	15	NA	NA	NA	NA	NA	NA	NA	NA	NA	R	source.r	programming	r	r rd rsx	689533	TRUE	FALSE	TRUE	#	2022	1075613	14173	University of Auckland	NA	text	TRUE
sas	SAS	NA	pl	1976	Anthony James Barr	https://www.sas.com	sas.com	1990	NA	94	96	10	35	NA	NA	NA	NA	NA	NA	NA	NA	NA	SAS	source.sas	programming	text	sas	8407	TRUE	FALSE	TRUE	*	2022	361103	4682	NA	0	text	NA
julia	Julia	NA	pl	2012	Jeff Bezanson and Alan Edelman and Stefan Karpinski and Viral B. Shah	http://julialang.org/	julialang.org	NA	NA	22	22	35	34	https://github.com/JuliaLang/julia	41515	5100	2023	952	2011	The Julia Programming Language	4420	NA	Julia	source.julia	programming	julia	jl	53507	TRUE	FALSE	TRUE	#	2023	81911	85	https://github.com/JuliaLang	NA	text	TRUE

Date Appeared vs. Number of Users

# Scatter plot of the year a language appeared against its number of users,
# restricted to languages that appeared after 1980.
# `title` is mapped alongside x and y; presumably so the language name shows
# up in the plotly hover tooltip -- confirm.
lang_plot <- languages |>
  filter(appeared > 1980) |>
  ggplot(aes(title = title, x = appeared, y = number_of_users)) +
  geom_point() +
  labs(title = "Languages: Date Appeared vs Number of Users after 1980")

# Convert the static ggplot into an interactive plotly widget.
plotly::ggplotly(lang_plot)

Language Rank versus Number of Users

# Scatter plot of language rank against number of users for languages that
# appeared after 1980 and have a rank below 60. Points are flagged "Y" (blue)
# when the title is one of the listed data science languages, "N" (grey)
# otherwise. This reuses (overwrites) the `lang_plot` name.
lang_plot <- languages |>
  filter(appeared > 1980, language_rank < 60) |>
  mutate(
    ds_language = if_else(
      title %in% c("R", "MATLAB", "SAS", "Julia", "Python"),
      "Y",
      "N"
    )
  ) |>
  ggplot(
    aes(
      title = title,
      x = language_rank,
      y = number_of_users,
      color = ds_language
    )
  ) +
  geom_point() +
  labs(title = "Languages: Rank vs Number of Users after 1980") +
  scale_color_manual(values = c("Y" = "blue", "N" = "grey"))

# Convert the static ggplot into an interactive plotly widget.
plotly::ggplotly(lang_plot)

Languages by Type

# Number of languages per `type`, largest group first, rendered as a table.
languages |>
  count(type, sort = TRUE) |>
  knitr::kable()

Table 2: Number of Languages by Type, sorted by count

type	n
pl	3368
textMarkup	97
queryLanguage	94
xmlFormat	69
dataNotation	68
grammarLanguage	67
esolang	66
template	55
textDataFormat	50
protocol	49
assembly	45
notation	36
database	35
ir	22
isa	22
idl	20
visual	18
jsonFormat	14
plzoo	12
schema	12
barCodeFormat	9
configFormat	8
contractLanguage	7
numeralSystem	7
wikiMarkup	7
bytecode	6
dataValidationLanguage	6
knowledgeBase	5
stylesheetLanguage	5
hardwareDescriptionLanguage	4
diffFormat	3
headerLang	3
musicalNotation	3
timeFormat	3
textEncodingFormat	2
unixApplication	2
distribution	1
font	1
optimizingCompiler	1
yamlFormat	1

Citation

BibTeX citation:

@online{laderas2023,
  author = {Laderas, Ted},
  title = {Programming {Languages}},
  date = {2023-03-21},
  url = {https://laderast.github.io/articles/2023-03-21-programming-languages/},
  langid = {en}
}

For attribution, please cite this work as:

Laderas, Ted. 2023. “Programming Languages.” March 21, 2023. https://laderast.github.io/articles/2023-03-21-programming-languages/.