Gather.town
OSSFE in numbers
Sold out in 2 weeks!¶
On the day registration opened, 80 seats were reserved. Within about two weeks, all seats were fully booked!
Source
import pandas as pd
import plotly.express as px
from pypalettes import load_cmap
# Colour palette shared by the figures on this page.
cmap = load_cmap("blaziken")

# Load the sign-up sheet, flatten embedded newlines, keep one row per email.
df = pd.read_csv("registrations.csv")
df = df.replace("\n", " ", regex=True)
df = df.drop_duplicates(subset=["Email Address"])

# Count registrations per calendar day, then accumulate them over time.
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
df["Date"] = df["Timestamp"].dt.date
daily = df.groupby("Date").size().reset_index(name="Count")
daily["Cumulative"] = daily["Count"].cumsum()

# Prepend a short run of zero-registration days so the curve rises from zero.
first_day = daily["Date"].min()
lead_in = pd.DataFrame(
    {
        "Date": pd.date_range(
            start=first_day - pd.Timedelta(days=2),
            end=first_day,
            freq="D",
        ),
        "Count": 0,
        "Cumulative": 0,
    }
)
df = pd.concat([lead_in, daily], ignore_index=True)
# Cumulative registrations as a filled area chart.
fig = px.area(
    df,
    x="Date",
    y="Cumulative",
)
# Outline the area with the first palette colour.
palette = [f"rgb{tuple(rgb)}" for rgb in cmap.rgb]
fig.update_traces(line_color=palette[0])
# Pin the y axis to start at zero and hide both axis titles.
fig.update_yaxes(range=[0, df["Cumulative"].max()], title_text="")
fig.update_xaxes(title_text="")
# Mark the day registration opened.
fig.add_annotation(
    x="2025-02-06",
    y=0,
    text="Registration opens",
    showarrow=True,
    arrowhead=1,
    yshift=120,
)
fig.update_layout(font=dict(family="Coolvetica", color="black"))
fig
Loading...
Participants per institution¶
We are lucky to have participants from more than 100 different institutions worldwide!
Source
import pycountry_convert as pc
import pandas as pd
import plotly.express as px
import matplotlib.colors as mcolors
df = pd.read_csv("registrations.csv")
# Normalise the free-text country names before matching them with pycountry:
# strip surrounding whitespace, then collapse any run of spaces to a single
# space. (The previous `str.replace(" ", " ")` replaced a single space with a
# single space — a no-op — so multi-space entries were never cleaned.)
df["Country"] = df["Country"].str.strip()
df["Country"] = df["Country"].str.replace(r" {2,}", " ", regex=True)
def lookup_country(name: str, *, allow_fuzzy: bool = False) -> str | None:
    """Lookup country name by country `name` using `pycountry`."""
    import pycountry

    # Normalise known non-standard spellings from the registration form
    # before querying pycountry.
    aliases = {
        "UK": "United Kingdom",
        "Cheshire": "United Kingdom",
        "England": "United Kingdom",
        "Russia": "Russian Federation",
        "The Netherlands": "Netherlands",
        "USA and UK": "United Kingdom",
        "United States America": "United States",
    }
    name = aliases.get(name, name)

    # 1) exact-name match
    exact = pycountry.countries.get(name=name)
    if exact is not None:
        return exact.name
    # 2) pycountry's broader lookup (official names, codes, ...)
    try:
        return pycountry.countries.lookup(name).name
    except LookupError:
        pass
    # 3) optional fuzzy search; None when disabled or nothing matches
    if not allow_fuzzy:
        return None
    try:
        return pycountry.countries.search_fuzzy(query=name)[0].name
    except (LookupError, IndexError):
        return None
# Apply the function to standardize country names
# Fail fast (without fuzzy matching) if any raw country value cannot be
# resolved, so typos surface here rather than as NaN further down the page.
for country_name in df["Country"].unique():
    assert (
        lookup_country(country_name) is not None
    ), f"Country name {country_name} not found"
df["Country"] = df["Country"].apply(lookup_country)
# show where the country name is None
assert df["Country"].isnull().sum() == 0, "Some country names are None"
def get_continent(country_name):
    """Return the continent name for `country_name` via pycountry_convert.

    Raises:
        ValueError: if the country cannot be mapped to a continent.
    """
    try:
        country_alpha2 = pc.country_name_to_country_alpha2(country_name)
        continent_code = pc.country_alpha2_to_continent_code(country_alpha2)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except KeyError as err:
        # pycountry_convert raises KeyError for unknown names; chain it so the
        # original cause stays visible. The previous bare `except:` swallowed
        # every exception type, including KeyboardInterrupt/SystemExit.
        raise ValueError(f"Country name {country_name} not found") from err
# Add continent column
df["Continent"] = df["Country"].apply(get_continent)
# Canonical names for institutions that registrants spelled in several ways.
# Keys are raw form-field values; values are the standardised label used in
# the treemap, so each institution is counted once.
institution_map = {
    "UKAEA": "UKAEA",
    "UK Atomic Energy Authority": "UKAEA",
    "Imperial College London/UK Atomic Energy Authority": "Imperial College London",
    "York Plasma Institute, University of York": "University of York",
    "HI IBERIA (HIB) https://www.hi-iberia.es/artificial-intelligence": "HI IBERIA",
    "HI-Iberia": "HI IBERIA",
    "HI Iberia": "HI IBERIA",
    "HI-Iberia, University Carlos II, Gregorio Millán Barbany Institute": "HI IBERIA",
    "ATG Engineering S.L": "ATG Engineering S.L.",
    "ATG Europe": "ATG Engineering S.L.",
    "VTT Research Center of Finland": "VTT Technical Research Centre",
    "VTT Technical Research Centre of Finland Ltd": "VTT Technical Research Centre",
    "VTT Technical Research Centre of Finland Ltd.": "VTT Technical Research Centre",
    "VTT": "VTT Technical Research Centre",
    "CEA/IRFM": "CEA",
    "CEA IRFM": "CEA",
    "MIT": "Massachusetts Institute of Technology",
    "General Fusion": "General Fusion Inc.",
    "MIT PSFC": "Massachusetts Institute of Technology",
    "ntTau Digital": "nTtau Digital LTD",
    "nTtau Digital": "nTtau Digital LTD",
    "nTtau Digital Ltd": "nTtau Digital LTD",
    "Proxima Fusion GmbH": "Proxima Fusion",
    "University of York Plasma Institute": "University of York",
    "University of Rochester Laboratory for Laser Energetics": "University of Rochester",
    "Politecnico of Turin": "Politecnico di Torino",
    "Oak Ridge National Laboratory": "ORNL",
    "Thales": "Gen-F",
    "GenF": "Gen-F",
    "ITER-FRANCE": "ITER Organization",
    "Woodruff Scientific": "Woodruff Scientific Ltd",
    "Lawrence Berkeley National Laboratory": "LBNL",
    "IDOM UK": "IDOM UK Ltd",
    "IDOM Nuclear Services": "IDOM",
    "Flatiron institute": "Flatiron Institute",
    "Commonwealth Fusion Systems": "CFS",
    "Fusion for energy": "Fusion for Energy",
    "Fusion For Energy": "Fusion for Energy",
    "F4E": "Fusion for Energy",
    "DTU - Technical University of Denmark": "DTU",
    "Next Step Fusion s.a.r.l.": "Next Step Fusion",
}
# Tidy institution names: drop surrounding whitespace, then collapse each
# known spelling variant onto its canonical form.
df["Institution"] = df["Institution"].str.strip()


def standardise_institutions(institution):
    """Return the canonical name for `institution` (unchanged if unknown)."""
    return institution_map.get(institution, institution)


df["Institution"] = df["Institution"].map(standardise_institutions)
# One row per (continent, country, institution) with its registration count.
df = (
    df.groupby(["Continent", "Country", "Institution"]).size().reset_index(name="count")
)
# Continents ordered by total registrations so the largest gets the first
# palette colour.
unique_continents = (
    df.groupby("Continent")["count"].sum().sort_values(ascending=False).index
)
# Sample the colormap at evenly spaced points, one per continent.
color_map = {}
n_continents = len(unique_continents)
for rank, continent in enumerate(unique_continents):
    color_map[continent] = mcolors.to_hex(cmap(rank / (n_continents - 1)))
df["color"] = df["Continent"].map(color_map)
# Treemap: continent -> country -> institution, sized by registrations.
fig = px.treemap(
    df,
    path=["Continent", "Country", "Institution"],
    values="count",
    color="Continent",
    color_discrete_map=color_map,
    custom_data=df[["count"]],
    hover_data={"count": ":.0f"},
    labels={"count": "Registrations"},
)
# Show both the label and its count inside each tile.
fig.update_traces(texttemplate="%{label} %{customdata[0]}")
fig.update_layout(font=dict(family="Coolvetica", color="black"))
print(f"Number of unique institutions: {len(df['Institution'].unique())}")
fig
Loading...
Participants per country¶
The United States and the United Kingdom have the largest representations at OSSFE 2025, with 59 and 53 participants, respectively!
Source
# Bubble chart on a world map: total registrations per country, reusing the
# (Continent, Country, Institution) counts computed for the treemap above.
df2 = df[["Country", "count"]].groupby("Country", as_index=False).sum()
fig = px.scatter_geo(
    df2,
    locations="Country",
    locationmode="country names",
    size="count",
    hover_name="Country",
    hover_data={"count": True, "Country": False},
    projection="natural earth",
    size_max=50,
)
# Cleaner map frame and the page's shared font.
fig.update_layout(
    geo=dict(showframe=False, showcoastlines=True),
    font=dict(family="Coolvetica", color="black"),
)
fig
Loading...