import pandas as pd
The Portal Project data
= pd.read_csv('data/portal_project.tsv', delimiter='\t') d
Create separate tables
= d.copy()[['species_id', 'species', 'genus', 'taxa']]
species = species.drop_duplicates()
species
= d.copy()[['species_id', 'species', 'genus', 'taxa']].drop_duplicates()
species1
= d.copy().drop(columns=['species', 'genus', 'taxa']) observations
Average hindfoot length per species
= observations.hindfoot_length.mean(skipna=True) # skipna=True is also the default average_length
average_length
29.287931802277498
= observations.groupby('species_id', as_index=False).\
average_length_ps = ('hindfoot_length', 'mean')).\
agg(mean_hf_length ='inner')
merge(species, how average_length_ps.head()
species_id | mean_hf_length | species | genus | taxa | |
---|---|---|---|---|---|
0 | AB | NaN | bilineata | Amphispiza | Bird |
1 | AH | 33.0 | harrisi | Ammospermophilus | Rodent |
2 | AS | NaN | savannarum | Ammodramus | Bird |
3 | BA | 13.0 | taylori | Baiomys | Rodent |
4 | CB | NaN | brunneicapillus | Campylorhynchus | Bird |
Ratio M/F per species per year
= observations.\
ratios 'species_id', 'year', 'sex'], as_index=False).\
groupby([= ('record_id', 'count')).\
agg(nr = ['species_id', 'year'], columns = 'sex', values = 'nr')
pivot(index 'mfratio'] = ratios.M / ratios.F
ratios[= ratios.reset_index() ratios
ratios.head()
sex | species_id | year | F | M | mfratio |
---|---|---|---|---|---|
0 | AH | 1995 | NaN | 1.0 | NaN |
1 | BA | 1989 | NaN | 3.0 | NaN |
2 | BA | 1990 | 8.0 | 3.0 | 0.375000 |
3 | BA | 1991 | 19.0 | 6.0 | 0.315789 |
4 | BA | 1992 | 4.0 | 2.0 | 0.500000 |
Number of observations per species and year
= observations.\
obspspy 'species_id', 'year'], as_index=False).\
groupby([= ('record_id', 'count'))
agg(obs
'totobs'] = obspspy.groupby('species_id').obs.transform('sum')
obspspy[
= obspspy[obspspy.totobs > 300].\
obspspy ='inner')
merge(species, how
'speciesname'] = obspspy.genus + ' ' + obspspy.species obspspy[
Make a plot
import plotnine as p9
(=p9.aes(x='year', y='obs', color='speciesname'))
p9.ggplot(obspspy, mapping+ p9.geom_line()
)
(=p9.aes(x='year', y='obs'))
p9.ggplot(obspspy, mapping+ p9.geom_line()
+ p9.facet_wrap('speciesname')
+ p9.scale_y_log10()
)