Skip to content

Instantly share code, notes, and snippets.

@anapaulagomes
Created November 19, 2025 09:19
Show Gist options
  • Select an option

  • Save anapaulagomes/e131d38f327f501f220005269c912688 to your computer and use it in GitHub Desktop.

Select an option

Save anapaulagomes/e131d38f327f501f220005269c912688 to your computer and use it in GitHub Desktop.
Lê dados de internação SIHSUS
import marimo
# Version string recorded in the notebook file (presumably the marimo release
# that last saved it — confirm before editing by hand).
__generated_with = "0.17.7"
# Notebook application object; every cell below registers on it via @app.cell.
app = marimo.App(width="medium")
@app.cell
def _():
    # Shared imports; the returned names are what the other cells receive
    # as parameters (Path, mo, pl, px).
    from pathlib import Path

    import marimo as mo
    import plotly.express as px
    import polars as pl

    return Path, mo, pl, px
@app.cell
def _(mo):
    # Introductory notes for the notebook, rendered as markdown. The text is
    # kept in Portuguese; it records the Zika start date (September 2015), a
    # link to the SIHSUS variable dictionary, and the column naming rule:
    # UPPERCASE names are original DATASUS fields, names starting in lowercase
    # are derived/enriched fields.
    _intro = mo.md(r"""
# Zika
Início: setembro 2015
https://pcdas.icict.fiocruz.br/conjunto-de-dados/sistema-de-informacoes-hospitalares-do-sus-sihsus/dicionario-de-variaveis/
* pico de microcefalia, doenças cerebrais
* desnutrição, vulnerabilidade alimentar
As colunas com nomes em MAIÚSCULO representam dados originais advindos do DATASUS e colunas com nomes iniciando em minúsculo representam dados resultantes de transformação ou enriquecimento.
""")
    _intro
    return
@app.cell
def _():
    # Columns to read from each CSV extract. The list is assembled from
    # contiguous chunks so the final order is unchanged.

    # Diagnosis / death codes and admission/discharge years.
    _codes_and_years = [
        "DIAG_PRINC",
        "DIAG_SECUN",
        "ano_internacao",
        "ano_saida",
        "CID_MORTE",
    ]

    # "def_*" columns (presumably decoded/labelled versions of raw fields —
    # confirm against the SIHSUS variable dictionary).
    _decoded = [
        "def_cobranca",
        "def_cod_idade",
        "def_complex",
        "def_diag_princ_cap",
        "def_diag_princ_cat",
        "def_diag_princ_grupo",
        "def_diag_princ_subcat",
        "def_diag_secun_cap",
        "def_diag_secun_cat",
        "def_diag_secun_grupo",
        "def_diag_secun_subcat",
        "def_dias_perm",
        "def_etnia",
        "def_idade_18",
        "def_idade_anos",
        "def_idade_bas",
        "def_idade_dias",
        "def_idade_meses",
        "def_idade_pub",
        "def_leitos",
        "def_morte",
        "def_nacionalidade",
        "def_procedimento_realizado",
        "def_procedimento_solicitado",
        "def_raca_cor",
        "def_regime",
        "def_sexo",
        "def_tpdisec1",
    ]

    # Admission and discharge dates (parsed to Date by the loading cell).
    _dates = ["dt_inter", "dt_saida"]

    # "res_*" columns (presumably the patient's municipality of residence —
    # confirm against the SIHSUS variable dictionary).
    _residence = [
        "res_LATITUDE",
        "res_LONGITUDE",
        "res_MUNCOD",
        "res_MUNNOME",
        "res_MUNNOMEX",
        "res_NOME_UF",
        "res_SIGLA_UF",
    ]

    selected_columns = [*_codes_and_years, *_decoded, *_dates, *_residence]
    return (selected_columns,)
@app.cell
def _(Path, pl, selected_columns):
    # Load every CSV extract under data/ETLSIH whose name matches
    # ETLSIH.ST_PE_201[4-6]*.csv, stack them into one frame, parse the
    # admission/discharge dates, keep admissions from 2014-01-01 through
    # 2016-12-31, and add a `week` column derived from the admission date.
    #
    # Raises FileNotFoundError when no extract matches the pattern.

    # Sorted so the row order does not depend on filesystem listing order.
    _paths = sorted(Path("data/ETLSIH").glob("ETLSIH.ST_PE_201[4-6]*.csv"))
    if not _paths:
        # Without this guard pl.concat() would be handed an empty list and
        # fail with an error that does not mention the missing files.
        raise FileNotFoundError(
            "No files matching data/ETLSIH/ETLSIH.ST_PE_201[4-6]*.csv were found; "
            "check the working directory and that the extracts were downloaded."
        )

    _data_per_months = [
        pl.read_csv(
            _path,
            infer_schema_length=0,  # read the file without schema inference
            columns=selected_columns,
        )
        for _path in _paths
    ]

    df = (
        pl.concat(_data_per_months)
        .with_columns(
            pl.col("dt_inter").str.strptime(pl.Date),
            pl.col("dt_saida").str.strptime(pl.Date),
        )
        .filter(
            pl.col("dt_inter").is_between(pl.date(2014, 1, 1), pl.date(2016, 12, 31))
        )
        .with_columns(week=pl.col("dt_inter").dt.week())
    )
    # Last expression of the cell: shown as the cell output.
    df.shape
    return (df,)
@app.cell
def _(df, px):
    # Line chart of admission counts per week, one line per admission year.
    # NOTE(review): `week` comes from polars' dt.week(); it may not line up
    # with the epidemiological-week calendar named in the title, and days
    # around New Year can land in week 52/53 of the neighbouring year while
    # still being grouped under their own `ano_internacao` — confirm.
    _weekly_counts = (
        df.group_by(["ano_internacao", "week"])
        .len("count")
        .sort(["week", "ano_internacao"])
    )
    _fig = px.line(
        _weekly_counts,
        x="week",
        y="count",
        color="ano_internacao",
        title="All cases by epidemiological week per year",
    )
    # The x axis holds week numbers, not dates, so the tick step is a plain
    # number (one tick per week). The previous "M1" is a month step that is
    # only meaningful on date axes.
    _fig.update_xaxes(dtick=1, tickangle=45, showgrid=True)
    _fig
    return
@app.cell
def _(df):
    # Show only the diagnosis-related columns (primary and secondary
    # diagnosis codes plus their chapter/category/group/subcategory fields).
    diagnosis_cols = [
        *("DIAG_PRINC", "DIAG_SECUN"),
        *(
            f"def_diag_{_kind}_{_level}"
            for _kind in ("princ", "secun")
            for _level in ("cap", "cat", "grupo", "subcat")
        ),
    ]
    df.select(diagnosis_cols)
    return
@app.cell
def _():
    # Diagnosis codes matched against DIAG_PRINC by the cells below to select
    # the "Cases related to Zika" subset.
    # NOTE(review): presumably ICD-10 codes written without the dot (e.g.
    # A92.8 -> "A928") — confirm each code's meaning before relying on it.
    diagnosis_code = ["U069", "A928", "A920", "A925", "P354"]
    return (diagnosis_code,)
@app.cell
def _():
    # Disabled exploratory query, kept for reference: weekly counts per
    # admission year restricted to the codes in `diagnosis_code`.
    # df.filter(pl.col("DIAG_PRINC").is_in(diagnosis_code)).with_columns(
    # pl.col("dt_inter").dt.week().alias("week")
    # ).group_by(
    # ["ano_internacao", "week"]
    # ).len("count").sort(["week", "ano_internacao"])
    return
@app.cell
def _(df, diagnosis_code, pl, px):
    # Daily admission counts for the selected diagnosis codes, one line per
    # primary diagnosis code.
    _matches_code = pl.col("DIAG_PRINC").is_in(diagnosis_code)
    _daily_counts = (
        df.filter(_matches_code)
        .group_by(["dt_inter", "DIAG_PRINC"])
        .len()  # the count column is named "len"
        .sort("dt_inter")
    )
    _fig = px.line(
        _daily_counts,
        x="dt_inter",
        y="len",
        color="DIAG_PRINC",
        title="Cases related to Zika",
    )
    # Date axis: one tick per month.
    _fig.update_xaxes(dtick="M1", tickangle=45, showgrid=True)
    _fig
    return
@app.cell
def _(df, diagnosis_code, pl):
    # Display the admissions whose primary diagnosis is one of the selected
    # codes.
    _matches_code = pl.col("DIAG_PRINC").is_in(diagnosis_code)
    df.filter(_matches_code)
    return
@app.cell
def _(diagnosis_code):
    # Display the first entry of the selected diagnosis codes.
    diagnosis_code[0]
    return
# Run the notebook's cells when this file is executed as a script.
if __name__ == "__main__":
    app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment