Created
November 19, 2025 09:19
-
-
Save anapaulagomes/e131d38f327f501f220005269c912688 to your computer and use it in GitHub Desktop.
Lê dados de internação SIHSUS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import marimo | |
# Version marker for the marimo release this notebook file was generated with
# (as the variable name indicates).
__generated_with = "0.17.7"
# Notebook application object; every cell below registers itself on it through
# the @app.cell decorator, and app.run() at the bottom executes the notebook.
app = marimo.App(width="medium")
@app.cell
def _():
    # Shared imports for the notebook. The returned names are the ones the
    # other cells list as their parameters (Path, mo, pl, px).
    from pathlib import Path

    import marimo as mo
    import plotly.express as px
    import polars as pl

    return Path, mo, pl, px
# Introductory markdown cell (text is in Portuguese): states the Zika outbreak
# start date, links to the SIHSUS variable dictionary, and explains the column
# naming convention -- UPPERCASE names are original DATASUS data, names starting
# in lowercase are transformed or enriched data.
@app.cell
def _(mo):
    mo.md(r"""
    # Zika
    Início: setembro 2015
    https://pcdas.icict.fiocruz.br/conjunto-de-dados/sistema-de-informacoes-hospitalares-do-sus-sihsus/dicionario-de-variaveis/
    * pico de microcefalia, doenças cerebrais
    * desnutrição, vulnerabilidade alimentar
    As colunas com nomes em MAIÚSCULO representam dados originais advindos do DATASUS e colunas com nomes iniciando em minúsculo representam dados resultantes de transformação ou enriquecimento.
    """)
    return
@app.cell
def _():
    # Columns requested from every CSV file (passed as `columns=` to
    # pl.read_csv in the loading cell). The list is assembled from groups that
    # share a name prefix; the final order is the same as a flat listing.
    _diag_columns = ["DIAG_PRINC", "DIAG_SECUN"]
    _year_columns = ["ano_internacao", "ano_saida"]
    _def_columns = [
        "def_cobranca",
        "def_cod_idade",
        "def_complex",
        "def_diag_princ_cap",
        "def_diag_princ_cat",
        "def_diag_princ_grupo",
        "def_diag_princ_subcat",
        "def_diag_secun_cap",
        "def_diag_secun_cat",
        "def_diag_secun_grupo",
        "def_diag_secun_subcat",
        "def_dias_perm",
        "def_etnia",
        "def_idade_18",
        "def_idade_anos",
        "def_idade_bas",
        "def_idade_dias",
        "def_idade_meses",
        "def_idade_pub",
        "def_leitos",
        "def_morte",
        "def_nacionalidade",
        "def_procedimento_realizado",
        "def_procedimento_solicitado",
        "def_raca_cor",
        "def_regime",
        "def_sexo",
        "def_tpdisec1",
    ]
    _date_columns = ["dt_inter", "dt_saida"]
    _res_columns = [
        "res_LATITUDE",
        "res_LONGITUDE",
        "res_MUNCOD",
        "res_MUNNOME",
        "res_MUNNOMEX",
        "res_NOME_UF",
        "res_SIGLA_UF",
    ]
    selected_columns = [
        *_diag_columns,
        *_year_columns,
        "CID_MORTE",
        *_def_columns,
        *_date_columns,
        *_res_columns,
    ]
    return (selected_columns,)
@app.cell
def _(Path, pl, selected_columns):
    # Load the 2014-2016 CSV extracts under data/ETLSIH, keep only
    # `selected_columns`, and build the notebook-wide `df`.
    #
    # Paths are sorted so the row order of `df` does not depend on the
    # filesystem's directory listing order.
    _paths = sorted(Path("data/ETLSIH").glob("ETLSIH.ST_PE_201[4-6]*.csv"))
    if not _paths:
        # pl.concat() rejects an empty list with a generic error; fail early
        # with a message that names the missing input instead.
        raise FileNotFoundError(
            "No CSV files matching 'ETLSIH.ST_PE_201[4-6]*.csv' found in data/ETLSIH"
        )

    _data_per_months = [
        pl.read_csv(
            _path,
            infer_schema_length=0,  # read the file without schema inference
            columns=selected_columns,
        )
        for _path in _paths
    ]
    df = pl.concat(_data_per_months)

    # Parse the two date columns, then keep only admissions dated within
    # 2014-01-01..2016-12-31.
    df = df.with_columns(
        pl.col("dt_inter").str.strptime(pl.Date),
        pl.col("dt_saida").str.strptime(pl.Date),
    ).filter(pl.col("dt_inter").is_between(pl.date(2014, 1, 1), pl.date(2016, 12, 31)))

    # Week number of the admission date, used by the weekly plot.
    df = df.with_columns(
        week=pl.col("dt_inter").dt.week()
    )
    df.shape
    return (df,)
@app.cell
def _(df, px):
    # Line chart of admission counts per week, one line per year.
    #
    # `week` comes from dt_inter.dt.week(), i.e. an ISO week number. ISO weeks
    # can straddle calendar years (2016-01-01 is ISO week 53 of 2015), so the
    # week is paired with the ISO year of the same date rather than with the
    # calendar-year column; otherwise boundary days end up on the wrong line.
    # NOTE(review): ISO weeks start on Monday; confirm whether the
    # "epidemiological week" in the title is meant to follow that convention.
    _weekly = (
        df.with_columns(df["dt_inter"].dt.iso_year().alias("iso_year"))
        .group_by(["iso_year", "week"])
        .len("count")
        .sort(["week", "iso_year"])
    )
    _fig = px.line(
        _weekly,
        x="week",
        y="count",
        color="iso_year",
        title="All cases by epidemiological week per year",
    )
    # The x axis holds week numbers, not dates, so the tick step is numeric
    # ("M1" is a month interval that only applies to date axes).
    _fig.update_xaxes(dtick=1, tickangle=45, showgrid=True)
    _fig
    return
@app.cell
def _(df):
    # Show only the diagnosis-related columns of `df`: the two raw code columns
    # followed by the def_diag_<princ|secun>_<level> columns.
    _levels = ("cap", "cat", "grupo", "subcat")
    diagnosis_cols = ["DIAG_PRINC", "DIAG_SECUN"] + [
        f"def_diag_{_kind}_{_level}"
        for _kind in ("princ", "secun")
        for _level in _levels
    ]
    df.select(diagnosis_cols)
    return
@app.cell
def _():
    # Values matched against the DIAG_PRINC column by the cells below.
    # NOTE(review): these look like ICD-10 codes written without the dot --
    # confirm the intended meaning of each code.
    diagnosis_code = ["U069", "A928", "A920", "A925", "P354"]
    return (diagnosis_code,)
@app.cell
def _():
    # Disabled draft: weekly counts restricted to the codes in diagnosis_code.
    # The cell currently does nothing; the expression is kept for reference.
    # df.filter(pl.col("DIAG_PRINC").is_in(diagnosis_code)).with_columns(
    #     pl.col("dt_inter").dt.week().alias("week")
    # ).group_by(
    #     ["ano_internacao", "week"]
    # ).len("count").sort(["week", "ano_internacao"])
    return
@app.cell
def _(df, diagnosis_code, pl, px):
    # Line chart of daily admission counts for rows whose DIAG_PRINC is one of
    # the codes in diagnosis_code, with one line per code.
    _matches_code = pl.col("DIAG_PRINC").is_in(diagnosis_code)
    _daily_counts = (
        df.filter(_matches_code)
        .group_by(["dt_inter", "DIAG_PRINC"])
        .len()
        .sort("dt_inter")
    )
    _fig = px.line(
        _daily_counts,
        x="dt_inter",
        y="len",
        color="DIAG_PRINC",
        title="Cases related to Zika",
    )
    # dt_inter is a date axis, so ticks are placed one month apart.
    _fig.update_xaxes(dtick="M1", tickangle=45, showgrid=True)
    _fig
    return
@app.cell
def _(df, diagnosis_code, pl):
    # Display the rows of `df` whose DIAG_PRINC is one of the selected codes.
    _matches_code = pl.col("DIAG_PRINC").is_in(diagnosis_code)
    df.filter(_matches_code)
    return
@app.cell
def _(diagnosis_code):
    # Display the first entry of diagnosis_code.
    diagnosis_code[0]
    return
# Run the notebook's cells when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment