FAQ: Cleaning up removed models from your production schema

I also adapted @elexisvenator 's code to work with Snowflake and dbt 1.1.0 - figured I would add my adaptation to the discussion for any future lost souls! Some notable changes I made to that source:

  • Re-set the schema variable to always be a list to prevent duplicate logic checking the schema type repeatedly
  • Always use uppercase schemas to match Snowflake consistently
  • Join the Snowflake information_schema data to values for both the current dbt node’s schema and the provided target schemas, to handle cases where a user is in a target environment whose schema differs from the desired schema
  • Insert an exception to exit the script if a BRONZE schema is about to be dropped, to prevent dropping raw source data in an automated CI/CD job (assumption being that dropping calculated silver/gold views and tables is less risky, since we can always recreate them from the raw bronze data instead)
{% macro delete_outdated_tables(schema) %} 
  {% if (schema is not string and schema is not iterable) or schema is mapping or schema|length <= 0 %}
    {% do exceptions.raise_compiler_error('"schema" must be a string or a list') %}
  {% endif %}
  {% if schema is string %}
    {% set schema = [schema] %}
  {% endif %}

  {% call statement('get_outdated_tables', fetch_result=True) %}
    select c.schema_name,
           c.ref_name,
           c.ref_type
    from (
        select table_schema as schema_name, 
           table_name  as ref_name, 
             'table'    as ref_type
      from information_schema.tables 
      where table_schema in (
        {%- for s in schema -%}
        UPPER('{{ s }}'){% if not loop.last %},{% endif %}
        {%- endfor -%}
      )
    union all
    select table_schema as schema_name, 
           table_name  as ref_name, 
             'view'     as ref_type
      from information_schema.views
        where table_schema in (
        {%- for s in schema -%}
        UPPER('{{ s }}'){% if not loop.last %},{% endif %}
        {%- endfor -%}
      )) as c
    left join (values
      {%- for node in graph['nodes'].values() | selectattr("resource_type", "equalto", "model") | list
                    + graph['nodes'].values() | selectattr("resource_type", "equalto", "seed")  | list %} 
        {% for s in schema %}
            (UPPER('{{ s }}'), UPPER('{{node.name}}')),
        {% endfor %}
        (UPPER('{{node.schema}}'), UPPER('{{node.name}}')){% if not loop.last %},{% endif %}
      {%- endfor %}
    ) as desired (schema_name, ref_name) on desired.schema_name = c.schema_name
                                        and desired.ref_name    = c.ref_name
    where desired.ref_name is null
  {% endcall %}

  {%- for to_delete in load_result('get_outdated_tables')['data'] %} 
    {% set fqn = target.database + '.' + to_delete[0] + '.' + to_delete[1] %}
    {% if 'BRONZE' in fqn %}
      {% do exceptions.raise_compiler_error('Was asked to drop a bronze table, will not proceed. Table: ' + fqn) %}
    {% endif %}
    {% call statement() -%}
      {% do log('dropping ' ~ to_delete[2] ~ ': ' ~ fqn, info=true) %}
      drop {{ to_delete[2] }} if exists {{ fqn }} cascade;
    {%- endcall %}
  {%- endfor %}

{% endmacro %}
2 Likes