Hi, this is a follow-up to the post "Is there a way either or show only the most left handed sources/models in the DAG?". I cannot reply to that post anymore because it is closed. Thanks for the suggestion to use manifest.json to find the root nodes of a complex dbt DAG. I went a step further and wrote Python code that does this, and I wanted to leave it here so that anyone who needs it can reference it.
import json
manifest_path = '/Users/Humapathy/Desktop/manifest.json'  # <location of the manifest.json file along with the path>
node_to_analyze = 'model.openfit.dim_platform'  # <The node in the dbt docs DAG whose root nodes (nodes with no predecessors) are to be found>


def direct_parents(manifest, unique_id):
    """Return the ids that ``unique_id`` directly depends on.

    Parents are read from ``manifest['nodes'][unique_id]['depends_on']['nodes']``.
    An id that is not a key of ``manifest['nodes']`` yields an empty list
    instead of raising ``KeyError``: dbt sources are referenced from
    ``depends_on`` but are stored under the separate top-level ``sources``
    key of the manifest, and they have no parents of their own.

    Args:
        manifest: The parsed ``manifest.json`` as a dict.
        unique_id: A node id such as ``'model.project.name'``.

    Returns:
        A new list of parent ids (possibly empty).
    """
    entry = manifest.get('nodes', {}).get(unique_id)
    if entry is None:
        return []
    depends_on = entry.get('depends_on') or {}
    return list(depends_on.get('nodes') or [])


def find_root_nodes(manifest, start_node):
    """Return every ancestor of ``start_node`` that has no parents itself.

    The graph is walked one generation at a time, starting from the direct
    parents of ``start_node``. Each id is visited at most once, so a parent
    shared by several models is reported only once.

    Args:
        manifest: The parsed ``manifest.json`` as a dict.
        start_node: The id of the node to analyze; must be a key of
            ``manifest['nodes']``.

    Returns:
        A list of root ids in the order they were first discovered. The list
        is empty when ``start_node`` has no parents.

    Raises:
        KeyError: If ``start_node`` is not present in ``manifest['nodes']``.
    """
    if start_node not in manifest.get('nodes', {}):
        raise KeyError(
            'node {!r} not found under "nodes" in the manifest'.format(start_node)
        )

    roots = []
    seen = {start_node}
    pending = direct_parents(manifest, start_node)
    while pending:
        # Build the next generation in a fresh list so the list being
        # iterated is never modified during the loop.
        next_level = []
        for node_id in pending:
            if node_id in seen:
                continue
            seen.add(node_id)
            parents = direct_parents(manifest, node_id)
            if parents:
                next_level.extend(parents)
            else:
                roots.append(node_id)
        pending = next_level
    return roots


def main():
    """Load the manifest and print the first-order parents and the root nodes."""
    # The context manager closes the file even if parsing fails.
    with open(manifest_path, encoding='utf-8') as manifest_file:
        manifest = json.load(manifest_file)

    # Resolve the roots first so that a missing node fails before any output.
    root_parent_node = find_root_nodes(manifest, node_to_analyze)

    # Immediate first-order parents of the node in question.
    print(direct_parents(manifest, node_to_analyze))
    print('Root Parent Nodes For ', node_to_analyze)
    print('\n'.join(str(el) for el in root_parent_node))


if __name__ == '__main__':
    main()