# Network Graph Problem

## Read the data and build the similarity-score DataFrame

In [4]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
import plotly.graph_objects as go
import os.path

# Locate the SQLite database relative to the current working directory.
# os.path.join keeps the path valid on every platform; a hard-coded backslash
# separator only resolves correctly on Windows.
current_dir = os.path.abspath('')
db_path = os.path.join(current_dir, "data", "food_data.db")

# Load a sample of products. The connection is closed even when the query
# fails, so a bad query does not leave the database handle open.
conn = sqlite3.connect(db_path)
try:
    query = "SELECT * FROM food_data LIMIT 100"
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Missing values are replaced with 0 in every column before computing
# similarities (this also applies to non-nutrient columns such as the name).
df = df.fillna(0)

# Columns that form the feature vector of each product.
nutrient_columns = [
    'energy-kcal_100g', 'fat_100g', 'saturated-fat_100g', 'unsaturated-fat_100g',
    'omega-3-fat_100g', 'omega-6-fat_100g', 'omega-9-fat_100g', 'trans-fat_100g',
    'cholesterol_100g', 'carbohydrates_100g', 'sugars_100g', 'sucrose_100g',
    'glucose_100g', 'fructose_100g', 'lactose_100g', 'maltose_100g', 'fiber_100g',
    'soluble-fiber_100g', 'insoluble-fiber_100g', 'proteins_100g', 'salt_100g',
    'added-salt_100g', 'sodium_100g', 'alcohol_100g', 'vitamin-a_100g',
    'beta-carotene_100g', 'vitamin-d_100g', 'vitamin-e_100g', 'vitamin-k_100g',
    'vitamin-c_100g', 'vitamin-b1_100g', 'vitamin-b2_100g', 'vitamin-pp_100g',
    'vitamin-b6_100g', 'vitamin-b9_100g', 'vitamin-b12_100g', 'bicarbonate_100g',
    'potassium_100g', 'chloride_100g', 'calcium_100g', 'phosphorus_100g', 'iron_100g',
    'magnesium_100g', 'zinc_100g', 'copper_100g', 'manganese_100g', 'fluoride_100g',
    'selenium_100g', 'chromium_100g', 'molybdenum_100g', 'iodine_100g',
    'caffeine_100g', 'cocoa_100g', 'carbon-footprint_100g'
]

# Number of rows compared against the full table per cosine_similarity call.
chunk_size = 1000

# Each chunk of rows is compared against ALL rows, so pairs that fall into
# different chunks get their real similarity instead of a placeholder value.
# Results are written by position into a float array, which also works when
# several products share the same name.
all_features = df[nutrient_columns]
similarity_matrix = np.empty((len(df), len(df)), dtype=float)
for start in range(0, len(df), chunk_size):
    stop = start + chunk_size
    similarity_matrix[start:stop] = cosine_similarity(
        all_features.iloc[start:stop], all_features
    )

# Square product-by-product frame; rows and columns are both labelled by name.
similarity_df = pd.DataFrame(
    similarity_matrix, index=df['product_name'], columns=df['product_name']
)

similarity_df

product_name,Nitroglycerin,Cheese twist,Pepperidge farm cookies,"Asian home gourmet, spice paste for indian butter chicken, mild",Crunchy Granola Bars,Nature Valley Crunchy Oats 'n Dark Chocolate Granola Bar,Iced Party Rings,Golden crunchy creams,Guacamole Dip,Coconut oil,...,คุกกี้สเปลท์เนยสดผสมข้าวกล้องงอก,กระเทียบปลอดเปลือง,มะนาว,มะเขือเทศราชินี,กรีนโอ๊ค,ฟิลเลย์ไอช์เบิร์ท,ส้มสายน้ำผึ้ง,lactasoy,ท๊อฟฟี่เค้ก,Bavarian bread
product_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Nitroglycerin,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
Cheese twist,0.0,1.000000,0.994018,0.995840,0.996531,0.959094,0.991335,0.996205,0.0,0.991655,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.997557,0.0
Pepperidge farm cookies,0.0,0.994018,1.000000,0.988383,0.999200,0.979432,0.999649,0.999609,0.0,0.977958,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.997638,0.0
"Asian home gourmet, spice paste for indian butter chicken, mild",0.0,0.995840,0.988383,1.000000,0.991282,0.944871,0.985161,0.991537,0.0,0.991367,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.995406,0.0
Crunchy Granola Bars,0.0,0.996531,0.999200,0.991282,1.000000,0.975556,0.998478,0.999325,0.0,0.980052,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.998650,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ฟิลเลย์ไอช์เบิร์ท,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
ส้มสายน้ำผึ้ง,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
lactasoy,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
ท๊อฟฟี่เค้ก,0.0,0.997557,0.997638,0.995406,0.998650,0.967932,0.996187,0.998944,0.0,0.986198,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000000,0.0


## Build a graph whose edge weights are the similarity scores

In [5]:
# Similarities are the cosine similarities stored in similarity_df.
# The frame is used as an adjacency matrix to build the graph.
graph = nx.from_pandas_adjacency(similarity_df)

# Keep the ten nodes with the highest degree and lay out the induced subgraph.
node_degrees = graph.degree()
sorted_nodes = sorted(node_degrees, key=lambda x: x[1], reverse=True)
top_nodes = [node[0] for node in sorted_nodes[:10]]
subgraph = graph.subgraph(top_nodes)
# NOTE(review): no seed is passed, so the layout presumably differs between
# runs; pass seed=... if a reproducible figure is needed.
pos = nx.spring_layout(subgraph)

# Read coordinates in the same explicit node order used for the labels, so
# marker positions and text always line up; this also works for an empty graph.
node_labels = list(subgraph.nodes())
x = tuple(pos[node][0] for node in node_labels)
y = tuple(pos[node][1] for node in node_labels)

# Collect edge segments in plain lists first; the None entries break the line
# between consecutive edges so they are drawn as separate segments.
edge_x = []
edge_y = []
for source, target in subgraph.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

node_trace = go.Scatter(
    x=x,
    y=y,
    mode='markers+text',
    hoverinfo='text',
    text=node_labels,
    textposition='top center',
    marker=dict(
        showscale=False,
        color='rgb(150,150,150)',
        size=10,
        line=dict(width=2, color='rgb(255,255,255)')))

# Hide the axes and legend so only the nodes and edges are visible.
layout = go.Layout(
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))

fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
fig.show()

## Find the shortest path from one food to another, using reversed similarity scores as edge weights (low weight = high similarity)


In [8]:
# Shortest path between two products on a graph weighted by dissimilarity.

G = nx.Graph()
G.add_nodes_from(df['product_name'])

# Read the scores once as a float array; element access on the array is much
# cheaper than repeated DataFrame.iloc lookups inside the double loop.
scores = similarity_df.to_numpy(dtype=float)
row_labels = similarity_df.index
col_labels = similarity_df.columns
n_products = len(similarity_df)

# Add one edge per product pair (upper triangle only) with a positive
# similarity. The weight is the reversed score, 1 - similarity, so that more
# similar products are "closer".
for i in range(n_products):
    for j in range(i + 1, n_products):
        similarity_score = scores[i, j]
        if similarity_score > 0:
            # Floating-point rounding can push a cosine similarity slightly
            # above 1, which would give a negative weight; clamp at 0 because
            # weighted shortest-path search requires non-negative weights.
            similarity_weight = max(0.0, 1.0 - float(similarity_score))
            G.add_edge(row_labels[i], col_labels[j], weight=similarity_weight)

# Example: path between two products, minimising the summed edge weights.
start_node = 'Cheese twist'
end_node = 'Pepperidge farm cookies'
shortest_path = nx.shortest_path(G, start_node, end_node, weight='weight')

print(shortest_path)

['Cheese twist', 'Scottish All Butter Shortbread Assortment', 'all butter Scottish shortbread biscuits with toffee pieces', 'Cookies stem ginger', 'Jam Sandwich Creams', 'Golden crunchy creams', 'Pepperidge farm cookies']
