import pandas as pd # for reading and maniuplating dataframes
import numpy as np #  general math calulations
import matplotlib.pyplot as plt # for visualization
import seaborn as sns # visualizations, some analysis

# scipy.optimize.milp requires scipy >=1.9.0
from scipy import stats
from scipy.optimize import minimize
from scipy.optimize import NonlinearConstraint, LinearConstraint
from scipy.optimize import milp

# Load data and print sheetnames
supply_chain = pd.ExcelFile('data/Supply_chain_logistics_problem.xlsx')
supply_chain.sheet_names

['OrderList',
 'FreightRates',
 'WhCosts',
 'WhCapacities',
 'ProductsPerPlant',
 'VmiCustomers',
 'PlantPorts']

sheets = ['OrderList',
 'FreightRates',
 'WhCosts',
 'WhCapacities']

dfs = {}

# Load sheets into dictionary of dataframes
# view dataframe info
for sheet in sheets:
    dfs[sheet] = pd.read_excel(supply_chain, sheet_name=sheet)
    print(sheet)
    print(dfs[sheet].info())
    print('Duplicates: {}'.format(dfs[sheet].duplicated().sum()))
    print('\n\n')

OrderList
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9215 entries, 0 to 9214
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Order ID              9215 non-null   float64       
 1   Order Date            9215 non-null   datetime64[ns]
 2   Origin Port           9215 non-null   object        
 3   Carrier               9215 non-null   object        
 4   TPT                   9215 non-null   int64         
 5   Service Level         9215 non-null   object        
 6   Ship ahead day count  9215 non-null   int64         
 7   Ship Late Day count   9215 non-null   int64         
 8   Customer              9215 non-null   object        
 9   Product ID            9215 non-null   int64         
 10  Plant Code            9215 non-null   object        
 11  Destination Port      9215 non-null   object        
 12  Unit quantity         9215 non-null   int64         
 13  Weight                9215 non-null   float64       
dtypes: datetime64[ns](1), float64(2), int64(5), object(6)
memory usage: 1008.0+ KB
None
Duplicates: 0


FreightRates
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1540 entries, 0 to 1539
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Carrier       1540 non-null   object 
 1   orig_port_cd  1540 non-null   object 
 2   dest_port_cd  1540 non-null   object 
 3   minm_wgh_qty  1540 non-null   float64
 4   max_wgh_qty   1540 non-null   float64
 5   svc_cd        1540 non-null   object 
 6   minimum cost  1540 non-null   float64
 7   rate          1540 non-null   float64
 8   mode_dsc      1540 non-null   object 
 9   tpt_day_cnt   1540 non-null   int64  
 10  Carrier type  1540 non-null   object 
dtypes: float64(4), int64(1), object(6)
memory usage: 132.5+ KB
None
Duplicates: 3


WhCosts
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   WH         19 non-null     object 
 1   Cost/unit  19 non-null     float64
dtypes: float64(1), object(1)
memory usage: 432.0+ bytes
None
Duplicates: 0


WhCapacities
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Plant ID         19 non-null     object
 1   Daily Capacity   19 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 432.0+ bytes
None
Duplicates: 0

dfs.keys()

dict_keys(['OrderList', 'FreightRates', 'WhCosts', 'WhCapacities'])

data = dfs['OrderList']

data = data.merge(dfs['WhCosts'], left_on='Plant Code', right_on = 'WH')
data = data.merge(dfs['WhCapacities'], left_on='WH', right_on = 'Plant ID')

# Use a left join here
data = data.merge(dfs['FreightRates'], how='left', left_on=['Carrier', 'Service Level', 'Origin Port', 'Destination Port'], right_on = ['Carrier', 'svc_cd', 'orig_port_cd','dest_port_cd'])
data.head()

num_nonCRF = data[data['Service Level']!='CRF']['Order ID'].nunique()
num_CRF = data[data['Service Level']=='CRF']['Order ID'].nunique()

print('Transportation needed for {} orders, {} CRF orders'.format(num_nonCRF, num_CRF))

Transportation needed for 8361 orders, 854 CRF orders

#For now remove CRF
CRF_df = data[data['Service Level']=='CRF']

wght_cond = (data['Weight']>=data['minm_wgh_qty']) & (data['Weight']<data['max_wgh_qty'])
data_check = data[wght_cond].reset_index(drop=True)

# Above we saw that FreightRates has some duplicates
# We will simply drop these
print('duplicates: ', dfs['FreightRates'].duplicated().sum())

duplicates:  3

dfs['FreightRates'] = dfs['FreightRates'].drop_duplicates()

''' Note aso that there are several Transit options that only differ by their minimum cost and rate
 This is a bit troublesome to deal with. Later on we will simply take the average values for these 
 Freight Rate entries. Below we show the number of duplciates when excluding only 'minimum cost', 'rate' '''

dfs['FreightRates'][['Carrier', 'orig_port_cd', 'dest_port_cd', 'minm_wgh_qty',
       'max_wgh_qty', 'svc_cd', 'mode_dsc',
       'tpt_day_cnt', 'Carrier type']].duplicated().sum()

104

wght_cond = (data['Weight']>=data['minm_wgh_qty']) & (data['Weight']<=data['max_wgh_qty'])
#service_cond = (data['Service Level'] == data['svc_cd'])
#port_cond = (data['Origin Port'] == data['orig_port_cd']) & (data['Destination Port'] == data['dest_port_cd'])

data_wght_constraint = data[ wght_cond]

all_orders, cleaned_orders = data['Order ID'].unique(), data_wght_constraint['Order ID'].unique()


missing = list(set(all_orders).difference(set(cleaned_orders)))
print('Total orders {} \nOrders with valid transport {} \n\
Orders without valid transport info {} (incl. {} CRF)'.format(len(all_orders), len(cleaned_orders), len(missing), num_CRF))

Total orders 9215 
Orders with valid transport 6991 
Orders without valid transport info 2224 (incl. 854 CRF)

cols = data.columns.to_list()

gp_cols = data.columns.to_list()
for x in ['tpt_day_cnt','minimum cost', 'rate']:
    gp_cols.remove(x)

grouped_df = data_wght_constraint[cols].groupby(gp_cols, as_index=False).mean()

data = pd.concat([grouped_df, CRF_df])
data.reset_index(inplace=True, drop=True)

def wh_cost(data):
    ''' Warehouse cost is the sum of the units quantity per order multipllied by 
        warehouse cost/unit
    '''
    cost  = (data['Unit quantity']*data['Cost/unit']).sum()
    return cost



# "The freight rate is determined based on total weight
#   on any given line pjcstm and the corresponding 
#    weight band in the freight rate table."
def tpt_cost(data):
    
    TCa = np.maximum(data['rate']*data['Weight'], data['minimum cost'])
    TCg = data['rate']*data['Weight'] 
    TC = np.where(data['mode_dsc']=='GROUND', TCg, TCa)
    TC_final = (np.where(data['Service Level']=='CRF', 0, TC) ).sum()
    
    return TC_final

print('Warehouse cost is ${:.1f} million'.format(wh_cost(data)/1e6))
print('Transportation cost is ${:.1f} million'.format(tpt_cost(data)/1e6))
print('Total cost is ${:.1f} million'.format((tpt_cost(data) + wh_cost(data))/1e6))

Warehouse cost is $14.9 million
Transportation cost is $0.1 million
Total cost is $15.0 million

# could merge instaed to unify WH info

dfs['WhCosts'] = dfs['WhCosts'].sort_values('WH').reset_index(drop=True)
wcosts = dfs['WhCosts']['Cost/unit']

dfs['WhCapacities'] = dfs['WhCapacities'].sort_values('Plant ID').reset_index(drop=True)
wcap = dfs['WhCapacities']['Daily Capacity ']

whs = dfs['WhCosts']['WH']

print('Max orders in a day {}'.format(wcap.sum()))

Max orders in a day 5791

# We choose some value of orders we want to consider
num_orders = 4000

new_order_cols = ['Origin Port','Service Level','Destination Port','Unit quantity','Weight']
new_orders = pd.DataFrame(np.repeat( dfs['OrderList'][new_order_cols].sample(1,random_state=1).values, num_orders, axis=0))
new_orders.columns = new_order_cols
#We wont really use this, but if lets generate unique Order IDs
new_orders['Order ID'] = np.random.choice(np.arange(num_orders), size=num_orders, replace=False)

new_orders.head()

# grab the unit quantity for each of our orders
uq = new_orders['Unit quantity'].mode().iloc[0]

def random_assign(total, constraints):
    ''' Randomly assign a number (total) elements
        to len(constraints) open slots, obeying max numebr of 
        objects per slot as defnined by constraints
    '''
    rng = np.random.default_rng()
    samples = rng.multinomial(total, constraints / constraints.sum(), size=1000)
    try:
        return next(val for val in samples if np.all(val < constraints))
    except:
        return constraints

# Define our objective funtion (i.e. the warehouse storage costs)
def obj(x):
    ''' Input array containing the number 
        of orders for each warehouse. 
        
        Cost is calculated as the
        (warehouse cost/unit) X (unit quantity) X (number of orders)
        
        Output is cost in $1k    
    '''
    return (wcosts*uq*x).sum()/1e3


bounds = [(0,np.inf)]*len(wcosts) # disallow negative values



b_l = np.zeros(len(wcosts))
b_u = np.full_like(b_l, np.inf)
A = np.ones((len(b_l), len(wcosts)))


cons1 = LinearConstraint(A, b_l, b_u)
cons2 = NonlinearConstraint( lambda x: x.sum()-num_orders, 0,0) # Must account for all orders
cons3 = NonlinearConstraint( lambda x: min(wcap - x ), 0, np.inf) # Do not exceed warehouse capacity

cons = []
cons += [cons1]
cons += [cons2]
cons += [cons3]

# get random assignment of warehouses
print('get')
ra = random_assign(num_orders, wcap)
print(ra)
res = minimize(obj,x0 = ra,  bounds=bounds, constraints = cons, method='SLSQP')

print('Initial cost ${:.0f}k'.format(obj(ra)))
print('Optimized cost ${:.0f}k'.format(obj(res.x)))
print('Cost reduction {:.1f}%'.format((1-obj(res.x)/obj(ra))*100))

get
[781  99 679 382 270  34 173  12   5  81 238 146 344 385   8 295   7  58
   3]
Initial cost $658k
Optimized cost $508k
Cost reduction 22.8%

plt.figure(figsize=(6,3), dpi=150)
xvals = np.arange(len(whs))
plt.bar(x = xvals,height=ra, alpha=0.5, label='Random')
plt.bar(x = xvals,height=res.x, alpha=0.5, label='Optimized')
plt.bar(x = xvals,height=wcap, alpha=1, edgecolor='k',facecolor='none', label='Capacity')

plt.xticks([x for x in range(19)])

plt.legend()
plt.xlabel('Plant ID')



for i in xvals:
    if i==0:
        costlabel = 'cost/unit\n{:.2f}'.format(wcosts[i])
    else:
        costlabel = '{:.2f}'.format(wcosts[i])
    plt.text(i, wcap[i]+20, costlabel, fontsize=5, ha='center')

plt.ylim(ymax=1200)

(0.0, 1200.0)

%%time

iters = 100
ra_costs = []
opt_costs = []
cost_reductions = []

errors = 0
unit_errors = []
relaxed_err= []


for i in range(iters):
    if (i+1)%10==0:
        print('{}/{}'.format(i+1, iters))

        
    # get random assignment of warehouses
    ra = random_assign(num_orders, wcap)


    res = minimize(obj, x0 = ra, bounds=bounds, constraints = cons, method='SLSQP', options={'maxiter':1000})
    result = np.round(res.x)
    if result.sum() != num_orders:
        unit_errors.append(result.sum()-num_orders)
        errors +=1
        
    relaxed_err.append(100*(1-obj(result)/obj(res.x)))
    cost_reductions.append((1-obj(result)/obj(ra))*100)

    ra_costs.append(obj(ra))
    opt_costs.append(obj(result))

10/100
20/100
30/100
40/100
50/100
60/100
70/100
80/100
90/100
100/100
CPU times: user 7min 36s, sys: 69.3 ms, total: 7min 36s
Wall time: 7min 40s

print('Warning: {} rounding errors totaling {} orders'.format(errors, np.sum(np.abs(unit_errors)) ))

# Does not account for capacity violations!
plt.figure(figsize=(4,3), dpi=150)
plt.hist(relaxed_err, alpha=0.5, edgecolor='grey')
plt.xlabel('% difference due to rounding')

Warning: 56 rounding errors totaling 69.0 orders

Text(0.5, 0, '% difference due to rounding')

#Make as subplots
fig, ax = plt.subplots(1,2,figsize=(8,3), dpi=150)
ax[0].hist(ra_costs, alpha=0.5, edgecolor='grey', label = 'Random')
ax[0].hist(opt_costs, alpha=0.5, edgecolor='grey' ,label = 'Optimized')
ax[0].set_xlabel('cost [$1k]')

ax[1].hist(cost_reductions, alpha=0.5, edgecolor='grey')
ax[1].set_xlabel('% cost reduction')
ax[1].set_xlim(0,100)

print('mean cost reduction = {:.1f}%'.format(np.mean(cost_reductions)))
print('deviation = {:.1f}%'.format(100*np.std(opt_costs)/np.mean(opt_costs)))

mean cost reduction = 25.4%
deviation = 0.6%

#!pip install pulp --progress-bar off

import pulp


wcost_list = wcosts.to_list()
wcap_list = wcap.to_list()

num_vars = len(whs)

#define problem  model
model = pulp.LpProblem('model',pulp.LpMinimize)  

# Define integer varaibles (i.e. number of orders per warehouse)
orders = [pulp.LpVariable("x"+str(i), lowBound = 0,  cat = 'Integer') for i in range(num_vars)]


# Define Objective
model += pulp.lpSum([wcost_list[i] * uq*orders[i]/1e3 for i in range(num_vars)]), "Warehouse Cost"


# Define constraints
model += pulp.lpSum([orders[i] for i in range(num_vars)]) == num_orders, "Total Orders"

for i in range(num_vars):
    model += orders[i] <= wcap_list[i], 'Plant {} Capacity'.format(i)

print(model)

model:
MINIMIZE
0.14231102241732*x0 + 0.11985338325073283*x1 + 0.13936706990325226*x10 + 0.19405608037378935*x11 + 0.11789647120195978*x12 + 0.15921676416387132*x13 + 0.3551809075486018*x14 + 0.4818716844709608*x15 + 0.10766559335939214*x16 + 0.51109969371064*x17 + 0.1605803924741909*x18 + 0.12989297479799092*x2 + 0.10755432708220254*x3 + 0.12252424051637308*x4 + 0.1390761125040729*x5 + 0.09322740829378695*x6 + 0.1312371820982516*x7 + 0.1167328454431167*x8 + 0.12388905651921926*x9 + 0.0
SUBJECT TO
Total_Orders: x0 + x1 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18
 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 = 4000

Plant_0_Capacity: x0 <= 1070

Plant_1_Capacity: x1 <= 138

Plant_2_Capacity: x2 <= 1013

Plant_3_Capacity: x3 <= 554

Plant_4_Capacity: x4 <= 385

Plant_5_Capacity: x5 <= 49

Plant_6_Capacity: x6 <= 265

Plant_7_Capacity: x7 <= 14

Plant_8_Capacity: x8 <= 11

Plant_9_Capacity: x9 <= 118

Plant_10_Capacity: x10 <= 332

Plant_11_Capacity: x11 <= 209

Plant_12_Capacity: x12 <= 490

Plant_13_Capacity: x13 <= 549

Plant_14_Capacity: x14 <= 11

Plant_15_Capacity: x15 <= 457

Plant_16_Capacity: x16 <= 8

Plant_17_Capacity: x17 <= 111

Plant_18_Capacity: x18 <= 7

VARIABLES
0 <= x0 Integer
0 <= x1 Integer
0 <= x10 Integer
0 <= x11 Integer
0 <= x12 Integer
0 <= x13 Integer
0 <= x14 Integer
0 <= x15 Integer
0 <= x16 Integer
0 <= x17 Integer
0 <= x18 Integer
0 <= x2 Integer
0 <= x3 Integer
0 <= x4 Integer
0 <= x5 Integer
0 <= x6 Integer
0 <= x7 Integer
0 <= x8 Integer
0 <= x9 Integer

# Run the solver
model.solve()

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/armcdan/.local/lib/python3.9/site-packages/pulp/solverdir/cbc/linux/64/cbc /local_scratch/pbs.1783878.pbs02/3c43a266460b498b8f5b26f55ba5cb04-pulp.mps timeMode elapsed branch printingOptions all solution /local_scratch/pbs.1783878.pbs02/3c43a266460b498b8f5b26f55ba5cb04-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 25 COLUMNS
At line 121 RHS
At line 142 BOUNDS
At line 162 ENDATA
Problem MODEL has 20 rows, 19 columns and 38 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 497.699 - 0.00 seconds
Cgl0004I processed model has 1 rows, 19 columns (19 integer (0 of which binary)) and 19 elements
Cbc0012I Integer solution of 497.69879 found by greedy equality after 0 iterations and 0 nodes (0.00 seconds)
Cbc0001I Search completed - best objective 497.6987932001232, took 0 iterations and 0 nodes (0.00 seconds)
Cbc0035I Maximum depth 0, 0 variables fixed on reduced cost
Cuts at root node changed objective from 497.699 to 497.699
Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Knapsack was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
FlowCover was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
ZeroHalf was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)

Result - Optimal solution found

Objective value:                497.69879320
Enumerated nodes:               0
Total iterations:               0
Time (CPU seconds):             0.00
Time (Wallclock seconds):       0.00

Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

1

res_dict = {}
for x in  model.variables():
    res_dict[x.name] = x.value()

    
pulp_res = []
for i in range(num_vars):
    pulp_res.append(res_dict['x'+str(i)])

plt.figure(figsize=(6,3), dpi=150)
xvals = np.arange(len(whs))
plt.bar(x=xvals, height=pulp_res,alpha=0.5,  label='PuLP')
#plt.bar(x = xvals,height=ra, alpha=0.5, label='Random')
plt.bar(x = xvals,height=res.x, alpha=0.5, label='Scipy')
plt.bar(x = xvals,height=wcap, alpha=1, edgecolor='k',facecolor='none', label='Capacity')

plt.legend()
plt.xlabel('Plant ID')
plt.xticks(xvals)


for i in xvals:
    if i==0:
        costlabel = 'cost/unit\n{:.2f}'.format(wcosts[i])
    else:
        costlabel = '{:.2f}'.format(wcosts[i])
    plt.text(i, wcap[i]+20, costlabel, fontsize=5, ha='center')

plt.ylim(ymax=1200)

(0.0, 1200.0)

scipy_cost = obj(np.round(res.x))
scipy_cost_rel = res.fun
pulp_cost = pulp.value(model.objective)

print('Cost (SciPy): ${:.1f}k (relax ${:.1f}k)'.format(scipy_cost, scipy_cost_rel))
print('Cost (PuLP): ${:.1f}k'.format(pulp_cost))
print('PuLP Cost reduction: {:.1f}% (relax {:.1f}%)'.format((1 - pulp_cost/scipy_cost)*100, (1 - pulp_cost/scipy_cost_rel)*100))

Cost (SciPy): $498.1k (relax $498.0k)
Cost (PuLP): $497.7k
PuLP Cost reduction: 0.1% (relax 0.1%)

	Order ID	Order Date	Origin Port	Carrier	TPT	Service Level	Ship ahead day count	Customer	Product ID	...	orig_port_cd	dest_port_cd	minm_wgh_qty	max_wgh_qty	svc_cd	minimum cost	rate	mode_dsc	tpt_day_cnt	Carrier type
0	1.447296e+09	2013-05-26	PORT09	V44_3	1	CRF	3	V55555_53	1700106	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	1.447158e+09	2013-05-26	PORT09	V44_3	1	CRF	3	V55555_53	1700106	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	1.447139e+09	2013-05-26	PORT09	V44_3	1	CRF	3	V55555_53	1700106	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	1.447364e+09	2013-05-26	PORT09	V44_3	1	CRF	3	V55555_53	1700106	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	1.447364e+09	2013-05-26	PORT09	V44_3	1	CRF	3	V55555_53	1700106	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Origin Port	Service Level	Destination Port	Unit quantity	Weight	Order ID
0	PORT04	CRF	PORT09	251	1.941615	2562
1	PORT04	CRF	PORT09	251	1.941615	344
2	PORT04	CRF	PORT09	251	1.941615	2911
3	PORT04	CRF	PORT09	251	1.941615	3752
4	PORT04	CRF	PORT09	251	1.941615	3828

Supply Chain Analysis and Optimization¶

Import packages¶

Load & View Data¶

Data info - Connections and nulls¶

Define warehouse and transport costs¶

Calulate Cost¶

The Optimization Problem¶

Plot warehouse assignments¶

Repeat process to get cost reduction distribution¶

Check differences due to rounding¶

Summary of Results¶

Supply Chain optimization with PuLP¶

Summary of Results with PuLP¶

Additional analysis¶