Monday, April 29, 2013

Rewriting GAMS code

A first attempt to write some code turned out to be correct, but very slow:
cube('PPX0a.zip','Producer Price',cgroup,cname,y,indicator,
    
'','','-','','','',''
,cty) =
     
sum
((j2c(j,c),cgmap(cgroup,c),cnamemap(c,cname)),
          PPX0(j,cty,y));
After rewriting this fragment to include a loop much better performance was achieved:
loop((cgmap(cgroup,c),cnamemap(c,cname)),
   cube(
'PPX0b.zip','Producer Price'
,cgroup,cname,y,indicator,
       
'','','-','','','',''
,cty) =
        
sum
(j2c(j,c),
             PPX0(j,cty,y));
);
The timings for these assignments are:
----    289 Assignment cube        344.247   344.247 SECS    100 Mb  634708
----    294 Loop                     2.590   346.837 SECS    107 Mb
The second fragment is more than 100x as fast as the original code (from 344 seconds to 2.6 seconds).
In a sense modeling languages are still woefully inadequate and much too primitive. From the first fragment a system like GAMS should be able to understand the meaning of what I want to do and should be able to re-arrange computations to achieve near-optimal performance.
This is a similar to RDBMS systems where query optimizers try to find the most efficient way to execute an SQL query. Although that is more mature technology (not in the least because of larger budgets for larger development teams), even here we see that an experienced database user is able to write much more efficient SQL queries than the average user. 

Thursday, April 18, 2013

More optimization in JavaScript

http://www.optimizationzen.com/2013/04/18/optimization-framework-js/

Not sure if this model is as readable as I would like it to be. Not so easy to recognize a transportation model in this.

require.config({
    paths: {
        "Optimization.Framework": "
http://optimizationzen.com/optimization.framework.min"
    }
});
var TransportModel = (function () {
    function TransportModel(origins, destinations) {
        if (typeof origins === "undefined") {
            origins = [];
        }
        if (typeof destinations === "undefined") {
            destinations = [];
        }
        this.origins = origins;
        this.destinations = destinations;
    }
    return TransportModel;
})();
var Origin = (function () {
    function Origin(id, name, supply, cost) {
        this.id = id;
        this.name = name;
        this.supply = supply;
        this.cost = cost;
    }
    return Origin;
})();
var Destination = (function () {
    function Destination(id, name, demand) {
        this.id = id;
        this.name = name;
        this.demand = demand;
    }
    return Destination;
})();
var transportModel = new TransportModel();
var fra = new Destination(0, "FRA", 900);
var det = new Destination(1, "DET", 1200);
var lan = new Destination(2, "LAN", 600);
var win = new Destination(3, "WIN", 400);
var stl = new Destination(4, "STL", 1700);
var fre = new Destination(5, "FRE", 1100);
var laf = new Destination(6, "LAF", 1000);
transportModel.destinations = [
fra,
det,
lan,
win,
stl,
fre,
laf];
var garycost = {};
garycost["fra"] = 39;
garycost["det"] = 14;
garycost["lan"] = 11;
garycost["win"] = 14;
garycost["stl"] = 16;
garycost["fre"] = 82;
garycost["laf"] = 8;
var gary = new Origin(0, "GARY", 1400, garycost);
var clevcost = {};
clevcost["fra"] = 27;
clevcost["det"] = 9;
clevcost["lan"] = 12;
clevcost["win"] = 9;
clevcost["stl"] = 26;
clevcost["fre"] = 95;
clevcost["laf"] = 17;
var clev = new Origin(1, "clev", 2600, clevcost);
var pittcost = {};
pittcost["fra"] = 24;
pittcost["det"] = 14;
pittcost["lan"] = 17;
pittcost["win"] = 13;
pittcost["stl"] = 28;
pittcost["fre"] = 99;
pittcost["laf"] = 20;
var pitt = new Origin(2, "pitt", 2900, pittcost);
transportModel.origins = [
gary,
clev,
pitt];

function run() {
    require([
        'Optimization.Framework'], function (of) {
        var glpk = new of.Optimization.GLPKSolver();
        var mathModel = new of.Optimization.MathModel();
        var Transport = new of.Optimization.VariableCollection("Transport_", 0, Math.min(), of.Optimization.VariableType.Integer, transportModel.origins, transportModel.destinations);
        Transport.IndexValidation = false;
        mathModel.setObjective(new of.Optimization.Expression(Enumerable.from(transportModel.origins).selectMany(function (orig) {
            return Enumerable.from(transportModel.destinations).select(function (dest) {
                return Transport.varFromIndex(orig, dest).multiplyBy(orig.cost[dest.Name]);
            });
        }).toArray()));
        transportModel.origins.forEach(function (orig) {
            mathModel.addConstraint(new of.Optimization.Expression(Enumerable.from(transportModel.destinations).select(function (dest) {
                return Transport.varFromIndex(orig, dest);
            }).toArray()).eq(orig.supply));
        });
        transportModel.destinations.forEach(function (dest) {
            mathModel.addConstraint(new of.Optimization.Expression(Enumerable.from(transportModel.origins).select(function (orig) {
                return Transport.varFromIndex(orig, dest);
            }).toArray()).eq(dest.demand));
        });
        var solution = glpk.solve(mathModel);
        var lp = mathModel.write();
        $('#model_as_lp_file').val(lp);
        $('#log').html("Solution: " + JSON.stringify(solution, null, 4));
    });

};

See also http://yetanothermathprogrammingconsultant.blogspot.com/2013/01/c-vs-javascript.html.

Wednesday, April 17, 2013

Excel for Modeling

Excel is great for data entry and reporting, but as a modeling tool it may not always be the best choice. Here is a great example:

It is noted that the Excel bug was just one of the issues found with the Reinhart and Rogoff paper. It is not often that rather  technical data problems reach newspaper headlines but in this case the underlying paper was very influential in guiding economic policy towards austerity.

I am fairly regularly involved in conversion of spreadsheet models to modeling language implementations. That often (almost always) reveals errors in the spreadsheet formulas. Here is an interesting paper on spreadsheet errors: http://panko.shidler.hawaii.edu/ssr/Mypapers/whatknow.htm.

ArcGIS adventures

This is not yet really big data, but the data was large enough to cause some headaches. Different database formats start to fail when exceeding 2 GB file size limits. Old fashioned CSV helped, but we still had to investigate this unexpected extra line we saw.  

Slide1

Slide2

Wednesday, April 3, 2013

Hamming distance and persistent solutions

When building scheduling models it is often important to make sure that a new schedule is not radically different from an old schedule unless there is a very good reason (e.g. we can make a lot of money by doing so). One way to handle this is to adds a cost-term to the objective that measures the difference between two schedules.

As we usually use binary variables to model a schedule, we basically need to find a way to write:

image

where we measure the difference between a new solution x and a previous solution x*. Here x is a decision variable and x* is a parameter.

Here are some (equivalent) ways to write this as a linear expression:

image

 

See also: