Part 4 of n: Using Node.JS to provide a REST interface

In our last post we constructed geospatial queries. The data they return is crucial for the visualizations in our web application. Instead of querying MongoDB directly from the application, we will create a REST interface using Node.js, which can then be consumed by multiple clients.

As the website states, Node.js is a platform built on Chrome’s JavaScript runtime for easily building fast, scalable network applications. Node.js uses an event-driven, non-blocking I/O model that makes it lightweight and efficient, perfect for data-intensive real-time applications that run across distributed devices.
Node.js is single-threaded, which lets it avoid the costly context switching seen in multi-threaded environments.

The Node.js application that I created is a simple one. Its purpose is to provide a REST API for our web application. Down the road, if I decide to write a mobile application, it could use the same API; hence the API can serve multiple clients.

Let’s get started.
First, please download Node.js and install it on your machine. After that, open the Node.js command prompt and install nodemon. It is a great utility that watches for the changes you make to your Node.js application and restarts it for you. It has good documentation on how to start your server.

I am using express.js to create the API. I am also using the cors package to enable CORS (cross-origin resource sharing) so that our web application can connect to the server. Lastly, I used mongoskin, which acts as a wrapper around the native MongoDB Node.js driver and helps ease development. All of these come in the form of packages which are easy to install. The links that I have provided explain how to install them. After installation, you will notice a folder named “node_modules” in the directory where your server code is. That is where our packages reside.

Let’s have a look at the code:

Code: https://github.com/tarun11ks/NYCTaxi/blob/master/js/external/server.js

// Server dependencies and the shared MongoDB handle.
var express = require('express'),
	app = express(),
	cors = require('cors'),
	mongo = require('mongoskin'),
	// Connection string for the NYCTaxiDB database. Credentials should not be
	// committed with the code, so the MONGO_URL environment variable takes
	// precedence when it is set; the literal below is only a local fallback.
	url = process.env.MONGO_URL || 'mongodb://tarun:tarun@localhost:27017/NYCTaxiDB',
	// Single mongoskin handle shared by all requests
	db = mongo.db(url, {
		native_parser: true
	});

// Pretty-printer for debug logging (from the eyes package).
// NOTE(review): maxLength: false presumably turns off output truncation - confirm against the eyes docs.
var inspect = require('eyes').inspector({ maxLength: false });

// Let browsers on other origins call this API (cors() with its default options)
app.use(cors());

// Make the shared db handle available to every route handler as req.db
app.use(function attachDb(req, res, next) {
	req.db = db;
	// Hand over to the next middleware / the matching route
	next();
});

/**
 * Picks a pseudo-random value in the half-open range [min, max).
 * For integer bounds the result is an integer.
 *
 * @param {number} min - Lower bound (included).
 * @param {number} max - Upper bound (excluded).
 * @returns {number} min plus a whole-number offset smaller than (max - min).
 */
function getRandomInt(min, max) {
	var span = max - min;
	var offset = Math.floor(Math.random() * span);
	return offset + min;
}

// Returns a random taxi record from the NYCTaxis collection.
// A random number of documents (1 to 999) is skipped and the document at that
// offset is sent back.
app.get('/randomtaxi', function(req, res) {
	var rnd = getRandomInt(1, 1000);
	var coll = req.db.collection('NYCTaxis');
	coll.find().limit(1).skip(rnd).toArray(function(err, results) {
		if (err) {
			// Throwing from this asynchronous callback would be an uncaught
			// exception and bring the whole server down, so report the
			// failure to this client only.
			console.error(err);
			res.status(500).send({ error: 'Failed to fetch a random taxi record' });
			return;
		}
		if (!results || results.length === 0) {
			// Nothing at the chosen offset (collection has too few documents)
			res.status(404).send({ error: 'No taxi record found' });
			return;
		}
		res.send(results[0]);
	});
});

// Boundary polygon and result limit for each borough, keyed by route name.
// Every polygon is a list of coordinate pairs whose last point repeats the
// first one; `limit` is the maximum number of locations (pickup and drop)
// returned for that borough.
var boroughs = {
	// Staten Island
	staten: {
		limit: 2000,
		polygon: [
			[-74.071116, 40.651530],
			[-74.071116, 40.624696],
			[-74.040989, 40.595049],
			[-74.244580, 40.484162],
			[-74.258999, 40.509486],
			[-74.236340, 40.557759],
			[-74.219174, 40.557498],
			[-74.200291, 40.598177],
			[-74.202008, 40.631275],
			[-74.185957, 40.645800],
			[-74.071116, 40.651530]
		]
	},
	// Queens
	queens: {
		limit: 3000,
		polygon: [
			[-73.779696, 40.809849],
			[-73.702106, 40.752136],
			[-73.761844, 40.551572],
			[-73.952044, 40.528612],
			[-73.961658, 40.562006],
			[-73.833255, 40.607895],
			[-73.868960, 40.695414],
			[-73.897113, 40.684220],
			[-73.928870, 40.727814],
			[-73.961658, 40.740562],
			[-73.911618, 40.795101],
			[-73.779696, 40.809849]
		]
	},
	// Manhattan
	manhattan: {
		limit: 2000,
		polygon: [
			[-74.034240, 40.686697],
			[-74.019992, 40.680709],
			[-73.995495, 40.704948],
			[-73.971463, 40.709893],
			[-73.961764, 40.743814],
			[-73.911724, 40.794679],
			[-73.927174, 40.802346],
			[-73.933354, 40.835214],
			[-73.907433, 40.873646],
			[-73.933699, 40.882083],
			[-74.013984, 40.756951],
			[-74.034240, 40.686697]
		]
	},
	// Brooklyn
	brooklyn: {
		limit: 3000,
		polygon: [
			[-73.962421, 40.737982],
			[-73.929806, 40.727706],
			[-73.896675, 40.683071],
			[-73.869381, 40.694916],
			[-73.854360, 40.643420],
			[-73.881483, 40.574612],
			[-74.035635, 40.562876],
			[-74.055891, 40.652017],
			[-74.033231, 40.686130],
			[-74.020443, 40.680077],
			[-73.994865, 40.704546],
			[-73.972206, 40.708910],
			[-73.962421, 40.737982]
		]
	},
	// Bronx
	bronx: {
		limit: 2000,
		polygon: [
			[-73.912002, 40.915643],
			[-73.748580, 40.871751],
			[-73.790809, 40.803112],
			[-73.861877, 40.799214],
			[-73.873550, 40.784821],
			[-73.932773, 40.807628],
			[-73.933803, 40.834681],
			[-73.908054, 40.873243],
			[-73.925906, 40.879538],
			[-73.912002, 40.915643]
		]
	}
};

// Register one GET route per borough (/staten, /queens, /manhattan,
// /brooklyn, /bronx). Each route answers with the top locations inside the
// borough's polygon, capped at the borough's limit.
Object.keys(boroughs).forEach(function(name) {
	var borough = boroughs[name];
	app.get('/' + name, function(req, res) {
		sendResult(req, res, borough.polygon, borough.limit);
	});
});

/**
 * Queries the aggLocations collection for the locations that fall inside a
 * polygon, ordered by descending "value.cnt", and sends them to the client.
 *
 * @param {Object} req - Request; req.db must be the database handle.
 * @param {Object} res - Response used to send the result (or an error).
 * @param {Array} coordinates - Polygon as a list of coordinate pairs.
 * @param {number} limit - Maximum number of documents to return.
 */
function sendResult(req, res, coordinates, limit) {
	var coll = req.db.collection('aggLocations');
	var pipeline = [{
		$match: {
			"_id.lglt": {
				$geoWithin: {
					$polygon: coordinates
				}
			}
		}
	}, {
		$sort: {
			"value.cnt": -1
		}
	}, {
		$limit: limit
	}];

	coll.aggregate(pipeline, {
		allowDiskUse: true
	}, function(err, result) {
		if (err) {
			// Throwing from this asynchronous callback would be an uncaught
			// exception and take the whole server down; answer this request
			// with a 500 instead.
			console.error(err);
			res.status(500).send({ error: 'Failed to query locations' });
			return;
		}
		res.send(result);
	});
}

// Start accepting HTTP connections on port 2387.
// Example request from the same machine: http://localhost:2387/manhattan
app.listen(2387);

– The require statements tell Node.js to load the packages mentioned. Since no path is given for a package, Node.js will look in the “node_modules” folder, which is where our packages are kept.
– I have also added the connection string for the MongoDB database to connect to. 27017 is the default port; you will see more information in the shell after you connect to it.
– Also added a middleware to attach our db object to each request that is made by our web application which will be then used to fetch records.
– Next is a series of GET routes, one for each borough of NYC, each with its own geographical coordinates represented as a polygon.
– Then used a simple aggregation query to fetch records for the requested borough, sort in descending order of instances and finally limit the number of documents to fetch.
– Finally listen for the connections made on localhost for that particular port.

Start the server using nodemon, open your favorite browser and type http://localhost:2387/manhattan. You will see the records being fetched. Please let me know if you face any issues.

Part 3 of n: Preparing Geo-spatial queries

In our last post we created an aggregated collection of coordinates, each with a count of the number of times it was frequented.

As stated, following is my goal: To show a heat map visualization of top pickup and dropoff locations in NYC. Currently I have divided the city into its five boroughs -> Manhattan, Brooklyn, Staten Island, Queens and Bronx. Each showing its top most frequented locations.

The technique that I applied is that:
View each borough’s geographical area as a polygon and use the geoWithin operator on those polygon coordinates to get the records for that borough.
We can create a rough diagram of each borough and set the coordinates at each point which makes a polygon. I used google maps for that.

Following are the polygons with coordinates that I created for each borough.

Manhattan:

Manhattan

Manhattan

Brooklyn

Brooklyn

Brooklyn

Staten Island

Staten Island

Staten Island

Queens

Queens

Queens

Bronx

Bronx

Bronx

Now that we got our coordinates, we can write a query to fetch all records within those coordinates.
Query for manhattan:

// Polygon outlining Manhattan; the last point repeats the first
var manhattanPolygon = [
	[-74.034240, 40.686697],
	[-74.019992, 40.680709],
	[-73.995495, 40.704948],
	[-73.971463, 40.709893],
	[-73.961764, 40.743814],
	[-73.911724, 40.794679],
	[-73.927174, 40.802346],
	[-73.933354, 40.835214],
	[-73.907433, 40.873646],
	[-73.933699, 40.882083],
	[-74.013984, 40.756951],
	[-74.034240, 40.686697]
];

// Keep the locations inside the polygon, order them by descending count
// and return the first 2000
db.aggLocations.aggregate(
	[
		{ $match: { "_id.lglt": { $geoWithin: { $polygon: manhattanPolygon } } } },
		{ $sort: { "value.cnt": -1 } },
		{ $limit: 2000 }
	],
	{ allowDiskUse: true }
)

It took around 400ms to execute.

Similarly, you can create queries for other boroughs. I have prepared for the rest in the Node.js server. Check it out: https://github.com/tarun11ks/NYCTaxi/blob/master/js/external/server.js

Cool! Now we can head to our next post, where we will set up our Node.js server.

Analyzing NYC 2013 taxi data

It all started after I saw this post on Hacker news : https://news.ycombinator.com/item?id=7910173
Thanks to Chris Wong for FOILing the data (obtaining it through a Freedom of Information Law request).
Off-topic: I just checked his site and found that he has FOILed another dataset. Awesome!

Back to the original topic: it’s a HUGE dataset. 173 million records!

Inspired by Chris’s work, I decided to give it a try and created a single page web application. This application will show a heat map visualization of the top pickup and dropoff locations in NYC. Currently I have divided the city into its five boroughs -> Manhattan, Brooklyn, Staten Island, Queens and Bronx, each showing its most frequented locations.

I will share my work here by dividing it into different parts:
1) Preparing the dataset using MongoDB
2) Creating Map-Reduce
3) Preparing Geo-spatial queries
4) Using Node.JS to provide a REST interface
5) Finally Backbone.JS to create the single page application

Following is the GitHub page: https://github.com/tarun11ks/NYCTaxi
You can have a look at the Technology Stack here: http://stackshare.io/tarun11ks/nyctaxi

Note: These are not beginner articles. They expect some knowledge of Backbone.JS and MongoDB.