diff --git a/import/source/tripadvisor.js b/import/source/tripadvisor.js new file mode 100644 index 0000000..b6999e1 --- /dev/null +++ b/import/source/tripadvisor.js @@ -0,0 +1,7 @@ +const file = require('../file') + +module.exports = { + ingress: file, + format: 'csv', + mapper: require('./tripadvisor/map/place') +} diff --git a/import/source/tripadvisor/config/placetypes.json b/import/source/tripadvisor/config/placetypes.json new file mode 100644 index 0000000..34dd832 --- /dev/null +++ b/import/source/tripadvisor/config/placetypes.json @@ -0,0 +1,378 @@ +{ + "9999": { + "name": "World", + "is_geographic": true, + "is_virtual": false, + "is_broad": true, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10000": { + "name": "Continent", + "is_geographic": true, + "is_virtual": false, + "is_broad": true, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10001": { + "name": "Country", + "is_geographic": true, + "is_virtual": false, + "is_broad": true, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10002": { + "name": "Nation", + "is_geographic": true, + "is_virtual": false, + "is_broad": true, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10003": { + "name": "State", + "is_geographic": true, + "is_virtual": false, + "is_broad": true, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10004": { + "name": "City", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10005": { + "name": "County", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10006": { + "name": "Borough", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10007": { + "name": "Neighborhood", + "is_geographic": true, + "is_virtual": false, + 
"is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10008": { + "name": "Metro", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10009": { + "name": "Region", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10010": { + "name": "Island", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10011": { + "name": "Island Group", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10012": { + "name": "Territory", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10013": { + "name": "District", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10014": { + "name": "Prefecture", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10015": { + "name": "Municipality", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10016": { + "name": "Republic", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10017": { + "name": "Community", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10018": { + "name": "Kibbutz", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10019": { + "name": 
"Department", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10020": { + "name": "Province", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10021": { + "name": "Attraction", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": true + }, + "10022": { + "name": "Eatery", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": true + }, + "10023": { + "name": "Accomodation", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": true + }, + "10024": { + "name": "Tour", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10025": { + "name": "Parish", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10026": { + "name": "Canton", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10027": { + "name": "NationalPark", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10028": { + "name": "Geographic Location", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10029": { + "name": "Resource", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10030": { + "name": "Entertainment", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + 
"is_autobroaden_type": false, + "could_have_address": false + }, + "10031": { + "name": "Shopping", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10032": { + "name": "Narrow Virtual Region", + "is_geographic": true, + "is_virtual": true, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10033": { + "name": "Broad Virtual Region", + "is_geographic": true, + "is_virtual": true, + "is_broad": true, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10034": { + "name": "Narrow Virtual Region Leaf", + "is_geographic": true, + "is_virtual": true, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10036": { + "name": "Neighborhood Tier 1", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10037": { + "name": "Neighborhood Tier 2", + "is_geographic": true, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": true, + "could_have_address": false + }, + "10038": { + "name": "Airport", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": true + }, + "10039": { + "name": "Area", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": false + }, + "10040": { + "name": "Theme Park", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": true + }, + "10041": { + "name": "Metro Station", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": true + }, + "10042": { + "name": "Airline", + "is_geographic": false, + "is_virtual": false, + "is_broad": false, + "is_autobroaden_type": false, + "could_have_address": 
/**
 * Select the preferred buffer radius for a place based on its ontology.
 *
 * The ontology class and type are read from `place.ontology`, trimmed and
 * upper-cased before comparison, so matching is case-insensitive.
 * (The geometries mapper in this changeset passes the returned value to
 * turf.buffer with `units: 'degrees'`.)
 *
 * @param {object} place - object exposing `ontology.class` and `ontology.type`
 * @returns {number} buffer radius for the place
 */
function radius (place) {
  const normalized = (path) => _.get(place, path, '').trim().toUpperCase()

  const ontologyClass = normalized('ontology.class')
  const ontologyType = normalized('ontology.type')

  // anything which is not a geographic place gets the smallest buffer
  const isGeographic = (ontologyClass === 'GEOGRAPHIC')
  if (!isGeographic) { return 0.0001 }

  // per-type radius for the geographic placetypes we special-case
  const radiusByType = new Map([
    ['COUNTRY', 10],
    ['REGION', 0.2],
    ['DISTRICT', 0.1],
    ['CITY', 0.05]
  ])

  // all other geographic placetypes share a common default
  return radiusByType.has(ontologyType) ? radiusByType.get(ontologyType) : 0.01
}
/**
 * Add geometries to a place from the document's `longitude`/`latitude` fields.
 *
 * When both coordinates parse to finite numbers two geometries are added:
 *  - a 'centroid' POINT, so that a centroid does not need to be calculated
 *  - a 'buffer' POLYGON, produced by buffering the point by a radius which
 *    is selected from the place ontology
 *
 * When either coordinate is missing, unparseable or non-finite, nothing
 * is added to the place.
 *
 * @param {object} place - place to add geometries to (must expose `addGeometry`)
 * @param {object} doc - source record
 */
function mapper (place, doc) {
  const lon = parseFloat(_.get(doc, 'longitude'))
  const lat = parseFloat(_.get(doc, 'latitude'))

  // Number.isFinite rejects NaN (missing/unparseable values) as well as
  // +/-Infinity, which parseFloat happily produces for the string 'Infinity'
  // and which would otherwise end up inside the WKT string below.
  if (!Number.isFinite(lon) || !Number.isFinite(lat)) { return }

  // add an explicit centroid geometry so that one
  // does not need to be calculated.
  place.addGeometry(new Geometry(
    wkx.Geometry.parse(`POINT(${lon} ${lat})`),
    'centroid'
  ))

  // select a buffer radius based on ontology
  const rad = radius(place)

  // buffer the POINT to create a POLYGON
  const point = turf.point([lon, lat])
  const buffered = turf.buffer(point, rad, { units: 'degrees', steps: 8 })
  place.addGeometry(new Geometry(
    format.from('geometry', 'geojson', buffered.geometry),
    'buffer'
  ))
}
/**
 * Add a parent hierarchy to a place from the document's `navigationstring`.
 *
 * The navigation string is a '|' delimited breadcrumb ordered from the
 * broadest to the most granular place; each breadcrumb is encoded as
 * `id:placetypeid:name`, eg:
 *   4:10000:Europe|187768:10001:Italy|187811:10009:Friuli Venezia Giulia
 *
 * The last valid breadcrumb which is not the place itself is used as the
 * parent, on the 'trip:nav' branch. Nothing is added when no such
 * breadcrumb exists.
 *
 * @param {object} place - place to add the hierarchy to (must expose
 *   `identity` and `addHierarchy`)
 * @param {object} doc - source record
 */
function mapper (place, doc) {
  // ids in the navigation string are strings, so compare as strings in
  // order to detect the self-reference even when the place id is numeric.
  const placeid = _.get(place, 'identity.id')
  const selfid = (placeid === undefined || placeid === null) ? null : String(placeid)

  const parents = _.get(doc, 'navigationstring', '')
    .split('|')
    .map(crumb => crumb.split(':'))
    // remove invalid parents and self.
    // note: a name may itself contain ':' which yields more than three
    // parts, those breadcrumbs are still valid as only the id is used.
    .filter(parts => parts.length >= 3 && parts[0] !== '' && parts[0] !== selfid)

  if (!parents.length) { return }

  // get last parent (most granular)
  const parentid = parents[parents.length - 1][0]

  place.addHierarchy(
    new Hierarchy(
      place.identity,
      new Identity(place.identity.source, parentid),
      'trip:nav'
    )
  )
}
+module.exports.tests.mapper = (test) => { + test('mapper: properties empty', (t) => { + let p = new Place(fixture.municipality.identity, fixture.municipality.ontology) + map(p, {}) + + t.equals(p.hierarchy.length, 0) + t.end() + }) + test('mapper: navigationstring empty', (t) => { + let p = new Place(fixture.municipality.identity, fixture.municipality.ontology) + map(p, { 'navigationstring': '' }) + + t.equals(p.hierarchy.length, 0) + t.end() + }) + test('mapper: navigationstring contains self-reference', (t) => { + let p = new Place(fixture.municipality.identity, fixture.municipality.ontology) + map(p, { + 'navigationstring': '4:10000:Europe|187768:10001:Italy|187811:10009:Friuli Venezia Giulia|2339869:10020:Province of Pordenone|1400479:10015:Budoia' + }) + + t.equals(p.hierarchy.length, 1) + t.equals(p.hierarchy[0].child, fixture.municipality.identity) + t.equals(p.hierarchy[0].parent.source, fixture.province.identity.source) + t.equals(p.hierarchy[0].parent.id, '2339869') + t.equals(p.hierarchy[0].branch, 'trip:nav') + t.end() + }) + test('mapper: navigationstring no self-reference', (t) => { + let p = new Place(fixture.municipality.identity, fixture.municipality.ontology) + map(p, { + 'navigationstring': '4:10000:Europe|187768:10001:Italy|187811:10009:Friuli Venezia Giulia|2339869:10020:Province of Pordenone' + }) + + t.equals(p.hierarchy.length, 1) + t.equals(p.hierarchy[0].child, fixture.municipality.identity) + t.equals(p.hierarchy[0].parent.source, fixture.province.identity.source) + t.equals(p.hierarchy[0].parent.id, '2339869') + t.equals(p.hierarchy[0].branch, 'trip:nav') + t.end() + }) + test('mapper: navigationstring only self-reference', (t) => { + let p = new Place(fixture.municipality.identity, fixture.municipality.ontology) + map(p, { + 'navigationstring': '1400479:10015:Budoia' + }) + + t.equals(p.hierarchy.length, 0) + t.end() + }) +} + +module.exports.all = (tape) => { + function test (hierarchy, testFunction) { + return tape(`hierarchies: 
/**
 * Add the document's `primaryname` to a place as its default name.
 *
 * The name is trimmed and registered with the language 'und' and the
 * tag 'default', flagged as not being an abbreviation. A missing
 * `primaryname` is passed on as an empty string.
 *
 * @param {object} place - place to add the name to (must expose `addName`)
 * @param {object} doc - source record
 */
function mapper (place, doc) {
  // generic names
  const primaryName = _.get(doc, 'primaryname', '').trim()
  const defaultName = new Name('und', 'default', false, primaryName)

  place.addName(defaultName)
}
// document keys which are not copied to properties; these are consumed
// by the identity, names, hierarchies and geometries mapping instead.
const EXCLUDED_KEYS = new Set([
  'id',
  'primaryname',
  'navigationstring',
  'latitude',
  'longitude'
])

/**
 * Copy the remaining document fields to the place as 'trip:' prefixed
 * properties.
 *
 * Every key of the document which is not listed in EXCLUDED_KEYS becomes a
 * property named `trip:${key}`. Values are converted to strings where they
 * expose a `toString` function. Null and undefined values carry no
 * information and are skipped (they previously caused a TypeError).
 *
 * @param {object} place - place to add properties to (must expose `addProperty`)
 * @param {object} doc - source record
 */
function mapper (place, doc) {
  // trip-specific properties
  const picked = _.pickBy(doc, (val, key) => !EXCLUDED_KEYS.has(key))

  for (const key in picked) {
    let val = picked[key]

    // nothing to record for absent values (and they have no toString)
    if (val === undefined || val === null) { continue }

    if (typeof val.toString === 'function') { val = val.toString() }
    place.addProperty(new Property(`trip:${key}`, val))
  }
}
const Place = require('../../../../model/Place')
const map = require('./properties')

module.exports.tests = {}

// test cases for the properties mapper; each case maps a document on to
// a fresh Place and inspects the resulting `property` list.
module.exports.tests.mapper = (test) => {
  test('mapper: properties empty', (t) => {
    const place = new Place()
    map(place, {})

    t.equals(place.property.length, 0)
    t.end()
  })

  test('mapper: placetypeid', (t) => {
    const place = new Place()
    map(place, { 'placetypeid': '10015' })

    t.equals(place.property.length, 1)
    t.equals(place.property[0].key, 'trip:placetypeid', 'trip:placetypeid')
    t.equals(place.property[0].value, '10015', 'trip:placetypeid')
    t.end()
  })

  test('mapper: excluded properties', (t) => {
    const place = new Place()
    const doc = {
      'id': '9556686',
      'primaryname': 'Hotel Ambica',
      'placetypeid': '10023',
      'navigationstring': '1:9999:World|2:10000:Asia|293860:10001:India|297619:10003:Jammu and Kashmir|1830829:10009:Jammu|12375003:10013:Jammu District|297620:10004:Jammu City',
      'street1': 'Below Gumet',
      'neighborhoodid': '0',
      'latitude': '32.706920',
      'longitude': '74.853830'
    }
    map(place, doc)

    // only the fields which are not mapped elsewhere are expected, in document order
    const expected = [
      ['trip:placetypeid', '10023'],
      ['trip:street1', 'Below Gumet'],
      ['trip:neighborhoodid', '0']
    ]

    t.equals(place.property.length, 3)
    expected.forEach(([key, value], index) => {
      t.equals(place.property[index].key, key, key)
      t.equals(place.property[index].value, value, key)
    })
    t.end()
  })
}

// run every registered test group, prefixing each test name with 'properties: '
module.exports.all = (tape) => {
  const test = (name, testFunction) => tape(`properties: ${name}`, testFunction)

  Object.keys(module.exports.tests).forEach((testCase) => {
    module.exports.tests[testCase](test)
  })
}