Skip to content

Commit

Permalink
New translators and update by ibex. Thanks!
Browse files Browse the repository at this point in the history
  • Loading branch information
avram committed Sep 23, 2011
1 parent 19e0c17 commit 43c869c
Show file tree
Hide file tree
Showing 4 changed files with 583 additions and 35 deletions.
182 changes: 182 additions & 0 deletions Beobachter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
{
"translatorID":"a571680e-6338-46c2-a740-3cd9eb80fc7f",
"label":"Beobachter",
"creator":"ibex",
"target":"^http://((www\\.)?beobachter\\.ch/.)",
"minVersion":"2.1.9",
"maxVersion":"",
"priority":100,
"inRepository":"0",
"translatorType":4,
"lastUpdated":"2011-09-17 11:17:03"
}

/*
Beobachter Translator - Parses Beobachter articles and creates Zotero-based
metadata.
Copyright (C) 2011 ibex
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
Reference URLs:
Article: http://www.beobachter.ch/natur/natuerlich-leben/wohnen-freizeit/artikel/beleuchtung_es-werde-led/
Topic list: http://www.beobachter.ch/natur/forschung-wissen/
*/

/**
 * Zotero API: reports which kind of item(s) the current page offers.
 *
 * A page whose URL contains "/artikel/" and whose <body> carries the CSS class
 * "articleSingle" is a single article. Failing that, a <body> carrying the CSS
 * class "article" is treated as a list of articles.
 *
 * Search result pages are not detected on purpose: they are AJAX-ified, which
 * is currently not compatible with Zotero. See
 * http://forums.zotero.org/discussion/18518/import-citation-from-an-ajaxbased-site/
 *
 * @param {element} doc Document DOM
 * @param {String} url Page URL (not read here; doc.location.href is used instead)
 * @return {String} "magazineArticle", "multiple", or undefined when nothing matches
 */
function detectWeb(doc, url) {
	var onArticleUrl = doc.location.href.match(/.*\/artikel\//i);
	if (onArticleUrl) {
		var singleBody = ZU.xpath(doc, '/html/body[' + containingClass('articleSingle') + ']');
		if (singleBody.length > 0) {
			return "magazineArticle";
		}
	}

	var listBody = ZU.xpath(doc, '//html/body[' + containingClass('article') + ']');
	if (listBody.length > 0) {
		return "multiple";
	}
}

/**
 * Zotero API: collects the article URL(s) to import and hands them to scrape().
 *
 * On a list page the links found under the <h3> headings of the element with
 * id "mainContent" are offered to the user through Z.selectItems(); the URLs
 * of the chosen entries are then processed. On any other page the current
 * document URL is processed directly.
 *
 * @param {element} doc Document DOM
 * @param {String} url Page URL (only forwarded to detectWeb())
 * @return {Boolean} true when there is nothing to import or nothing was
 *                   selected; otherwise nothing is returned
 */
function doWeb(doc, url) {
	var targets = [];

	if (detectWeb(doc, url) != "multiple") {
		targets.push(doc.location.href);
	} else {
		var headings = doc.getElementById("mainContent").getElementsByTagName('h3');
		var candidates = ZU.getItemArray(doc, headings, '.*');
		if (!candidates || countObjectProperties(candidates) == 0) {
			return true;
		}

		var chosen = Z.selectItems(candidates);
		if (!chosen) {
			return true;
		}

		// The keys of the selection are the article URLs.
		for (var link in chosen) {
			targets.push(link);
		}
	}

	ZU.processDocuments(targets, scrape, function() { Z.done(); });
	Z.wait();
}

/**
 * Builds a "magazineArticle" Zotero item from a single Beobachter article page
 * and completes it.
 *
 * Title, date, publication title, abstract and language are taken from the
 * page's meta tags; short title, authors and issue/volume are read from the
 * div carrying the CSS class "mediaarticleSingleView".
 *
 * @param {element} doc Document DOM of the article page
 * @return {undefined} Nothing is returned; the item is handed over via newItem.complete()
 */
function scrape(doc) {
	// All article-specific lookups are scoped to this container.
	var articleView = '//div[' + containingClass('mediaarticleSingleView') + ']';

	// Fetch meta tags and fill meta tag array for associateMeta() function
	var metaTags = fetchMeta(doc);

	var newItem = new Z.Item('magazineArticle');
	newItem.url = doc.location.href;
	var shortTitle = ZU.xpath(doc, articleView + '//h3');
	if (shortTitle.length > 0) {
		newItem.shortTitle = ZU.trimInternal(shortTitle[0].textContent);
	}

	associateMeta(newItem, metaTags, "DC.title", "title");
	associateMeta(newItem, metaTags, "DC.date", "date");
	associateMeta(newItem, metaTags, "publisher", "publicationTitle");
	associateMeta(newItem, metaTags, "abstract", "abstractNote");
	associateMeta(newItem, metaTags, "DC.Language", "language");
	// Other potentially useful meta data: DC.keywords

	newItem.ISSN = "1661-7444";

	// The value is read from the node that directly follows the "Text:" <dt>.
	var authorDt = ZU.xpath(doc, articleView + '//dl/dt[. = "Text:"]');
	var authorNode = authorDt.length > 0 ? authorDt[0].nextSibling : null;
	if (authorNode) {
		var authorline = ZU.trimInternal(authorNode.textContent);
		// Assumption of authorline: "name1[, name2] [und Name3]"
		// Split on commas and on the standalone word "und" only, so that names
		// which merely contain "und" (Raimund, Sigmund, ...) stay intact.
		var authors = authorline.split(/\s*,\s*|\s+und\s+/);
		for (var i = 0; i < authors.length; i++) {
			// Skip empty pieces (empty author line, trailing comma).
			if (authors[i].length > 0) {
				newItem.creators.push(ZU.cleanAuthor(authors[i], "author"));
			}
		}
	}

	// The value following the "Ausgabe:" <dt> is split at "/": the first part
	// is stored as issue, the second part (if any) as volume.
	var issueDt = ZU.xpath(doc, articleView + '//dl/dt[. = "Ausgabe:"]');
	var issueNode = issueDt.length > 0 ? issueDt[0].nextSibling : null;
	if (issueNode) {
		var issueParts = issueNode.textContent.split("/");
		newItem.issue = ZU.trimInternal(issueParts[0]);
		if (issueParts.length > 1) {
			newItem.volume = ZU.trimInternal(issueParts[1]);
		}
	}

	// A print dialog is shown to the user. The print page listens to the
	// onload JavaScript event and executes window.print().
	// I do not know how to disable this behaviour.
	newItem.attachments.push({title: "Beobachter Article Snapshot", mimeType: "text/html", url: doc.location.href + "/print.html", snapshot: true});

	newItem.complete();
}

/**
 * There is no built-in function to count object properties, which often are
 * used as associative arrays.
 *
 * Only the object's own enumerable properties are counted; inherited ones are
 * ignored. hasOwnProperty is taken from Object.prototype so that the count
 * also works for objects without a prototype and for maps that happen to
 * contain a key named "hasOwnProperty".
 *
 * @param {Object} obj Associative array
 * @return {int} Number of object properties = length of associative array
 */
function countObjectProperties(obj) {
	var hasOwn = Object.prototype.hasOwnProperty;
	var size = 0;
	for (var key in obj) {
		if (hasOwn.call(obj, key)) size++;
	}
	return size;
}

/**
 * Fetch meta tags and fill meta tag array for associateMeta() function.
 *
 * Every <meta> element that has a non-empty "name" attribute contributes one
 * entry; when a name occurs more than once the last occurrence wins. Elements
 * without a name (e.g. charset or http-equiv declarations) are skipped so they
 * do not end up under a bogus "null" key.
 *
 * @param {element} doc Document DOM
 * @return {Object} Associative array (Object) of meta tags, array[name] = value
 */
function fetchMeta(doc) {
	var metaTagHTML = doc.getElementsByTagName("meta");
	var metaTags = {};
	for (var i = 0; i < metaTagHTML.length; i++) {
		var name = metaTagHTML[i].getAttribute("name");
		if (!name) {
			continue;
		}
		metaTags[name] = metaTagHTML[i].getAttribute("content");
	}
	return metaTags;
}

/**
 * Copies the value of one HTML meta tag into a field of a Zotero item.
 * The value is passed through ZU.trimInternal() first. Nothing is assigned
 * when the meta tag is missing or its value is empty.
 * The meta tags array can be filled with fetchMeta() function.
 *
 * @param {Object} newItem The Zotero item
 * @param {Object} metaTags Associative array (Object) of meta tags, array[name] = value
 * @param {String} name The meta tag name
 * @param {String} zoteroField The Zotero field name in the Zotero item.
 * @return {undefined} Nothing is returned
 */
function associateMeta(newItem, metaTags, name, zoteroField) {
	var rawValue = metaTags[name];
	if (!rawValue) {
		return;
	}
	newItem[zoteroField] = ZU.trimInternal(rawValue);
}

/**
 * Builds the XPath predicate body that tests whether an element's 'class'
 * attribute contains the given CSS class name as a whole, space-separated
 * token. To match <div class='foo bar'> write
 * "//div[" + containingClass("foo") + "]".
 *
 * Reference: http://pivotallabs.com/users/alex/blog/articles/427-xpath-css-class-matching
 *
 * @param {String} className CSS class name
 * @return {String} XPath fragment
 */
function containingClass(className) {
	// Pad both the attribute value and the wanted name with spaces so that
	// only complete class tokens match ("foo" must not match "foobar").
	var paddedClassAttr = "concat(' ',normalize-space(@class),' ')";
	var paddedClassName = "' " + className + " '";
	return "contains(" + paddedClassAttr + "," + paddedClassName + ")";
}
172 changes: 172 additions & 0 deletions Handelszeitung.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
{
"translatorID":"cfbb3e2c-8292-43d0-86d5-e457399107de",
"label":"Handelszeitung, Bilanz, Stocks",
"creator":"ibex",
"target":"^http://((www\\.)?(handelszeitung|bilanz|stocks)\\.ch/.)",
"minVersion":"2.1.9",
"maxVersion":"",
"priority":100,
"inRepository":"0",
"translatorType":4,
"lastUpdated":"2011-09-18 11:17:03"
}

/*
Handelszeitung Translator - Parses Handelszeitung, Bilanz and Stocks articles
and creates Zotero-based metadata.
Copyright (C) 2011 ibex
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
Reference URLs:
Article: http://www.handelszeitung.ch/unternehmen/google-kauft-daily-deal
Search: http://www.handelszeitung.ch/search/apachesolr_search/Google
Article: http://www.bilanz.ch/unternehmen/google-kauft-daily-deal
Search: http://www.bilanz.ch/search/apachesolr_search/Google
Article: http://www.stocks.ch/unternehmen/google-kauft-daily-deal
Search: http://www.stocks.ch/search/apachesolr_search/Google
*/

/**
 * Zotero API: reports which kind of item(s) the current page offers.
 *
 * A URL containing "/search/" together with at least one div of CSS class
 * "buildmode-3" is a search result list. Otherwise a page containing a div of
 * CSS class "node-type-article" is a single article.
 *
 * @param {element} doc Document DOM
 * @param {String} url Page URL (not read here; doc.location.href is used instead)
 * @return {String} "multiple", "newspaperArticle", or undefined when nothing matches
 */
function detectWeb(doc, url) {
	var href = doc.location.href;

	if (href.match(/\/search\//)) {
		var hits = ZU.xpath(doc, '//div[' + containingClass('buildmode-3') + ']');
		if (hits.length > 0) {
			return "multiple";
		}
	}

	// Article pages have no distinguishing URL pattern: any non-empty URL qualifies.
	if (href.match(/./)) {
		var articles = ZU.xpath(doc, '//div[' + containingClass('node-type-article') + ']');
		if (articles.length > 0) {
			return "newspaperArticle";
		}
	}
}

/**
 * Zotero API: collects the article URL(s) to import and hands them to scrape().
 *
 * On a search result page the links found inside the "buildmode-3" elements
 * of the element with id "content-left" are offered to the user through
 * Z.selectItems(); the URLs of the chosen entries are then processed. On any
 * other page the current document URL is processed directly.
 *
 * @param {element} doc Document DOM
 * @param {String} url Page URL (only forwarded to detectWeb())
 * @return {Boolean} true when there is nothing to import or nothing was
 *                   selected; otherwise nothing is returned
 */
function doWeb(doc, url) {
	var targets = [];

	if (detectWeb(doc, url) != "multiple") {
		targets.push(doc.location.href);
	} else {
		var resultNodes = doc.getElementById('content-left').getElementsByClassName('buildmode-3');
		var candidates = ZU.getItemArray(doc, resultNodes, 'http://.+/.+');
		if (!candidates || countObjectProperties(candidates) == 0) {
			return true;
		}

		var chosen = Z.selectItems(candidates);
		if (!chosen) {
			return true;
		}

		// The keys of the selection are the article URLs.
		for (var link in chosen) {
			targets.push(link);
		}
	}

	ZU.processDocuments(targets, scrape, function() { Z.done(); });
	Z.wait();
}

/**
 * Builds a "newspaperArticle" Zotero item from a single Handelszeitung, Bilanz
 * or Stocks article page and completes it.
 *
 * Title, lead (abstract), publication date and section are read from the div
 * carrying the CSS class "node-type-article"; publication title and ISSN are
 * derived from the host name found in the page URL.
 *
 * @param {element} doc Document DOM of the article page
 * @return {undefined} Nothing is returned; the item is handed over via newItem.complete()
 */
function scrape(doc) {
	// All article-specific lookups are scoped to this container.
	var articleNode = '//div[' + containingClass('node-type-article') + ']';
	var href = doc.location.href;

	var newItem = new Z.Item('newspaperArticle');
	newItem.url = href;
	var title = ZU.xpath(doc, articleNode + '//div[' + containingClass('field-title') + ']');
	if (title.length > 0) {
		newItem.title = ZU.trimInternal(title[0].textContent);
	}
	newItem.shortTitle = null;

	var lead = ZU.xpath(doc, articleNode + '//div[' + containingClass('field-article-lead') + ']');
	if (lead.length > 0) {
		newItem.abstractNote = ZU.trimInternal(lead[0].textContent);
	}

	var date = ZU.xpath(doc, articleNode + '//div[' + containingClass('field-publish-date') + ']');
	if (date.length > 0) {
		// Keep only the part before the first "|". The pipe must be escaped:
		// an unescaped "|" is regex alternation with an empty branch, which
		// matches the empty string and therefore removes nothing.
		newItem.date = ZU.trimInternal(date[0].textContent.replace(/\|.*$/, ''));
	}

	// Literal substring tests; a string passed to match() would be compiled
	// to a regular expression in which "." matches any character.
	if (href.indexOf('handelszeitung.ch') != -1) {
		newItem.publicationTitle = 'Handelszeitung';
		newItem.ISSN = "1422-8971";
	} else if (href.indexOf('bilanz.ch') != -1) {
		newItem.publicationTitle = 'Bilanz';
		newItem.ISSN = "1022-3487";
	} else if (href.indexOf('stocks.ch') != -1) {
		newItem.publicationTitle = 'Stocks';
		newItem.ISSN = "1424-7739";
	}

	newItem.language = "de";

	var section = ZU.xpath(doc, articleNode + '//div[' + containingClass('channel') + ']');
	if (section.length > 0) {
		newItem.section = ZU.trimInternal(section[0].textContent);
	}

	// Use the CSS media print stylesheet for the snapshot.
	switchDomMediaPrint(doc);
	newItem.attachments.push({title: newItem.publicationTitle + " Article Snapshot", document: doc});

	newItem.complete();
}

/**
 * There is no built-in function to count object properties, which often are
 * used as associative arrays.
 *
 * Only the object's own enumerable properties are counted; inherited ones are
 * ignored. hasOwnProperty is taken from Object.prototype so that the count
 * also works for objects without a prototype and for maps that happen to
 * contain a key named "hasOwnProperty".
 *
 * @param {Object} obj Associative array
 * @return {int} Number of object properties = length of associative array
 */
function countObjectProperties(obj) {
	var hasOwn = Object.prototype.hasOwnProperty;
	var size = 0;
	for (var key in obj) {
		if (hasOwn.call(obj, key)) size++;
	}
	return size;
}

/**
 * Builds the XPath predicate body that tests whether an element's 'class'
 * attribute contains the given CSS class name as a whole, space-separated
 * token. To match <div class='foo bar'> write
 * "//div[" + containingClass("foo") + "]".
 *
 * Reference: http://pivotallabs.com/users/alex/blog/articles/427-xpath-css-class-matching
 *
 * @param {String} className CSS class name
 * @return {String} XPath fragment
 */
function containingClass(className) {
	// The class attribute is space-padded on both sides, and so is the wanted
	// name, so that only complete class tokens match.
	var prefix = "contains(concat(' ',normalize-space(@class),' '),' ";
	var suffix = " ')";
	return prefix + className + suffix;
}

/**
 * Makes the print stylesheets of a document apply to every medium and takes
 * its screen stylesheets out of use, by rewriting the 'media' attribute of
 * the <link> elements: "print" becomes "all", "screen" becomes "DISABLE".
 * Links with any other (or no) media value are left alone.
 *
 * @param {element} doc Document DOM tree; it is modified in place
 * @return {element} The same document DOM tree that was passed in
 */
function switchDomMediaPrint(doc) {
	var links = doc.getElementsByTagName('link');
	for (var idx = 0; idx < links.length; idx++) {
		var link = links[idx];
		switch (link.getAttribute('media')) {
			case 'print':
				link.setAttribute('media', 'all');
				break;
			case 'screen':
				link.setAttribute('media', 'DISABLE');
				break;
		}
	}
	return doc;
}
Loading

0 comments on commit 43c869c

Please sign in to comment.