MediaWiki talk:OCR.js

Latest comment: 9 years ago by Phe in topic Recent double toolbar problem

Could someone make these changes to make the code more compatible with the new version of MediaWiki? Besides changing addOnloadHook to $; and accessing wg* variables through mw.config, it also fixes some problems identified by JSHint, such as missing {...} around some if blocks. Helder 02:15, 14 July 2011 (UTC)

Enhanced toolbar edit

{{editprotected}} Could some admin fix this script to work with the enhanced toolbar? See for example the last part of this code. Helder 01:35, 14 July 2011 (UTC)

Phe says   Done by other means with the implementation of upgraded OCR tool. billinghurst sDrewth 15:04, 18 February 2012 (UTC)Reply

Tests into it.source edit

Phe: Tests are under way on it.source to dig as deeply as we can into the hOCR data (mapped text) coming from this tool. Work is going on using a local copy of OCR.js (it:MediaWiki:OCR.js). Thanks Phe. --Alex brollo (talk) 17:17, 7 November 2014 (UTC)Reply

Recent double toolbar problem edit

@Zyephyrus, Phe, Tpt: For recent double toolbar / buggy OCR button problem, try

/*jshint boss:true*/
/*global $, mw*/

/*
 * Query an OCR for a given Page:. First try to get the hOCR text layer, as it is
 * available for most books, fast, and of better quality. If that fails, fall back to
 * the older and slower OCR method. hOCR fails for around 1 in 5000 books; OCR should
 * never fail, as it uses the image visible on the Page:.
 */

// Language code sent with every OCR request. It starts out as the wiki's content
// language; fraktur_ocr() overrides it for the duration of one request.
var lang = mw.config.get( 'wgContentLanguage' );

/**
 * Lock or unlock the editor while an OCR request is in flight.
 *
 * While locked, the OCR buttons have no click handlers, the edit box is
 * disabled, and pressing ESC unlocks everything again. Unlocking re-attaches
 * do_hocr to #wsOcr1 and fraktur_ocr to #wsOcr2.
 *
 * Handlers installed here use the ".wsOcr" event namespace so that repeated
 * calls replace them instead of stacking one more copy per call.
 *
 * @param {boolean} set true to lock the UI, false to unlock it.
 */
function disable_input(set)
{
	// Always drop our previous ESC listener first; it is re-added below if needed.
	$(document).off('keyup.wsOcr');

	if (set) {
		$(document).on('keyup.wsOcr', function(e) {
			// 27 is the ESC key.
			if (e.which === 27) { disable_input(false); }
		});

		// Un-namespaced 'click' removes every click handler on the buttons,
		// namespaced or not.
		$('#wsOcr1').off('click');
		$('#wsOcr2').off('click');
	} else {
		$('#wsOcr1').off('click.wsOcr').on('click.wsOcr', do_hocr);
		$('#wsOcr2').off('click.wsOcr').on('click.wsOcr', fraktur_ocr);
	}

	$('#wpTextbox1').prop('disabled', set);
}

/**
 * Handle the JSON answer of the slow OCR service.
 *
 * On error the server-supplied message is shown in an alert. Otherwise the
 * recognised text replaces the edit box content, but only if the box is still
 * disabled. In both cases the UI is unlocked afterwards.
 *
 * @param {Object} data Response object; reads data.error and data.text.
 */
function ocr_callback(data) {
	if (!data.error) {
		// A box that is no longer disabled means the user pressed ESC; Chrome
		// does not abort the request in that case, so drop the result here.
		var textbox = document.getElementById("wpTextbox1");
		if (textbox.disabled) {
			textbox.value = data.text;
		}
	} else {
		alert(data.text);
	}

	disable_input(false);
}

/**
 * Handle the JSON answer of the hOCR service.
 *
 * On error the UI is unlocked and the slow OCR path (do_ocr) is started
 * instead. Otherwise, if the edit box is still disabled, the raw hOCR markup is
 * cached in localStorage.ws_hOCR, its text content is extracted, blank lines
 * are normalised and the result is written to the edit box. The UI is unlocked
 * afterwards.
 *
 * @param {Object} data Response object; reads data.error and data.text.
 */
function hocr_callback(data) {
	if (data.error) {
		// Fallback to the slow way.
		disable_input(false);
		do_ocr();
		return;
	}

	// Checking if tb is disabled is required with chrome as ESC doesn't kill
	// the query.
	var tb = document.getElementById("wpTextbox1");
	if (tb.disabled) {
		// Caching the markup is best-effort: a storage failure (quota exceeded,
		// storage disabled) must not stop the text from being inserted nor
		// leave the edit box locked.
		try {
			localStorage.ws_hOCR = data.text;
		} catch (e) {
			if (typeof console !== 'undefined' && console.warn) {
				console.warn('OCR: could not cache hOCR data', e);
			}
		}

		var text = $(data.text).text();
		// Strip spaces around newlines, then collapse line breaks: runs of four
		// newlines are protected with a placeholder, double newlines become
		// single ones, the placeholder becomes a blank line, and any remaining
		// triple newline is reduced to a blank line.
		text = text.replace(/[ ]*\n[ ]*/g, '\n')
			.replace(/\n\n\n\n/g, '@_@_@_@_@_@')
			.replace(/\n\n/g, '\n')
			.replace(/@_@_@_@_@_@/g, '\n\n')
			.replace(/\n\n\n/g, '\n\n');
		tb.value = $.trim(text);
	}

	disable_input(false);
}

/**
 * Lock the editor and request the hOCR text layer for the current page.
 *
 * The request carries the page title (wgTitle), the current OCR language and
 * the user name (wgUserName); every value is URL-encoded. The answer is
 * handled by hocr_callback. If the request itself fails, the editor is
 * unlocked again so it cannot stay disabled.
 */
function do_hocr() {
	disable_input(true);

	var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr' +
		'&book=' + encodeURIComponent(mw.config.get('wgTitle')) +
		'&lang=' + encodeURIComponent(lang) +
		'&user=' + encodeURIComponent(mw.config.get('wgUserName'));

	$.getJSON(request_url)
		.done(hocr_callback)
		.fail(function() {
			disable_input(false);
		});
}

/**
 * Lock the editor and run the slow OCR on the page image currently displayed.
 *
 * Does nothing when no '.prp-page-image img' element is present. The image
 * URL, the OCR language and the user name (wgUserName) are URL-encoded into
 * the request. The answer is handled by ocr_callback. If the request itself
 * fails, the editor is unlocked again so it cannot stay disabled.
 */
function do_ocr() {
	var $image = $( '.prp-page-image img' );
	if (!$image.length) {
		return;
	}

	disable_input(true);

	// server side can't use protocol relative url, request it as https:
	// (only when the src really is protocol relative).
	var url_image = $image.attr('src') || '';
	if (url_image.indexOf('//') === 0) {
		url_image = 'https:' + url_image;
	}

	var request_url = "//tools.wmflabs.org/phetools/ocr.php?cmd=ocr" +
		"&url=" + encodeURIComponent(url_image) +
		"&lang=" + encodeURIComponent(lang) +
		"&user=" + encodeURIComponent(mw.config.get('wgUserName'));

	$.getJSON( request_url )
		.done( ocr_callback )
		.fail( function() {
			disable_input(false);
		} );
}

/**
 * Run the slow OCR with the Fraktur language code.
 *
 * The shared `lang` variable is switched to 'de-f' just for the do_ocr() call
 * and then reset to the wiki's content language. The hOCR path is not used
 * here because all hOCR for 'de' is produced with non-Fraktur recognition.
 */
function fraktur_ocr()
{
	var frakturCode = 'de-f';

	lang = frakturCode;
	do_ocr();

	// Back to the default for the next request.
	lang = mw.config.get( 'wgContentLanguage' );
}

/**
 * Add one OCR button to the WikiEditor toolbar of #wpTextbox1.
 *
 * The button goes into the 'insert' group of the 'main' section and is then
 * sized to 42 wide through its rel="<imageId>" element.
 *
 * @param {Object} b Button description: imageId (tool key), speedTip (label),
 *   imageFile (icon URL) and onClick (callback run by the tool).
 */
function addButtonToWikiEditorToolbar( b ){
	// Note: no 'filters' entry is set on the tool (a 'body.ns-104' filter was
	// left disabled in earlier revisions).
	var tool = {
		label: b.speedTip,
		type: 'button',
		icon: b.imageFile,
		action: {
			type: 'callback',
			execute: b.onClick
		}
	};

	var toolset = {};
	toolset[ b.imageId ] = tool;

	var $textbox = $( '#wpTextbox1' );
	$textbox.wikiEditor( 'addToToolbar', {
		section: 'main',
		group: 'insert',
		tools: toolset
	} );

	var buttonSelector = '[rel="' + b.imageId + '"]';
	$( buttonSelector ).width( 42 );
}

/**
 * Add one OCR button to the classic toolbar through mw.toolbar.addButton.
 *
 * After the button is registered, any click handler on the element with id
 * b.imageId is removed and replaced by one that runs b.onClick() and returns
 * false; the element is then sized to 46 wide.
 *
 * @param {Object} b Button description: imageFile, speedTip, imageId, onClick.
 */
function addButtonToClassicToolbar( b ){
	var definition = {
		imageFile: b.imageFile,
		speedTip: b.speedTip,
		imageId: b.imageId
	};
	mw.toolbar.addButton( definition );

	var handleClick = function () {
		b.onClick();
		return false;
	};

	var $button = $( '#' + b.imageId );
	$button.off( 'click' ).click( handleClick ).width( 46 );
}

/**
 * Install the OCR button(s) on whichever toolbar the user has enabled.
 *
 * The WikiEditor toolbar is preferred ('usebetatoolbar'), then the classic one
 * ('showtoolbar'); with neither enabled nothing is done. Once the required
 * modules are loaded and the DOM is ready, one button (wsOcr1, hOCR) is added,
 * plus a second Fraktur button (wsOcr2) when the content language is 'de'.
 */
function customizeToolbar()
{
	var userOptions = mw.user.options;

	// Loose comparison on purpose: the preference can be the string "0" when
	// the user disabled it ([[bugzilla:52542#c3]]).
	var useWikiEditor = userOptions.get( 'usebetatoolbar' ) == 1;
	var useClassic = !useWikiEditor && userOptions.get( 'showtoolbar' ) == 1;
	if ( !useWikiEditor && !useClassic ) {
		return;
	}

	var modules = useWikiEditor ?
		['ext.wikiEditor.toolbar', 'schema.Edit'] :
		['mediawiki.toolbar'];
	var img = useWikiEditor ?
		'//upload.wikimedia.org/wikipedia/commons/c/c9/Toolbaricon_OCR.png' :
		'//upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png';
	var add = useWikiEditor ? addButtonToWikiEditorToolbar : addButtonToClassicToolbar;

	var prerequisites = $.when( mw.loader.using( modules ), $.ready );
	prerequisites.then( function(){
		var isGerman = mw.config.get( 'wgContentLanguage' ) === 'de';

		var buttons = [ {
			imageFile: img,
			speedTip: isGerman ? 'Normale OCR' : 'Get the text by OCR',
			imageId: 'wsOcr1',
			onClick: do_hocr
		} ];
		if ( isGerman ) {
			buttons.push( {
				imageFile: '//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png',
				speedTip: 'Fraktur OCR',
				imageId: 'wsOcr2',
				onClick: fraktur_ocr
			} );
		}

		for ( var i = 0; i < buttons.length; i++ ) {
			add( buttons[ i ] );
		}
	} );
}

// Bootstrap: only on Page: content ('proofread-page' content model), only while
// editing or previewing, and only when the page has not set the
// proofreadpage_disable_ocr opt-out. The toolbar is customised once the
// 'ext.proofreadpage.page.edit' module is loaded.
if ( mw.config.get( 'wgPageContentModel' ) === 'proofread-page' ) {
	if ( $.inArray( mw.config.get( 'wgAction' ), [ 'edit', 'submit' ] ) >= 0 ) {
		if ( !self.proofreadpage_disable_ocr ) {
			mw.loader.using( 'ext.proofreadpage.page.edit' ).done( customizeToolbar );
		}
	}
}

Seems to have done the trick on en.WS though OCR is a gadget there. Module 'schema.Edit' appears to be the root of the new problem(s) regardless. -- George Orwell III (talk) 07:32, 20 March 2015 (UTC)Reply

Thank you @George Orwell III:, I can see you are one of these mysterious magicians! seems to be all right now; if a mul-wikisorcerer complains, I'll try this script... with your help :) --Zyephyrus (talk) 16:39, 21 March 2015 (UTC)Reply
I tried that on fr. but it didn't work for everyone; anyway, it looks like the trouble was in the MediaWiki JavaScript and was fixed two days ago. I reverted all my changes and things work fine now. — Phe 02:35, 22 March 2015 (UTC)Reply
Return to "OCR.js" page.