Idi na sadržaj

Modul:Citation/CS1/COinS

S Wikipedije, slobodne enciklopedije

Dokumentaciju za ovaj modul možete napraviti na stranici Modul:Citation/CS1/COinS/dok

--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]

-- Functions provided by Module:Citation/CS1/Utilities; they stay nil until
-- set_selected_modules() assigns them.
local is_set, in_array, remove_wiki_link, strip_apostrophe_markup;

-- Table of configuration tables defined in Module:Citation/CS1/Configuration;
-- stays nil until set_selected_modules() assigns it.
local cfg;


--[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------

Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)

Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings
of %27%27...

]]

-- Build a single COinS title string from a title and its script counterpart.
--   title:  title text; may be unset
--   script: script-title text, optionally prefixed with a two-lowercase-letter
--           language code and a colon; may be unset
-- Returns the cleaned title and script joined by one space when both are set,
-- whichever one is set when only one is, or '' when neither is.
local function make_coins_title (title, script)
	if is_set (title) then
		title = strip_apostrophe_markup (title);				-- strip any apostrophe markup
	else
		title = '';												-- not set: make sure title is an empty string
	end

	if is_set (script) then
		script = script:gsub ('^%l%l%s*:%s*', '');				-- drop a leading language prefix; script may now be an empty string
		script = strip_apostrophe_markup (script);				-- strip any apostrophe markup
	else
		script = '';											-- not set: make sure script is an empty string
	end

	if is_set (title) and is_set (script) then
		script = ' ' .. script;									-- separate the two parts with a space
	end

	return title .. script;
end


--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a string where all of Lua's magic characters have been escaped. This is important because functions like
string.gsub() treat their pattern and replace strings as patterns, not literal strings.
]]

-- Escape every Lua pattern magic character in a string so that the result can
-- be used as a literal pattern.
--   argument: the string to escape
-- Returns the escaped string (a single value).
local function escape_lua_magic_chars (argument)
	argument = argument:gsub ("%%", "%%%%");									-- % first, so the escapes added below are not doubled
	argument = argument:gsub ("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");			-- then prefix every other magic character with %
	return argument;
end


--[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------

Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.

]]

-- Reduce a page-number parameter value to plain text for COinS by removing
-- the URL part of any external links it contains.
--   pages: value of a page parameter; may be unset
-- Returns pages unchanged when it is not set; otherwise the value with URLs
-- and square brackets removed, en dashes replaced by hyphens, and named HTML
-- entities replaced by hyphens.
local function get_coins_pages (pages)
	local pattern;

	if not is_set (pages) then									-- no page numbers, nothing to do
		return pages;
	end

	while true do
		pattern = pages:match ("%[(%w*:?//[^ ]+%s+)[%w%d].*%]");	-- capture the URL and the whitespace after it (the brackets are removed below)
		if nil == pattern then									-- no more URLs
			break;
		end
		pattern = escape_lua_magic_chars (pattern);				-- the captured URL is used as a pattern, so escape its magic characters
		pages = pages:gsub (pattern, "");						-- remove every occurrence of this URL
	end

	pages = pages:gsub ("[%[%]]", "");							-- remove the brackets
	pages = pages:gsub ("–", "-");								-- replace en dashes with hyphens
	pages = pages:gsub ("&%w+;", "-");							-- replace named HTML entities (&ndash; etc.) with hyphens; numeric entities are left alone
	return pages;
end


--[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------

There are three options for math markup rendering that depend on the editor's math preference settings. These
settings are at [[Special:Preferences#mw-prefsection-rendering]] and are
PNG images
TeX source
MathML with SVG or PNG fallback

All three are heavy with HTML and CSS which doesn't belong in the metadata.

Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
of the last editor to save the page.

This function gets the rendered form of an equation according to the editor's preference before the page is saved. It
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.

When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
value. To replace multiple equations it is necessary to call this function from within a loop.

]=]

-- Replace the first math stripmarker in value with a plain-text form of the
-- equation it stands for.
--   value: a metadata parameter value (string)
-- Returns true and the value with one stripmarker replaced, or false and the
-- unmodified value when there is no math stripmarker or its rendering is not
-- one of the three recognized forms.
local function coins_replace_math_stripmarker (value)
	local stripmarker = cfg.stripmarkers['math'];
	local rendering = value:match (stripmarker);				-- is there a math stripmarker?

	if not rendering then										-- no math stripmarker: abandon this test
		return false, value;
	end

	rendering = mw.text.unstripNoWiki (rendering);				-- convert the stripmarker into its rendered value

	if rendering:match ('alt="[^"]+"') then						-- PNG math option: equation text is in the alt attribute
		rendering = rendering:match ('alt="([^"]+)"');
	elseif rendering:match ('$%s+.+%s+%$') then					-- TeX math option: equation sits between $ delimiters; a literal $ is escaped as \$
		rendering = rendering:match ('$%s+(.+)%s+%$');
	elseif rendering:match ('<annotation[^>]+>.+</annotation>') then	-- MathML math option: equation text is in the <annotation> element
		rendering = rendering:match ('<annotation[^>]+>(.+)</annotation>');
	else
		return false, value;									-- had a math stripmarker but not one of the three defined forms
	end

	-- Use a function replacement so that any '%' in the equation text is
	-- inserted literally instead of being read as a gsub replacement escape.
	local replaced_value = value:gsub (stripmarker, function () return rendering end, 1);
	return true, replaced_value;
end


--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------

Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.

2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content
when it shouldn't. See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29

TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
characters table?

]]

-- Clean a parameter value for use in the metadata: replace math and nowiki
-- stripmarkers, and remove or replace invisible characters and certain HTML
-- entities.
--   value: a metadata parameter value (string)
-- Returns the cleaned string.
local function coins_cleanup (value)
	local replaced = true;										-- start true so that the loop runs at least once

	while replaced do											-- loop until no more math stripmarkers can be replaced
		replaced, value = coins_replace_math_stripmarker (value);
	end

	value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR");	-- any that remain could not be replaced; insert a vague error message

	value = mw.text.unstripNoWiki (value);						-- replace nowiki stripmarkers with their content
	value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;(s?)</span>', "'%1");	-- replace {{'}} or {{'s}} with simple apostrophe or apostrophe-s
	value = value:gsub ('&nbsp;', ' ');							-- replace &nbsp; entity with plain space
	value = value:gsub ('\226\128\138', ' ');					-- replace hair space with plain space

	if not mw.ustring.find (value, cfg.indic_script) then		-- zero-width joiners are kept when the value contains indic script
		value = value:gsub ('&zwj;', '');						-- remove &zwj; entities
		value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', '');	-- remove zero-width joiner, zero-width space, soft hyphen
	end

	value = value:gsub ('[\009\010\013]+', ' ');				-- replace runs of horizontal tab, line feed, carriage return with one plain space
	return value;
end


--[[--------------------------< C O I N S >--------------------------------------------------------------------

COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.

]]

-- Assemble the COinS (ContextObject in Span) metadata string for a citation.
--   data:  table of metadata values; the string values are cleaned in place,
--          while data.ID_list (identifier name -> value) and data.Authors
--          (sequence of tables with .last and optional .first) are handled
--          separately
--   class: citation class name ('journal', 'book', 'web', 'thesis', ...)
-- Returns '' when data is not a table or is empty; otherwise a string of
-- URI-encoded key=value pairs joined with '&', with ctx_ver first.
local function COinS (data, class)
	if 'table' ~= type (data) or nil == next (data) then
		return '';
	end

	for k, v in pairs (data) do									-- spin through all of the metadata parameter values
		if 'ID_list' ~= k and 'Authors' ~= k then				-- skip the two tables; author names are cleaned when the Authors table is processed
			data[k] = coins_cleanup (v);
		end
	end

	local ctx_ver = "Z39.88-2004";

	-- Treat the table strictly as an array holding only set values: every
	-- assignment of a set value by name appends an encoded 'key=value' string;
	-- unset values are dropped.
	local OCinSoutput = setmetatable ({}, {
		__newindex = function (self, key, value)
			if is_set (value) then
				rawset (self, #self + 1, table.concat {key, '=', mw.uri.encode (remove_wiki_link (value))});
			end
		end
	});

	if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'})
		or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set (data.Periodical))
		or ('citation' == class and is_set (data.Periodical) and not is_set (data.Encyclopedia)) then

		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal";	-- journal metadata identifier
		if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then	-- set genre according to the type of citation template being rendered
			OCinSoutput["rft.genre"] = "preprint";				-- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
		elseif 'conference' == class then
			OCinSoutput["rft.genre"] = "conference";			-- cite conference (when Periodical set)
		elseif 'web' == class then
			OCinSoutput["rft.genre"] = "unknown";				-- cite web (when Periodical set)
		else
			OCinSoutput["rft.genre"] = "article";				-- journal and other 'periodical' articles
		end
		OCinSoutput["rft.jtitle"] = data.Periodical;			-- journal only
		OCinSoutput["rft.atitle"] = data.Title;					-- 'periodical' article titles

		-- these are used only for periodicals
		OCinSoutput["rft.ssn"] = data.Season;					-- keywords: winter, spring, summer, fall
		OCinSoutput["rft.quarter"] = data.Quarter;				-- single digits 1->first quarter, etc.
		OCinSoutput["rft.chron"] = data.Chron;					-- free-form date components
		OCinSoutput["rft.volume"] = data.Volume;				-- does not apply to books
		OCinSoutput["rft.issue"] = data.Issue;
		OCinSoutput["rft.pages"] = data.Pages;					-- also used in book metadata

	elseif 'thesis' ~= class then								-- all others except cite thesis are treated as 'book' metadata; genre distinguishes
		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book";	-- book metadata identifier
		if 'report' == class or 'techreport' == class then		-- cite report and cite techreport
			OCinSoutput["rft.genre"] = "report";
		elseif 'conference' == class then						-- cite conference when Periodical not set
			OCinSoutput["rft.genre"] = "conference";
			OCinSoutput["rft.atitle"] = data.Chapter;			-- conference paper as chapter in proceedings (book)
		elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
			if is_set (data.Chapter) then
				OCinSoutput["rft.genre"] = "bookitem";
				OCinSoutput["rft.atitle"] = data.Chapter;		-- book chapter, encyclopedia article, interview in a book, or map title
			else
				if 'map' == class or 'interview' == class then
					OCinSoutput["rft.genre"] = 'unknown';		-- standalone map or interview
				else
					OCinSoutput["rft.genre"] = 'book';			-- book and encyclopedia
				end
			end
		else	-- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
			OCinSoutput["rft.genre"] = "unknown";
		end
		OCinSoutput["rft.btitle"] = data.Title;					-- book only
		OCinSoutput["rft.place"] = data.PublicationPlace;		-- book only
		OCinSoutput["rft.series"] = data.Series;				-- book only
		OCinSoutput["rft.pages"] = data.Pages;					-- book, journal
		OCinSoutput["rft.edition"] = data.Edition;				-- book only
		OCinSoutput["rft.pub"] = data.PublisherName;			-- book and dissertation

	else														-- cite thesis
		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation";	-- dissertation metadata identifier
		OCinSoutput["rft.title"] = data.Title;					-- dissertation (also patent but that is not yet supported)
		OCinSoutput["rft.degree"] = data.Degree;				-- dissertation only
		OCinSoutput['rft.inst'] = data.PublisherName;			-- book and dissertation
	end
	-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"

	-- and now the common parameters (as much as possible)
	OCinSoutput["rft.date"] = data.Date;						-- book, journal, dissertation

	for k, v in pairs (data.ID_list) do							-- identifiers are assumed to be common to all metadata formats
		if k == 'ISBN' then
			v = v:gsub ("[^-0-9X]", "");						-- keep only digits, hyphens and X
		end
		local handler = cfg.id_handlers[k];
		local id = handler.COinS;
		if string.sub (id or "", 1, 4) == 'info' then			-- for ids that are in the info: registry
			OCinSoutput["rft_id"] = table.concat {id, "/", v};
		elseif string.sub (id or "", 1, 3) == 'rft' then		-- for isbn, issn, eissn, etc. that have defined COinS keywords
			OCinSoutput[id] = v;
		elseif 'url' == id then									-- for urls that are assembled in ~/Identifiers; |asin= and |ol=
			OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", handler.label});
		elseif id then											-- COinS value is set but is none of the above, so the url is created here
			-- provide a URL and indicate the identifier name as #fragment (human-readable, but transparent to browsers)
			OCinSoutput["rft_id"] = table.concat {handler.prefix, v, handler.suffix or '', "#id-name=", handler.label};
		end
	end

	local last, first;
	for k, v in ipairs (data.Authors) do
		last, first = coins_cleanup (v.last), coins_cleanup (v.first or '');	-- replace any nowiki stripmarkers, non-printing or invisible characters
		if k == 1 then											-- for the first author name only
			if is_set (last) and is_set (first) then			-- both |first= and |last= specify the first author name
				OCinSoutput["rft.aulast"] = last;				-- book, journal, dissertation
				OCinSoutput["rft.aufirst"] = first;				-- book, journal, dissertation
			elseif is_set (last) then
				OCinSoutput["rft.au"] = last;					-- book, journal, dissertation; single-field form when only the last name is set
			end
		else													-- for all other authors
			if is_set (last) and is_set (first) then
				OCinSoutput["rft.au"] = table.concat {last, ",", first};	-- book, journal, dissertation
			elseif is_set (last) then
				OCinSoutput["rft.au"] = last;					-- book, journal, dissertation
			end
			-- TODO: At present we do not report "et al.". Add anything special if this condition applies?
		end
	end

	OCinSoutput.rft_id = data.URL;
	OCinSoutput.rfr_id = table.concat {"info:sid/", mw.site.server:match ("[^/]*$"), ":", data.RawPage};

	-- TODO: Add optional extra info:
	-- rfr_dat=#REVISION<version> (referrer private data)
	-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
	-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
	-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)

	OCinSoutput = setmetatable (OCinSoutput, nil);				-- drop the metatable so that the insert below stores its value unmodified

	-- sort with version string always first, and combine.
	-- table.sort( OCinSoutput );
	table.insert (OCinSoutput, 1, "ctx_ver=" .. ctx_ver);		-- such as "Z39.88-2004"
	return table.concat (OCinSoutput, "&");
end


--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Sets local cfg table and imported functions table to same (live or sandbox) as that used by the other modules.

]]

-- Point this module at the configuration table and utility functions chosen
-- by the caller.
--   cfg_table_ptr:      table stored in cfg
--   utilities_page_ptr: table supplying is_set, in_array, remove_wiki_link
--                       and strip_apostrophe_markup
local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	cfg = cfg_table_ptr;

	is_set = utilities_page_ptr.is_set;							-- import functions from selected Module:Citation/CS1/Utilities module
	in_array = utilities_page_ptr.in_array;
	remove_wiki_link = utilities_page_ptr.remove_wiki_link;
	strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
end


--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
]]

-- Public interface of this module: each key exposes the local function of the
-- same name.
return {
	COinS = COinS,
	get_coins_pages = get_coins_pages,
	make_coins_title = make_coins_title,
	set_selected_modules = set_selected_modules,
}