Modulu:String2

Documentation for this module may be created at Modulu:String2/dok
-- Turn on strict mode before anything else in the module is evaluated.
require('strict')

-- Export table: every entry point of this module is attached to <p>,
-- which is returned at the end of the file.
local p = {}

-- trim returns the first unnamed parameter with surrounding whitespace removed
-- a missing parameter is treated as the empty string
function p.trim(frame)
	local text = frame.args[1] or ""
	return mw.text.trim(text)
end

-- sentence lowercases the first unnamed parameter and then passes the frame on
-- to p.ucfirst, returning whatever p.ucfirst returns
function p.sentence(frame)
	-- {{lc:}} is strip-marker safe, string.lower is not.
	local lowered = frame:callParserFunction('lc', frame.args[1])
	frame.args[1] = lowered				-- p.ucfirst reads its input from frame.args[1]
	return p.ucfirst(frame)
end

--[[
ucfirst upper-cases the first letter or digit character of the first unnamed parameter.

Leading markup -- strip markers, list markup, bold/italic quotes, html-like
tags, html entities, whitespace and miscellaneous punctuation -- is removed
first and reattached, unchanged, in front of the result.

What is upper-cased depends on how the remaining text starts:
	piped wikilink ([[target|label]])	first letter after the pipe
	plain wikilink ([[target]])			first letter after '[['
	ext link with label ([url label])	first letter of the label
	ext link without label ([url])		nothing
	anything else						first letter of the text

Returns the parameter unchanged when it is nil, empty or only whitespace.
When there is no letter or digit to upper-case, the trimmed text is returned
as is.
]]
p.ucfirst = function(frame)
	local s = frame.args[1];
	if not s or '' == s or s:match('^%s+$') then							-- when <s> is nil, empty, or only whitespace
		return s;															-- abandon because nothing to do
	end

	s = mw.text.trim(s)

	local prefix_patterns_t = {												-- sequence of prefix patterns
		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',					-- stripmarker
		'^([%*;:#]+)',														-- various list markup
		'^(\'\'\'*)',														-- bold / italic markup
		'^(%b<>)',															-- html-like tags because some templates render these
		'^(&%a+;)',															-- html character entities because some templates render these
		'^(&#%d+;)',														-- html numeric (decimal) entities because some templates render these
		'^(&#x%x+;)',														-- html numeric (hexadecimal) entities because some templates render these
		'^(%s+)',															-- any whitespace characters
		-- no whitespace in this class: whitespace has its own pattern above, and a
		-- punctuation run that swallowed a space could also swallow the '&' of a
		-- following html entity before the entity patterns get a chance to match
		'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)',				-- miscellaneous punctuation
	}

	local prefixes_t = {};													-- list, bold/italic, and html-like markup, & whitespace saved here

	-- local function to strip one prefix from <str>; returns the shortened string
	-- and true when a prefix was removed, else <str> alone (nil flag)
	local function prefix_strip(str)
		for _, pattern in ipairs(prefix_patterns_t) do						-- spin through <prefix_patterns_t>
			local prefix = str:match(pattern);								-- get a copy of the matched prefix, if any
			if prefix then
				table.insert(prefixes_t, prefix);							-- save it
				-- return immediately to force a restart at the top of the sequence
				-- because misc punct removal can break stripmarker
				return str:sub(prefix:len() + 1), true;
			end
		end
		return str;															-- no prefix found
	end

	local prefix_removed;													-- flag; true as long as prefix_strip() finds and removes a prefix

	repeat																	-- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s>
		s, prefix_removed = prefix_strip(s);
	until not prefix_removed;												-- until <prefix_removed> is nil

	local s1 = table.concat(prefixes_t);									-- recreate the prefix string for later reattachment

	-- Replacement callback for the gsub calls below: <lead> is the text in front
	-- of the first letter, <letter> is that letter.  A function is used instead
	-- of a replacement string so that a '%' in <lead> is never read as a capture
	-- reference, and so that <s> is simply left alone when the pattern finds no
	-- letter (a nil would otherwise reach mw.ustring.upper and raise an error).
	local function upcase_first(lead, letter)
		return lead .. mw.ustring.upper(letter);
	end

	local first_text = mw.ustring.match(s, '^%[%[[^%]]+%]%]');				-- extract wikilink at start of string if present

	if first_text then
		if first_text:match('^%[%[[^|]+|[^%]]+%]%]') then					-- if <first_text> is a piped link
			s = mw.ustring.gsub(s, '^(%[%[[^|]+|%W*)(%w)', upcase_first);
		else																-- a wikilink but not a piped link
			s = mw.ustring.gsub(s, '^(%[%[%W*)(%w)', upcase_first);
		end

	elseif s:match('^%[%S+%s+[^%]]+%]') then								-- if <s> is a ext link of some sort; must have label text
		s = mw.ustring.gsub(s, '^(%[%S+%s+%W*)(%w)', upcase_first);

	elseif s:match('^%[%S+%s*%]') then										-- if <s> is a ext link without label text
		return s1 .. s;														-- nothing to do; reattach prefix string (if present) and done

	else																	-- <s> is not a wikilink or ext link; assume plain text
		s = mw.ustring.gsub(s, '^(%W*)(%w)', upcase_first);
	end

	return s1 .. s;															-- reattach prefix string (if present) and done
end


-- title converts the first unnamed parameter to title case:
-- every space-separated word is lowercased and then has its first letter
-- capitalised, except that the words in <alwayslower> stay lowercase unless
-- they are the first word
p.title = function(frame)
	-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html
	-- recommended by The U.S. Government Printing Office Style Manual:
	-- "Capitalize all words in titles of publications and documents,
	-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor. "
	-- NOTE(review): the table below has 'from' and lacks 'as' compared with the
	-- list quoted above -- confirm whether that is intentional
	local alwayslower = {['a'] = 1, ['an'] = 1, ['the'] = 1,
		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1,
		['nor'] = 1, ['on'] = 1, ['in'] = 1, ['at'] = 1, ['to'] = 1,
		['from'] = 1, ['by'] = 1, ['of'] = 1, ['up'] = 1}
	local s = mw.text.trim(frame.args[1] or "")
	-- split on the space character so that each element is a word; an empty
	-- separator would split the text into single characters
	local words = mw.text.split(s, " ")
	local lang = mw.getContentLanguage()		-- fetched once, used for every word
	for i, word in ipairs(words) do
		-- {{lc:}} is strip-marker safe, string.lower is not.
		word = frame:callParserFunction('lc', word)
		if i == 1 or alwayslower[word] ~= 1 then
			word = lang:ucfirst(word)
		end
		words[i] = word
	end
	return table.concat(words, " ")				-- rejoin with the separator used for the split
end

-- findlast finds the last item in a list
-- the first unnamed parameter is the list
-- the second, optional unnamed parameter is the list separator (default = comma space)
-- the separator is inserted into a Lua pattern as is, so magic characters in it are not escaped
-- returns the whole list if separator not found
p.findlast = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	local sep = frame.args[2] or ""
	if sep == "" then sep = ", " end			-- documented default: comma followed by a space
	-- the greedy '.*' pushes the match to the last occurrence of the separator;
	-- the capture is whatever follows it
	local last = s:match(".*" .. sep .. "(.*)")
	if last ~= nil then
		return last
	end
	return s
end

-- stripZeros finds the first number and strips leading zeros (apart from units)
-- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12"
-- only the first run of digits in the text is changed
p.stripZeros = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	-- The zeros are removed textually rather than by a round trip through
	-- tonumber(), which would rewrite long digit runs in scientific notation.
	-- '^0+(%d)' always leaves the final digit in place, so "000" becomes "0".
	s = string.gsub(s, "%d+", function(digits)
		return (digits:gsub("^0+(%d)", "%1"))	-- parentheses drop gsub's match count
	end, 1)
	return s
end

-- nowiki makes the MediaWiki software treat a piece of text as a plain string
-- the first unnamed parameter (default: empty string) is trimmed and then
-- passed through mw.text.nowiki
function p.nowiki(frame)
	local trimmed = mw.text.trim(frame.args[1] or "")
	return mw.text.nowiki(trimmed)
end

-- split splits text at boundaries specified by separator
-- and returns the chunk for the index idx (starting at 1)
-- #invoke:String2 |split |text |separator |index |true/false
-- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false
-- if plain is false/no/0 then separator is treated as a Lua pattern - defaults to plain=true
-- a negative index counts from the end of the list of chunks (-1 is the last chunk)
-- double quotes are removed from the separator
-- returns nil when the text is empty or the index is out of range
-- when the frame has neither a first unnamed parameter nor |txt=, the parent frame's arguments are used
p.split = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then args = frame:getParent().args end
	local txt = args[1] or args.txt or ""
	if txt == "" then return nil end
	-- strip every double quote, not just one that follows a space, so that a
	-- separator written as "," or ", " loses both of its quote marks
	local sep = (args[2] or args.sep or ""):gsub('"', '')
	local idx = tonumber(args[3] or args.idx) or 1
	-- only the first character of the flag is looked at: f(alse), n(o) and 0 switch plain off
	local plain = (args[4] or args.plain or "true"):sub(1, 1)
	plain = (plain ~= "f" and plain ~= "n" and plain ~= "0")
	local splittbl = mw.text.split(txt, sep, plain)
	if idx < 0 then idx = #splittbl + idx + 1 end
	return splittbl[idx]
end

-- val2percent reads a string from the first unnamed parameter or from |txt=
-- (falling back to the parent frame's arguments when the frame has neither)
-- every number found in it is multiplied by 100 and followed by a percent sign
-- returns nil when the trimmed string is empty
function p.val2percent(frame)
	local args = frame.args
	if not args[1] and not args.txt then
		args = frame:getParent().args
	end

	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then
		return nil
	end

	-- replacement callback: text that tonumber() cannot read counts as 0
	local function to_percent(num_str)
		local scaled = (tonumber(num_str) or 0) * 100
		local whole = math.floor(scaled)
		if scaled == whole then
			return whole .. "%"
		end
		return scaled .. "%"
	end

	-- a number is a digit followed by any run of digits and dots
	local converted = txt:gsub("%d[%d%.]*", to_percent)	-- keep just the string, not the match count
	return converted
end

-- one2a scans through a string, passed as either the first unnamed parameter or |txt=
-- (the parent frame's arguments are used when the frame has neither)
-- it converts each occurrence of 'one ' into either 'a ' or 'an ' and returns the resultant string.
-- returns nil when the trimmed string is empty
p.one2a = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then args = frame:getParent().args end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then return nil end
	-- Only the word 'one' followed by a space is replaced: the spaces in the
	-- patterns keep "one" inside other words (money, stone, ...) from being
	-- rewritten.  The last two steps turn the article into 'an' before a vowel.
	-- NOTE(review): 'a ([aeiou])' also matches an 'a ' that ends a longer word
	-- ("banana is" -> "bananan is") -- confirm whether that needs tightening.
	txt = txt:gsub(" one ", " a ")
		:gsub("^one ", "a ")
		:gsub("One ", "A ")
		:gsub("a ([aeiou])", "an %1")
		:gsub("A ([aeiou])", "An %1")
	return txt
end

-- findpagetext returns the position of a piece of text in a page
-- First positional parameter or |text is the search text
-- Optional parameter |title is the page title, defaults to current page
-- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search
-- Optional parameter |nomatch is the return value when no match is found; default is nil
-- _findpagetext takes these parameters as a table; nil is returned when the search text is empty
function p._findpagetext(args)
	-- value handed back when nothing is found; a blank value counts as not given
	local nomatch = args.nomatch
	if not nomatch or nomatch == "" then
		nomatch = nil
	end

	-- search text
	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then
		return nil
	end

	-- page to search
	local page = args.title
	local titleobj
	if page and page ~= "" then
		titleobj = mw.title.new(page)
	else
		titleobj = mw.title.getCurrentTitle()
	end

	-- only a value starting with "f" switches plain search off
	local plain = (args.plain or ""):sub(1, 1) ~= "f"

	-- get the page content and look for 'text' - return position or nomatch
	local content = titleobj and titleobj:getContent()
	if not content then
		return nomatch
	end
	local position = mw.ustring.find(content, text, 1, plain)
	return position or nomatch
end
-- template-facing entry point for p._findpagetext
-- the parent frame's arguments are copied over the frame's own arguments and
-- the merged table is passed on; returns nil when neither a first unnamed
-- parameter nor |text is present
function p.findpagetext(frame)
	local merged = frame.args
	for key, value in pairs(frame:getParent().args) do
		merged[key] = value
	end
	if merged[1] or merged.text then
		-- the parentheses keep just the first return value
		return (p._findpagetext(merged))
	end
	return nil
end

-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
-- QUERY decodes + to space (default)
-- PATH does no extra decoding
-- WIKI decodes _ to space
-- a missing url is treated as the empty string
function p._urldecode(url, type)
	if not url then
		url = ""
	end
	-- anything other than "PATH" or "WIKI" is replaced by false before the call
	if type ~= "PATH" and type ~= "WIKI" then
		type = false
	end
	return mw.uri.decode(url, type)
end
-- {{#invoke:String2|urldecode|url=url|type=type}}
-- template-facing url decoder: passes |url= and |type= to mw.uri.decode
-- a missing |url= is decoded as the empty string; a blank |type= is treated as not given
p.urldecode = function(frame)
	local url = frame.args.url or ""
	local enctype = frame.args.type
	if enctype == "" then
		enctype = nil			-- |type= with no value: hand no type to mw.uri.decode
	end
	return mw.uri.decode(url, enctype)
end

-- what follows was merged from Module:StringFunc

-- helper functions

-- Module:GetParameters is loaded once; the helpers below are taken from it
p._GetParameters = require('Module:GetParameters')

-- Argument list helper function, as per Module:String
p._getParameters = p._GetParameters.getParameters

-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
-- each of ( ) . % + - * ? [ ^ $ ] is prefixed with '%'
-- the results of mw.ustring.gsub are returned as they are
function p._escapePattern(pattern_str)
	local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
	return mw.ustring.gsub(pattern_str, magic_chars, "%%%1")
end

-- Helper Function to interpret boolean strings, as per Module:String
-- (taken from Module:GetParameters)
p._getBoolean = p._GetParameters.getBoolean

--[[
Strip

This function Strips characters from string

Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}

Parameters
source: The string to strip
chars: The pattern or list of characters to strip from string, replaced with ''
plain: A flag indicating that the chars should be understood as plain text. defaults to true.

Leading and trailing whitespace is also automatically stripped from the string.
When source or chars is empty, the trimmed source is returned unchanged.
]]
function p.strip(frame)
	local new_args = p._getParameters(frame.args, {'source', 'chars', 'plain'})
	local source_str = mw.text.trim(new_args['source'] or '')
	local chars = new_args['chars'] or ''
	if source_str == '' or chars == '' then
		return source_str
	end
	local l_plain = p._getBoolean(new_args['plain'] or true)
	if l_plain then
		chars = p._escapePattern(chars)			-- keeps only the first return value (the escaped string)
	end
	-- <chars> is wrapped in a character class, so each character listed in it
	-- is removed wherever it occurs in the source
	local result = mw.ustring.gsub(source_str, "[" .. chars .. "]", '')
	return result								-- just the string, not the replacement count
end

--[[
Match any

Reports which of the numbered patterns is the first one found in the source
string, as the pattern's index. The patterns must be numbered consecutively:
the search stops at the first missing number.
The empty string is returned when no pattern matches, for use in {{#if:}}

Usage:
{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'.

Parameters:
source: the string to search; an error is raised when it is missing
plain: A flag indicating that the patterns should be understood as plain text. defaults to true.
1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
	local source_str = frame.args['source']
	if not source_str then
		error('The source parameter is mandatory.')
	end
	local l_plain = p._getBoolean(frame.args['plain'] or true)

	local index = 1
	while true do
		local pattern = frame.args[index]
		if not pattern then
			return ''							-- ran out of patterns without a match
		end
		if mw.ustring.find(source_str, pattern, 1, l_plain) then
			return tostring(index)
		end
		index = index + 1
	end
end

--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions. The hyphen must separate
like items; unlike items are returned unmodified. These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

An item, or the whole of str, wrapped in doubled parentheses ((...)) is
accepted as written: the parentheses are removed and no hyphen is converted.

Parameters:
	str: the text to convert; nil or the empty string is returned as is
	spacing: text placed after the comma when the items are joined again;
		defaults to a single space when not supplied

]]
function p.hyphen_to_dash(str, spacing)
	if (str == nil or str == '') then
		return str
	end

	spacing = spacing or ' '													-- Lua callers may omit <spacing>; ',' .. nil would raise an error

	local accept

	str = mw.text.decode(str, true)												-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split(str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs(list) do												-- for each item in the list
		item = mw.text.trim(item)												-- trim whitespace
		item, accept = item:gsub('^%(%((.+)%)%)$', '%1')						-- remove accept-this-as-written markup; <accept> is non-zero when it was present
		if accept == 0 and mw.ustring.match(item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or				-- digit separator digit hyphen digit separator digit
				item:match('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match('^%a+%s*%-%s*%a+$') then								-- letter hyphen letter
					item = item:gsub('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub(item, '%s*[–—]%s*', '–')					-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert(out, item)													-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat(out, ',' .. spacing)							-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub('^%(%((.+)%)%)$', '%1')								-- when global markup removed, return original str; assigning to one variable drops gsub's second return value
	end
	return temp_str
end

-- template-facing wrapper for p.hyphen_to_dash
-- first unnamed parameter: the text to convert (default: empty string)
-- second unnamed parameter: the spacing that follows each comma (default: one
-- space, the normal spacing; templates such as r/rp/ran may need a narrower one)
function p.hyphen2dash(frame)
	return p.hyphen_to_dash(frame.args[1] or '', frame.args[2] or ' ')
end

-- Similar to [[Module:String#endswith]]
-- returns 'yes' when the first unnamed parameter begins with the second one,
-- and the empty string otherwise
-- a missing parameter is treated as the empty string, which makes a missing
-- prefix behave like an empty prefix (every text starts with it)
function p.startswith(frame)
	local text = frame.args[1] or ''
	local prefix = frame.args[2] or ''
	-- compare the leading bytes of <text>, as many as <prefix> is long
	return (text:sub(1, prefix:len()) == prefix) and 'yes' or ''
end

-- hand the export table to Scribunto
return p