Changes
Jump to navigation
Jump to search
transclusion count
<!-- Please place categories where indicated at the bottom of this page and interwikis at Wikidata (see [[Wikipedia:Wikidata]]) -->
{{high risk|261335}}
== Usage ==
This data is extracted from Scripts.txt and PropertyValueAliases.txt in the Unicode Character Database using the two scripts shown below, which are written in Lua 5.3 and use [http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html LPeg]. The scripts must be in the same folder as the two data files, and you must have a global function named "sortedpairs" that can iterate over integer keys in a sparse table in numerical order. (The sortedPairs function in [[wikt:Module:table|Module:table]] on English Wiktionary will work.)
{{collapse top|title=Lua 5.3 scripts}}
; make_script_data.lua
<source lang="lua">
-- Setup for make_script_data.lua: load LPeg, read all of Scripts.txt into
-- memory, and load the script-name-to-code map built by name_to_code.lua.
local lpeg = require 'lpeg'
local infilehandle = assert(io.open('./Scripts.txt', 'rb'))
local scriptdata = assert(infilehandle:read('a'))
infilehandle:close()
local outfile = './data.lua'
local script_name_to_code = dofile('./name_to_code.lua')

-- Copy every LPeg entry whose name is unchanged by upper-casing its first
-- character into the environment, so the grammar below can use P, V, R, S,
-- C, Cc, Cf and Cg without the "lpeg." prefix.
for name, value in pairs(lpeg) do
  local initial = string.sub(name, 1, 1)
  if initial == string.upper(initial) then
    _ENV[name] = value
  end
end
--- Format a number as uppercase hexadecimal, zero-padded to at least four digits.
-- @param number integer value to format
-- @return string, e.g. "0041" for 0x41 or "1F600" for 0x1F600
local function numtohex(number)
  local hex = string.format('%04X', number)
  return hex
end
--- Parse a string of hexadecimal digits into a number.
-- @param hex string of hexadecimal digits, e.g. "1F600"
-- @return the numeric value, or nil when the string is not valid base-16
local function hextonum(hex)
  local value = tonumber(hex, 16)
  return value
end
-- Create table that contains arrays of codepoint ranges as well as codepoint-to-script fields.
-- Need separate tables for map and ranges.

-- Last codepoint recorded so far for each script code. Tracking this per
-- script (rather than in one shared variable) keeps adjacent entries of the
-- same script mergeable even if lines of another script come in between.
local last_codepoint = {}

-- Width of the widest quoted script code seen so far. The output loop reads
-- this to left-justify the third column of each range entry.
local maxscriptnamelen = 0

--- Fold step: record one parsed data line in the accumulator table.
-- @param t accumulator with two fields: `ranges` (script code -> array of
--   { first, last, script_code } triples) and `individual` (codepoint ->
--   script code for codepoints that are not part of any range)
-- @param cp1 hex string of the first (or only) codepoint of the line
-- @param cp2 hex string of the last codepoint of a range, or nil
-- @param scriptname script name as captured from the data line; when it is
--   absent the call carries no data line and t is returned unchanged
-- @return t
-- Raises an error when scriptname has no entry in script_name_to_code.
local function process(t, cp1, cp2, scriptname, ...)
  if not scriptname then return t end

  local script_code = script_name_to_code[scriptname]
  if not script_code then
    -- Fail with a readable message instead of "table index is nil" below.
    error(('no script code known for script name %q'):format(tostring(scriptname)))
  end

  -- The output column holds the %q-quoted code, so measure that form.
  local quoted_len = #('%q'):format(script_code)
  if quoted_len > maxscriptnamelen then
    maxscriptnamelen = quoted_len
  end

  local rangearray = t.ranges[script_code] -- Place in script-specific array initially.
  if not rangearray then
    rangearray = {}
    t.ranges[script_code] = rangearray
  end

  local cpnumber1 = hextonum(cp1)
  local cpnumber2 = cp2 and hextonum(cp2)
  local top = cpnumber2 or cpnumber1

  local prev = last_codepoint[script_code]
  if prev and cpnumber1 == prev + 1 then
    if t.individual[prev] then -- Move individual codepoint to a newly created range.
      t.individual[prev] = nil
      rangearray[#rangearray + 1] = { prev, top, script_code }
    else
      rangearray[#rangearray][2] = top -- Increment top of previous codepoint range.
    end
  elseif cpnumber2 then
    rangearray[#rangearray + 1] = { cpnumber1, cpnumber2, script_code }
  else
    t.individual[cpnumber1] = script_code
  end

  last_codepoint[script_code] = top
  return t
end
-- Grammar for Scripts.txt. Each data line has the shape
--   XXXX[..XXXX] ; Script_Name # Gc [count] description
-- and is folded into a single accumulator table by `process`.
local patt = P {
  -- Entry rule. Cc emits the same accumulator table before every line; the
  -- first one seeds the fold, and the later ones reach `process` without a
  -- script name, which it ignores. Any character that does not start a
  -- recognised line is skipped by the "+ 1" alternative.
  Cf((Cc{ ranges = {}, individual = {} } * V 'patt' + 1)^1, process),
  -- A line is recognised only after a newline, so the very first line of the
  -- input is never treated as data.
  patt = V 'nl' * (Cg(V 'data_line') + V 'comment'),
  -- Captures produced per data line: first codepoint, last codepoint or nil,
  -- script name.
  data_line = V 'cprange' * V 'opts'
    * P ';' * V 'opts' * C(V 'scriptname') * V 'opts'
    * (V 'count' * V 'opts')^-1
    * P '#' * V 'opts' * V 'category' * V 'opts'
    * V 'not_nl',
  comment = P '#' * V 'not_nl',
  count = '[' * R '09'^1 * ']',
  category = R 'AZ' * R('az', '&&'), -- two characters, e.g. "Lu" or "L&"
  scriptname = R('AZ', 'az', '__')^1, -- Actually starts with capital and rest is alphabetic or underscore.
  cprange = C(V 'cp') * (P '..' * C(V 'cp') + Cc(nil)), -- XXXX; XXXX..XXXX -> string, string; string, nil
  not_nl = (1 - V 'nl')^0,
  -- Four to six hex digits: Unicode code points run up to 10FFFF, so a
  -- five-digit limit would silently drop any six-digit line.
  cp = V 'hex' * V 'hex' * V 'hex' * V 'hex' * V 'hex'^-2,
  hex = R('09', 'AF', 'af'), -- Actually just 0-9A-F.
  opts = S ' \t'^0, -- Actually just space.
  nl = P '\r'^-1 * P '\n'
}
-- Run the grammar over the whole file; the result is the accumulator table
-- with per-script range arrays and the individual-codepoint map.
local scriptdatatable = patt:match(scriptdata)

-- Merge the per-script range arrays into one flat array, then order it by
-- the first codepoint of each range (pairs visits scripts in no fixed order).
do
  local flat = {}
  for _, script_ranges in pairs(scriptdatatable.ranges) do
    for idx = 1, #script_ranges do
      flat[#flat + 1] = script_ranges[idx]
    end
  end
  table.sort(flat, function (a, b)
    return a[1] < b[1]
  end)
  scriptdatatable.ranges = flat
end
-- P R I N T R E S U L T
-- Write the collected data to `outfile` as a Lua module returning a table
-- with an `individual` map and a `ranges` array.
local out = assert(io.open(outfile, 'wb'))

out:write [[
-- Generated by make_script_data.lua.
return {
individual = {
]]

-- One "[0xXXXXX] = "Code"," entry per stand-alone codepoint, in numerical
-- order (sortedpairs must be supplied as a global by the user).
local individual_line = '\t\t[0x%05X] = "%s",\n'
for cp, script in sortedpairs(scriptdatatable.individual) do
  out:write(individual_line:format(cp, script))
end

out:write[[
},
ranges = {
]]

-- One "{ first, last, "Code" }," entry per range; the quoted code is
-- left-justified to maxscriptnamelen characters.
local range_line = '\t\t{ 0x%05X, 0x%05X, %-' .. maxscriptnamelen .. 's },\n'
for _, range in ipairs(scriptdatatable.ranges) do
  local quoted_code = string.format('%q', range[3])
  out:write(string.format(range_line, range[1], range[2], quoted_code))
end

out:write [[
},
}
]]

assert(out:close())
</source>
; name_to_code.lua
<source lang="lua">
-- Setup for name_to_code.lua: load LPeg and read all of
-- PropertyValueAliases.txt into memory.
local lpeg = require 'lpeg'
local property_value_aliases_filename = "./PropertyValueAliases.txt"

-- Keep the handle in a local so it can be closed once the contents are read,
-- and check the read result, as make_script_data.lua does for Scripts.txt.
local aliases_file = assert(io.open(property_value_aliases_filename, 'rb'))
local property_value_aliases = assert(aliases_file:read('a'))
aliases_file:close()

-- Copy every LPeg entry whose name is unchanged by upper-casing its first
-- character into the environment, so the grammar below can use P, V, R, S
-- and C without the "lpeg." prefix.
for k, v in pairs(lpeg) do
  local firstletter = k:sub(1, 1)
  if firstletter:upper() == firstletter then
    _ENV[k] = v
  end
end
-- Map from script name (e.g. "Latin") to script code (e.g. "Latn"); this is
-- the value the file returns.
local script_name_to_code = {}

--- Record one alias pair in script_name_to_code.
-- Arguments arrive in capture order: the code first, then the name.
-- @param code script code
-- @param name script name, used as the key
local add_to_table = function (code, name)
  script_name_to_code[name] = code
end
-- Grammar that scans PropertyValueAliases.txt for script alias lines of the
-- form "sc ; Code ; Name ..." and hands each (code, name) pair to
-- add_to_table. Everything else is skipped one character at a time.
local patt = P {
  ((V('script_line') / add_to_table) + P(1))^1,
  -- A script line is recognised only directly after a newline; whatever
  -- follows the name on the same line is consumed and ignored.
  script_line = V('nl') * P('sc') * V('sep')
    * C(V('code')) * V('sep')
    * C(V('name'))
    * (P(1) - V('nl'))^0,
  -- One uppercase letter followed by three lowercase letters.
  code = R('AZ') * V('lower') * V('lower') * V('lower'),
  name = R('AZ', 'az', '__')^1,
  lower = R('az'),
  -- A semicolon with optional spaces or tabs on either side.
  sep = V('w') * P(';') * V('w'),
  w = S(' \t')^0,
  nl = P('\r')^-1 * P('\n')
}

-- Matching is run for its side effect of filling script_name_to_code.
patt:match(property_value_aliases)

return script_name_to_code
</source>
{{collapse bottom}}
<includeonly>{{#ifeq:{{SUBPAGENAME}}|sandbox | |
<!-- Categories below this line, please; interwikis at Wikidata -->
}}</includeonly>
{{high risk|261335}}
== Usage ==
This data is extracted from Scripts.txt and PropertyValueAliases.txt in the Unicode Character Database using the two scripts shown below, which are written in Lua 5.3 and use [http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html LPeg]. The scripts must be in the same folder as the two data files, and you must have a global function named "sortedpairs" that can iterate over integer keys in a sparse table in numerical order. (The sortedPairs function in [[wikt:Module:table|Module:table]] on English Wiktionary will work.)
{{collapse top|title=Lua 5.3 scripts}}
; make_script_data.lua
<source lang="lua">
-- Setup for make_script_data.lua: load LPeg, read all of Scripts.txt into
-- memory, and load the script-name-to-code map built by name_to_code.lua.
local lpeg = require 'lpeg'
local infilehandle = assert(io.open('./Scripts.txt', 'rb'))
local scriptdata = assert(infilehandle:read('a'))
infilehandle:close()
local outfile = './data.lua'
local script_name_to_code = dofile('./name_to_code.lua')

-- Copy every LPeg entry whose name is unchanged by upper-casing its first
-- character into the environment, so the grammar below can use P, V, R, S,
-- C, Cc, Cf and Cg without the "lpeg." prefix.
for name, value in pairs(lpeg) do
  local initial = string.sub(name, 1, 1)
  if initial == string.upper(initial) then
    _ENV[name] = value
  end
end
--- Format a number as uppercase hexadecimal, zero-padded to at least four digits.
-- @param number integer value to format
-- @return string, e.g. "0041" for 0x41 or "1F600" for 0x1F600
local function numtohex(number)
  local hex = string.format('%04X', number)
  return hex
end
--- Parse a string of hexadecimal digits into a number.
-- @param hex string of hexadecimal digits, e.g. "1F600"
-- @return the numeric value, or nil when the string is not valid base-16
local function hextonum(hex)
  local value = tonumber(hex, 16)
  return value
end
-- Create table that contains arrays of codepoint ranges as well as codepoint-to-script fields.
-- Need separate tables for map and ranges.

-- Last codepoint recorded so far for each script code. Tracking this per
-- script (rather than in one shared variable) keeps adjacent entries of the
-- same script mergeable even if lines of another script come in between.
local last_codepoint = {}

-- Width of the widest quoted script code seen so far. The output loop reads
-- this to left-justify the third column of each range entry.
local maxscriptnamelen = 0

--- Fold step: record one parsed data line in the accumulator table.
-- @param t accumulator with two fields: `ranges` (script code -> array of
--   { first, last, script_code } triples) and `individual` (codepoint ->
--   script code for codepoints that are not part of any range)
-- @param cp1 hex string of the first (or only) codepoint of the line
-- @param cp2 hex string of the last codepoint of a range, or nil
-- @param scriptname script name as captured from the data line; when it is
--   absent the call carries no data line and t is returned unchanged
-- @return t
-- Raises an error when scriptname has no entry in script_name_to_code.
local function process(t, cp1, cp2, scriptname, ...)
  if not scriptname then return t end

  local script_code = script_name_to_code[scriptname]
  if not script_code then
    -- Fail with a readable message instead of "table index is nil" below.
    error(('no script code known for script name %q'):format(tostring(scriptname)))
  end

  -- The output column holds the %q-quoted code, so measure that form.
  local quoted_len = #('%q'):format(script_code)
  if quoted_len > maxscriptnamelen then
    maxscriptnamelen = quoted_len
  end

  local rangearray = t.ranges[script_code] -- Place in script-specific array initially.
  if not rangearray then
    rangearray = {}
    t.ranges[script_code] = rangearray
  end

  local cpnumber1 = hextonum(cp1)
  local cpnumber2 = cp2 and hextonum(cp2)
  local top = cpnumber2 or cpnumber1

  local prev = last_codepoint[script_code]
  if prev and cpnumber1 == prev + 1 then
    if t.individual[prev] then -- Move individual codepoint to a newly created range.
      t.individual[prev] = nil
      rangearray[#rangearray + 1] = { prev, top, script_code }
    else
      rangearray[#rangearray][2] = top -- Increment top of previous codepoint range.
    end
  elseif cpnumber2 then
    rangearray[#rangearray + 1] = { cpnumber1, cpnumber2, script_code }
  else
    t.individual[cpnumber1] = script_code
  end

  last_codepoint[script_code] = top
  return t
end
-- Grammar for Scripts.txt. Each data line has the shape
--   XXXX[..XXXX] ; Script_Name # Gc [count] description
-- and is folded into a single accumulator table by `process`.
local patt = P {
  -- Entry rule. Cc emits the same accumulator table before every line; the
  -- first one seeds the fold, and the later ones reach `process` without a
  -- script name, which it ignores. Any character that does not start a
  -- recognised line is skipped by the "+ 1" alternative.
  Cf((Cc{ ranges = {}, individual = {} } * V 'patt' + 1)^1, process),
  -- A line is recognised only after a newline, so the very first line of the
  -- input is never treated as data.
  patt = V 'nl' * (Cg(V 'data_line') + V 'comment'),
  -- Captures produced per data line: first codepoint, last codepoint or nil,
  -- script name.
  data_line = V 'cprange' * V 'opts'
    * P ';' * V 'opts' * C(V 'scriptname') * V 'opts'
    * (V 'count' * V 'opts')^-1
    * P '#' * V 'opts' * V 'category' * V 'opts'
    * V 'not_nl',
  comment = P '#' * V 'not_nl',
  count = '[' * R '09'^1 * ']',
  category = R 'AZ' * R('az', '&&'), -- two characters, e.g. "Lu" or "L&"
  scriptname = R('AZ', 'az', '__')^1, -- Actually starts with capital and rest is alphabetic or underscore.
  cprange = C(V 'cp') * (P '..' * C(V 'cp') + Cc(nil)), -- XXXX; XXXX..XXXX -> string, string; string, nil
  not_nl = (1 - V 'nl')^0,
  -- Four to six hex digits: Unicode code points run up to 10FFFF, so a
  -- five-digit limit would silently drop any six-digit line.
  cp = V 'hex' * V 'hex' * V 'hex' * V 'hex' * V 'hex'^-2,
  hex = R('09', 'AF', 'af'), -- Actually just 0-9A-F.
  opts = S ' \t'^0, -- Actually just space.
  nl = P '\r'^-1 * P '\n'
}
-- Run the grammar over the whole file; the result is the accumulator table
-- with per-script range arrays and the individual-codepoint map.
local scriptdatatable = patt:match(scriptdata)

-- Merge the per-script range arrays into one flat array, then order it by
-- the first codepoint of each range (pairs visits scripts in no fixed order).
do
  local flat = {}
  for _, script_ranges in pairs(scriptdatatable.ranges) do
    for idx = 1, #script_ranges do
      flat[#flat + 1] = script_ranges[idx]
    end
  end
  table.sort(flat, function (a, b)
    return a[1] < b[1]
  end)
  scriptdatatable.ranges = flat
end
-- P R I N T R E S U L T
-- Write the collected data to `outfile` as a Lua module returning a table
-- with an `individual` map and a `ranges` array.
local out = assert(io.open(outfile, 'wb'))

out:write [[
-- Generated by make_script_data.lua.
return {
individual = {
]]

-- One "[0xXXXXX] = "Code"," entry per stand-alone codepoint, in numerical
-- order (sortedpairs must be supplied as a global by the user).
local individual_line = '\t\t[0x%05X] = "%s",\n'
for cp, script in sortedpairs(scriptdatatable.individual) do
  out:write(individual_line:format(cp, script))
end

out:write[[
},
ranges = {
]]

-- One "{ first, last, "Code" }," entry per range; the quoted code is
-- left-justified to maxscriptnamelen characters.
local range_line = '\t\t{ 0x%05X, 0x%05X, %-' .. maxscriptnamelen .. 's },\n'
for _, range in ipairs(scriptdatatable.ranges) do
  local quoted_code = string.format('%q', range[3])
  out:write(string.format(range_line, range[1], range[2], quoted_code))
end

out:write [[
},
}
]]

assert(out:close())
</source>
; name_to_code.lua
<source lang="lua">
-- Setup for name_to_code.lua: load LPeg and read all of
-- PropertyValueAliases.txt into memory.
local lpeg = require 'lpeg'
local property_value_aliases_filename = "./PropertyValueAliases.txt"

-- Keep the handle in a local so it can be closed once the contents are read,
-- and check the read result, as make_script_data.lua does for Scripts.txt.
local aliases_file = assert(io.open(property_value_aliases_filename, 'rb'))
local property_value_aliases = assert(aliases_file:read('a'))
aliases_file:close()

-- Copy every LPeg entry whose name is unchanged by upper-casing its first
-- character into the environment, so the grammar below can use P, V, R, S
-- and C without the "lpeg." prefix.
for k, v in pairs(lpeg) do
  local firstletter = k:sub(1, 1)
  if firstletter:upper() == firstletter then
    _ENV[k] = v
  end
end
-- Map from script name (e.g. "Latin") to script code (e.g. "Latn"); this is
-- the value the file returns.
local script_name_to_code = {}

--- Record one alias pair in script_name_to_code.
-- Arguments arrive in capture order: the code first, then the name.
-- @param code script code
-- @param name script name, used as the key
local add_to_table = function (code, name)
  script_name_to_code[name] = code
end
-- Grammar that scans PropertyValueAliases.txt for script alias lines of the
-- form "sc ; Code ; Name ..." and hands each (code, name) pair to
-- add_to_table. Everything else is skipped one character at a time.
local patt = P {
  ((V('script_line') / add_to_table) + P(1))^1,
  -- A script line is recognised only directly after a newline; whatever
  -- follows the name on the same line is consumed and ignored.
  script_line = V('nl') * P('sc') * V('sep')
    * C(V('code')) * V('sep')
    * C(V('name'))
    * (P(1) - V('nl'))^0,
  -- One uppercase letter followed by three lowercase letters.
  code = R('AZ') * V('lower') * V('lower') * V('lower'),
  name = R('AZ', 'az', '__')^1,
  lower = R('az'),
  -- A semicolon with optional spaces or tabs on either side.
  sep = V('w') * P(';') * V('w'),
  w = S(' \t')^0,
  nl = P('\r')^-1 * P('\n')
}

-- Matching is run for its side effect of filling script_name_to_code.
patt:match(property_value_aliases)

return script_name_to_code
</source>
{{collapse bottom}}
<includeonly>{{#ifeq:{{SUBPAGENAME}}|sandbox | |
<!-- Categories below this line, please; interwikis at Wikidata -->
}}</includeonly>