Changes
Jump to navigation
Jump to search
scripts
<!-- Please place categories where indicated at the bottom of this page and interwikis at Wikidata (see [[Wikipedia:Wikidata]]) -->
Unicode [[General Category]] data derived from [https://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt DerivedGeneralCategory.txt] in the Unicode Character Database.
The data was generated by the two Lua 5.3 scripts below, which require [http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html LPeg]. To regenerate it, put both scripts in the same folder as <code>DerivedGeneralCategory.txt</code>, change to that folder on the command line, and run <code>lua print_data.lua</code>; the data is written to <code>data.lua</code>.
{{collapse top|title=Lua 5.3 scripts}}
; parse_data.lua
<source lang="lua">
-- Read the whole Unicode data file into memory.
-- The io.open call is fully parenthesised on purpose: written as
-- `assert(io.open 'name', 'r')` the string call binds first, so 'r' would be
-- passed to assert as its error message instead of to io.open as the mode,
-- and a missing file would be reported as just "r".
local f = assert(io.open('DerivedGeneralCategory.txt', 'r'))
local Derived_General_Category = f:read 'a'
f:close()
local lpeg = require 'lpeg'
-- Bind the LPeg constructors used by the grammar as locals instead of copying
-- every capitalised lpeg function into _ENV, so that running this script does
-- not leave P, V, C, ... behind as globals.
local P, V, C, Cmt, R, S = lpeg.P, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.R, lpeg.S
-- Result table: `singles` maps a code point to its category, `ranges` is a
-- list of { low, high, category } triples.
local General_Category_data = { singles = {}, ranges = {} }

-- Match-time callback for one data record.
-- Receives the subject, the position after the match, and the captures:
-- (low, high, category) for "XXXX..XXXX ; gc" or (codepoint, category) for
-- "XXXX ; gc". Records whose category is 'Cn' are not stored.
-- Returns `pos` unchanged.
local function process_match(str, pos, ...)
  local first, second, third = ...
  if third then
    -- Range record: first = low, second = high, third = category.
    if third ~= 'Cn' then
      local ranges = General_Category_data.ranges
      ranges[#ranges + 1] = { tonumber(first, 16), tonumber(second, 16), third }
    end
  elseif second ~= 'Cn' then
    -- Single record: first = code point, second = category.
    General_Category_data.singles[tonumber(first, 16)] = second
  end
  return pos
end
-- Grammar for DerivedGeneralCategory.txt.
-- The subject is consumed one line at a time. A data record is only
-- recognised at the very start of a line; any other line is skipped whole.
-- Trying the record pattern at every byte would also match records embedded in
-- comments, such as the `# @missing: 0000..10FFFF; Unassigned` line in the UCD
-- file header, which would be stored as a bogus range with category "Un".
local patt = P {
  'file',
  file = V 'entry' * (V 'nl' * V 'entry')^0,
  -- A data record if the line starts with one, otherwise skip the line.
  entry = V 'line' + V 'rest',
  -- process_match receives (low, high, gc) or (codepoint, gc) as captures.
  line = Cmt((V 'range' + C(V 'codepoint')) * V 'white' * P ';' * V 'white' * C(V 'gc') * V 'rest',
    process_match),
  range = C(V 'codepoint') * P '..' * C(V 'codepoint'),
  -- Four to six uppercase hexadecimal digits.
  codepoint = V 'hex' * V 'hex' * V 'hex' * V 'hex' * V 'hex'^-2,
  -- An uppercase letter followed by any one character (e.g. Lu).
  gc = R 'AZ' * P(1),
  hex = R("09", "AF"),
  white = S ' \t'^0,
  nl = P '\r'^-1 * P '\n',
  -- Everything up to, but not including, the end of the line.
  rest = (1 - V 'nl')^0,
}
patt:match(Derived_General_Category)
return General_Category_data
</source>
; print_data.lua
<source lang="lua">
-- Run the parser script that sits next to this file. It is named
-- parse_data.lua on this page, so that is the file to load (the former
-- name make_data.lua does not match any script given here).
local data_filename = [[parse_data.lua]]
local data = dofile(data_filename)
local output_filename = [[data.lua]]
local output = assert(io.open(output_filename, 'w'))

-- Write a string.format-style formatted string to the output file.
local function writef(...)
  output:write(string.format(...))
end
writef [[
return {
singles = {
]]
-- Emit the single code points in ascending order using only the standard
-- library. pairs() has no defined order, so the keys are collected and
-- sorted first. This replaces the `spairs` helper from the external module
-- `t` (assumed to iterate in sorted key order), which is not provided
-- alongside these scripts.
local codepoints = {}
for codepoint in pairs(data.singles) do
  codepoints[#codepoints + 1] = codepoint
end
table.sort(codepoints)
-- Check that maximum "singles" codepoint is less than 0x100000?
for _, codepoint in ipairs(codepoints) do
  writef('\t\t [0x%05X] = "%s",\n', codepoint, data.singles[codepoint])
end
writef [[
},
ranges = {
]]
-- Order two { low, high, category } triples by their first code point.
local function compare_ranges(a, b)
  return a[1] < b[1]
end
table.sort(data.ranges, compare_ranges)
-- One output line per range: low, high, category.
for i = 1, #data.ranges do
  local range = data.ranges[i]
  writef('\t\t{ 0x%06X, 0x%06X, "%s" },\n', range[1], range[2], range[3])
end
writef [[
},
}]]
-- Close the output file explicitly so buffered data is flushed and any write
-- error is reported instead of being lost at interpreter exit.
assert(output:close())
</source>
{{collapse bottom}}
<includeonly>{{#ifeq:{{SUBPAGENAME}}|sandbox | |
<!-- Categories below this line, please; interwikis at Wikidata -->
}}</includeonly>
Unicode [[General Category]] data derived from [https://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt DerivedGeneralCategory.txt] in the Unicode Character Database.
The data was generated by the two Lua 5.3 scripts below, which require [http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html LPeg]. To regenerate it, put both scripts in the same folder as <code>DerivedGeneralCategory.txt</code>, change to that folder on the command line, and run <code>lua print_data.lua</code>; the data is written to <code>data.lua</code>.
{{collapse top|title=Lua 5.3 scripts}}
; parse_data.lua
<source lang="lua">
-- Read the whole Unicode data file into memory.
-- The io.open call is fully parenthesised on purpose: written as
-- `assert(io.open 'name', 'r')` the string call binds first, so 'r' would be
-- passed to assert as its error message instead of to io.open as the mode,
-- and a missing file would be reported as just "r".
local f = assert(io.open('DerivedGeneralCategory.txt', 'r'))
local Derived_General_Category = f:read 'a'
f:close()
local lpeg = require 'lpeg'
-- Bind the LPeg constructors used by the grammar as locals instead of copying
-- every capitalised lpeg function into _ENV, so that running this script does
-- not leave P, V, C, ... behind as globals.
local P, V, C, Cmt, R, S = lpeg.P, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.R, lpeg.S
-- Result table: `singles` maps a code point to its category, `ranges` is a
-- list of { low, high, category } triples.
local General_Category_data = { singles = {}, ranges = {} }

-- Match-time callback for one data record.
-- Receives the subject, the position after the match, and the captures:
-- (low, high, category) for "XXXX..XXXX ; gc" or (codepoint, category) for
-- "XXXX ; gc". Records whose category is 'Cn' are not stored.
-- Returns `pos` unchanged.
local function process_match(str, pos, ...)
  local first, second, third = ...
  if third then
    -- Range record: first = low, second = high, third = category.
    if third ~= 'Cn' then
      local ranges = General_Category_data.ranges
      ranges[#ranges + 1] = { tonumber(first, 16), tonumber(second, 16), third }
    end
  elseif second ~= 'Cn' then
    -- Single record: first = code point, second = category.
    General_Category_data.singles[tonumber(first, 16)] = second
  end
  return pos
end
-- Grammar for DerivedGeneralCategory.txt.
-- The subject is consumed one line at a time. A data record is only
-- recognised at the very start of a line; any other line is skipped whole.
-- Trying the record pattern at every byte would also match records embedded in
-- comments, such as the `# @missing: 0000..10FFFF; Unassigned` line in the UCD
-- file header, which would be stored as a bogus range with category "Un".
local patt = P {
  'file',
  file = V 'entry' * (V 'nl' * V 'entry')^0,
  -- A data record if the line starts with one, otherwise skip the line.
  entry = V 'line' + V 'rest',
  -- process_match receives (low, high, gc) or (codepoint, gc) as captures.
  line = Cmt((V 'range' + C(V 'codepoint')) * V 'white' * P ';' * V 'white' * C(V 'gc') * V 'rest',
    process_match),
  range = C(V 'codepoint') * P '..' * C(V 'codepoint'),
  -- Four to six uppercase hexadecimal digits.
  codepoint = V 'hex' * V 'hex' * V 'hex' * V 'hex' * V 'hex'^-2,
  -- An uppercase letter followed by any one character (e.g. Lu).
  gc = R 'AZ' * P(1),
  hex = R("09", "AF"),
  white = S ' \t'^0,
  nl = P '\r'^-1 * P '\n',
  -- Everything up to, but not including, the end of the line.
  rest = (1 - V 'nl')^0,
}
patt:match(Derived_General_Category)
return General_Category_data
</source>
; print_data.lua
<source lang="lua">
-- Run the parser script that sits next to this file. It is named
-- parse_data.lua on this page, so that is the file to load (the former
-- name make_data.lua does not match any script given here).
local data_filename = [[parse_data.lua]]
local data = dofile(data_filename)
local output_filename = [[data.lua]]
local output = assert(io.open(output_filename, 'w'))

-- Write a string.format-style formatted string to the output file.
local function writef(...)
  output:write(string.format(...))
end
writef [[
return {
singles = {
]]
-- Emit the single code points in ascending order using only the standard
-- library. pairs() has no defined order, so the keys are collected and
-- sorted first. This replaces the `spairs` helper from the external module
-- `t` (assumed to iterate in sorted key order), which is not provided
-- alongside these scripts.
local codepoints = {}
for codepoint in pairs(data.singles) do
  codepoints[#codepoints + 1] = codepoint
end
table.sort(codepoints)
-- Check that maximum "singles" codepoint is less than 0x100000?
for _, codepoint in ipairs(codepoints) do
  writef('\t\t [0x%05X] = "%s",\n', codepoint, data.singles[codepoint])
end
writef [[
},
ranges = {
]]
-- Order two { low, high, category } triples by their first code point.
local function compare_ranges(a, b)
  return a[1] < b[1]
end
table.sort(data.ranges, compare_ranges)
-- One output line per range: low, high, category.
for i = 1, #data.ranges do
  local range = data.ranges[i]
  writef('\t\t{ 0x%06X, 0x%06X, "%s" },\n', range[1], range[2], range[3])
end
writef [[
},
}]]
-- Close the output file explicitly so buffered data is flushed and any write
-- error is reported instead of being lost at interpreter exit.
assert(output:close())
</source>
{{collapse bottom}}
<includeonly>{{#ifeq:{{SUBPAGENAME}}|sandbox | |
<!-- Categories below this line, please; interwikis at Wikidata -->
}}</includeonly>