-- File: //usr/share/highlight/langDefs/purebasic.lang
--[[******************************************************************************
* *
* PureBASIC Language Definition *
* *
* v1.7.1 - 2017/11/18 *
* *
******************************************************************************
PureBASIC v5.00-5.61 -- The goal of this language definition is to emulate the
way PureBASIC's native IDE highlights its code, including inline Assembly
syntax coloring. When used with the "edit-purebasic" theme, PureBASIC code
will be highlighted just like in its native IDE.
Keywords from all PureBASIC versions (from 5.00 up to current) are added to
the list (deprecated keywords are preserved) to ensure that any code written
for PureBASIC >=5.00 will be parsed and highlighted correctly.
Comments in color definitions refer to PureBASIC native IDE's default palette.
------------------------------------------------------------------------------
This language definition is maintained at the PureBASIC Archives project:
https://github.com/tajmone/purebasic-archives/tree/master/syntax-highlighting/highlight
(visit the above link for more info and resources on this lang definition)
------------------------------------------------------------------------------
Written by Tristano Ajmone:
<tajmone@gmail.com>
https://github.com/tajmone
Released into the public domain according to the Unlicense terms:
http://unlicense.org/
------------------------------------------------------------------------------
]]
-- Human-readable name of the language handled by this definition.
Description="PureBASIC"
-- PureBASIC is a case-insensitive language: `EndIf`, `endif` and `ENDIF` are
-- all the same keyword, and inline ASM mnemonics may be written in any case.
-- Keyword matching must therefore ignore case, otherwise only the exact
-- spellings found in the keyword lists below would be highlighted.
IgnoreCase=true
-- COMMENTS > PB IDE color: #00AAAA (Persian Green/Tradewind)
-- PureBASIC only has single-line comments, introduced by a semicolon; they
-- are neither block comments nor nestable.
local semicolonComment = {
  Delimiter = { [[ ; ]] },
  Nested    = false,
  Block     = false,
}
Comments = { semicolonComment }
-- STRINGS > PB IDE color: #0080FF (Azure Radiance)
Strings = {
  -- Escape sequences: a backslash followed by one of a b f n r t v " or
  -- another backslash. PB IDE color: same as String.
  Escape    = [=[\\[abfnrtv"\\]]=],
  -- Strings are enclosed in double quotes.
  Delimiter = [[ " ]],
}
--[[ STRINGS NOTE: There's more to PB strings than this delimiter definition.
Escaped strings (~"") are handled via `Keyword Id=4` and custom code in
the `OnStateChange()` function found below.
--]]
-- OPERATORS: regex alternation of the single-character operator symbols
-- & < > ! | = / * % + - ~ (each one escaped where needed).
Operators=[[\&|<|>|\!|\||\=|\/|\*|\%|\+|\-|~]] -- PB IDE color: same as normal text (Black)
-- NUMBERS > PB IDE color: same as normal text (Black)
-- The pattern below uses the free-spacing regex mode `(?x)`, so whitespace
-- and the `#` remarks inside the string are part of the regex source but not
-- of what is matched. Alternatives are listed in this order: hexadecimal
-- ($FF), binary (%1010), floats with a decimal point, floats with only an
-- exponent, and plain decimals (rejected by lookbehind when preceded by `$`).
Digits=[[ (?x)
# ============ HEX ============
# Pascal style ($FF):
\$[0-9a-fA-F]+\b
# ============ BINARY ============
| %[01]+\b
# ============ FLOAT ============
# With decimal point separator:
| \b[-]?\d+\.\d+(?:[eE][\-\+]?\d+)?[a-zA-Z]*\b
# Without decimal point separator:
| \b[-]?\d+(?:[eE][\-\+]?\d+)[a-zA-Z]*\b
# ============ DECIMAL ============
| (?<!\$)\b\d+\b
]]
-- FLOATS NOTE: PureBASIC strips and ignores all suffixes from float numbers.
-- Therefore "1.575e+1" and "1.575e+1bananas" are both valid float values
-- in PureBASIC code (both yielding the same value of 15.75).
-- KEYWORDS
-- Each entry assigns an `Id` (the keyword group, which the theme maps to a
-- color) to either an explicit `List` of words or a `Regex`. When `Group` is
-- given, only that capture group of the regex is highlighted.
--   Id=1  PureBASIC keywords
--   Id=2  Constants (#Name), inline ASM lines (!...) and ASM mnemonics
--   Id=3  Procedure calls (identifier followed by an opening parenthesis)
--   Id=4  Escaped-string prefix (~"), turned into a string in OnStateChange()
Keywords={
  { Id=1, -- PureBASIC Keywords > PB IDE color: #006666 (Blue Stone) + Bold
    List={
      -- Keywords list built by parsing the tokens inside PureBASIC SDK's
      -- "SyntaxHilighting.dll" (from each PureBASIC version)...
      -- NOTE: every entry must be a bare word, unique within the list and
      -- free of surrounding whitespace, or it can never match a token.
      "Align", "And", "Array", "As", "Break", "CallDebugger", "Case", "CompilerCase", "CompilerDefault",
      "CompilerElse", "CompilerElseIf", "CompilerEndIf", "CompilerEndSelect", "CompilerError",
      "CompilerIf", "CompilerSelect", "CompilerWarning", "Continue", "Data", "DataSection", "Debug",
      "DebugLevel", "Declare", "DeclareC", "DeclareCDLL", "DeclareDLL", "DeclareModule", "Default",
      "Define", "Dim", "DisableASM", "DisableDebugger", "DisableExplicit", "Else", "ElseIf", "EnableASM",
      "EnableDebugger", "EnableExplicit", "End", "EndDataSection", "EndDeclareModule", "EndEnumeration",
      "EndIf", "EndImport", "EndInterface", "EndMacro", "EndModule", "EndProcedure", "EndSelect",
      "EndStructure", "EndStructureUnion", "EndWith", "Enumeration", "EnumerationBinary", "Extends",
      "FakeReturn", "For", "ForEach", "ForEver", "Global", "Gosub", "Goto", "If", "Import", "ImportC",
      "IncludeBinary", "IncludeFile", "IncludePath", "Interface", "List", "Macro", "MacroExpandedCount",
      "Map", "Module", "NewList", "NewMap", "Next", "Not", "Or", "Procedure", "ProcedureC",
      "ProcedureCDLL", "ProcedureDLL", "ProcedureReturn", "Protected", "Prototype", "PrototypeC", "ReDim",
      "Read", "Repeat", "Restore", "Return", "Runtime", "Select", "Shared", "Static", "Step", "Structure",
      "StructureUnion", "Swap", "Threaded", "To", "UndefineMacro", "Until", "UnuseModule",
      "UseModule", "Wend", "While", "With", "XIncludeFile", "XOr", },
  },
  { Id=2, -- Constants > PB IDE color: #924B72 (Cannon Pink)
    -- A `#`, then an identifier, optionally ending with `$`.
    Regex=[[ (#[a-zA-Z_]\w*\$?) ]]
  },
  { Id=2, -- Inline ASM > PB IDE color: #924B72 (Cannon Pink)
    -- A line whose first non-blank char is `!`, up to (not including) any `;`.
    Regex=[[ ^\s*(![^;]*) ]], Group=1
  },
  { Id=2, -- ASM Keywords > PB IDE color: #924B72 (Cannon Pink)
    List={
      -- Keywords list built by parsing the tokens inside PureBASIC SDK's
      -- "SyntaxHilighting.dll" (from each PureBASIC version)...
      "AAA", "AAD", "AAM", "AAS", "ADC", "ADD", "AND", "ARPL", "BOUND", "BSF", "BSR", "BSWAP", "BT",
      "BTC", "BTR", "BTS", "CALL", "CBW", "CDQ", "CLC", "CLD", "CLI", "CLTS", "CMC", "CMOVA", "CMOVAE",
      "CMOVB", "CMOVBE", "CMOVC", "CMOVE", "CMOVG", "CMOVGE", "CMOVL", "CMOVLE", "CMOVNA", "CMOVNAE",
      "CMOVNB", "CMOVNBE", "CMOVNC", "CMOVNE", "CMOVNG", "CMOVNGE", "CMOVNL", "CMOVNLE", "CMOVNO",
      "CMOVNP", "CMOVNS", "CMOVNZ", "CMOVO", "CMOVP", "CMOVPE", "CMOVPO", "CMOVS", "CMOVZ", "CMP", "CMPS",
      "CMPSB", "CMPSD", "CMPSW", "CMPXCHG", "CMPXCHG8B", "CWD", "CWDE", "DAA", "DAS", "DB", "DD", "DEC",
      "DIV", "DW", "EMMS", "ENTER", "ESC", "F2XM1", "FABS", "FADD", "FADDP", "FBLD", "FBSTP", "FCHS",
      "FCLEX", "FCMOVB", "FCMOVBE", "FCMOVE", "FCMOVNB", "FCMOVNBE", "FCMOVNE", "FCMOVNU", "FCMOVU",
      "FCOM", "FCOMI", "FCOMIP", "FCOMP", "FCOMPP", "FCOS", "FDECSTP", "FDIV", "FDIVP", "FDIVR", "FDIVRP",
      "FFREE", "FIADD", "FICOM", "FICOMP", "FIDIV", "FIDIVR", "FILD", "FIMUL", "FINCSTP", "FINIT", "FIST",
      "FISTP", "FISUB", "FISUBR", "FLD", "FLD1", "FLDCW", "FLDENV", "FLDL2E", "FLDL2T", "FLDLG2",
      "FLDLN2", "FLDPI", "FLDZ", "FMUL", "FMULP", "FNCLEX", "FNINIT", "FNOP", "FNSAVE", "FNSTCW",
      "FNSTENV", "FNSTSW", "FPATAN", "FPREM", "FPREM1", "FPTAN", "FRNDINT", "FRSTOR", "FSAVE", "FSCALE",
      "FSETPM", "FSIN", "FSINCOS", "FSQRT", "FST", "FSTCW", "FSTENV", "FSTP", "FSTSW", "FSUB", "FSUBP",
      "FSUBR", "FSUBRP", "FTST", "FUCOM", "FUCOMI", "FUCOMIP", "FUCOMP", "FUCOMPP", "FWAIT", "FXAM",
      "FXCH", "FXTRACT", "FYL2X", "FYL2XP1", "HLT", "IDIV", "IMUL", "IN", "INC", "INS", "INSB", "INSD",
      "INSW", "INT", "INTO", "INVD", "INVLPG", "IRET", "IRETD", "JA", "JAE", "JB", "JBE", "JC", "JCXZ",
      "JE", "JECXZ", "JG", "JGE", "JL", "JLE", "JMP", "JNA", "JNAE", "JNB", "JNBE", "JNC", "JNE", "JNG",
      "JNGE", "JNL", "JNLE", "JNO", "JNP", "JNS", "JNZ", "JO", "JP", "JPE", "JPO", "JS", "JZ", "LAHF",
      "LAR", "LDS", "LEA", "LEAVE", "LES", "LFS", "LGDT", "LGS", "LIDT", "LLDT", "LMSW", "LOCK", "LODS",
      "LODSB", "LODSD", "LODSW", "LOOP", "LOOPE", "LOOPNE", "LOOPNZ", "LOOPZ", "LSL", "LSS", "LTR", "MOV",
      "MOVD", "MOVQ", "MOVS", "MOVSB", "MOVSD", "MOVSW", "MOVSX", "MOVZX", "MUL", "NEG", "NOP", "NOT",
      "OR", "OUT", "OUTS", "OUTSB", "OUTSD", "OUTSW", "PACKSSDW", "PACKSSWB", "PACKUSWB", "PADDB",
      "PADDD", "PADDSB", "PADDSW", "PADDUSB", "PADDUSW", "PADDW", "PAND", "PANDN", "PCMPEQB", "PCMPEQD",
      "PCMPEQW", "PCMPGTB", "PCMPGTD", "PCMPGTW", "PMADDWD", "PMULHW", "POP", "POPA", "POPAD", "POPF",
      "POPFD", "POR", "PSLLD", "PSLLQ", "PSLLW", "PSRAD", "PSRAW", "PSRLD", "PSRLQ", "PSRLW", "PSUBB",
      "PSUBD", "PSUBSB", "PSUBSW", "PSUBUSB", "PSUBUSW", "PSUBW", "PUNPCKHBW", "PUNPCKHDQ", "PUNPCKHWD",
      "PUNPCKLBW", "PUNPCKLDQ", "PUNPCKLWD", "PUSH", "PUSHA", "PUSHAD", "PUSHF", "PUSHFD", "PXOR", "RCL",
      "RCR", "RDMSR", "RDPMC", "RDTSC", "REP", "REPE", "REPNE", "REPNZ", "REPZ", "RET", "RETF", "ROL",
      "ROR", "RSM", "SAHF", "SAL", "SAR", "SBB", "SCAS", "SCASB", "SCASD", "SCASW", "SETA", "SETAE",
      "SETB", "SETBE", "SETC", "SETE", "SETG", "SETGE", "SETL", "SETLE", "SETNA", "SETNAE", "SETNB",
      "SETNBE", "SETNC", "SETNE", "SETNG", "SETNGE", "SETNL", "SETNLE", "SETNO", "SETNP", "SETNS",
      "SETNZ", "SETO", "SETP", "SETPE", "SETPO", "SETS", "SETZ", "SGDT", "SHL", "SHLD", "SHR", "SHRD",
      "SIDT", "SLDT", "SMSW", "STC", "STD", "STI", "STOS", "STOSB", "STOSD", "STOSW", "STR", "SUB",
      "TEST", "UD2", "VERR", "VERW", "WAIT", "WBINVD", "WRMSR", "XADD", "XCHG", "XLAT", "XLATB", "XOR" },
  },
  { Id=3, -- Procedure calls > PB IDE color: #006666 (Blue Stone)
    -- An identifier followed (after optional whitespace) by `(`; only the
    -- identifier itself (capture group 1) is highlighted.
    Regex=[[ ([a-zA-Z_]\w*)(?:(?:\s*)\() ]],
    Group=1
  },
  { Id=4, -- Escaped-String Prefix ("~") > PB IDE color: same as strings
    Regex=[[ ~" ]], -- NOTE: In the final doc, this Keyword is converted to become
                    -- part of the string [see OnStateChange() func below]
  },
}
--[[ OnStateChange(oldState, newState, token, kwgroup)
     ------------------------------------------------
     State-change hook used to sanitize strings, escape sequences and the
     escaped-string prefix (~").

     Parameters:
       oldState  the state being left (compared against HL_STRING, HL_ESC_SEQ)
       newState  the state being entered (HL_ESC_SEQ, HL_KEYWORD, HL_STRING...)
       token     the text that triggered the change (compared to \" and ")
       kwgroup   keyword group Id; only group 4 (the ~" prefix) is inspected

     Returns one of HL_REJECT, HL_ESC_SEQ, HL_STRING, HL_STRING_END or
     `newState` itself. When no branch applies (new state is not escape,
     keyword 4 or string, and the old state is string/escape) nothing is
     returned.

     The function keeps its memory in three GLOBAL variables (they are never
     declared `local`, and are nil until first assigned, which the `==true`
     and `~=true` tests treat as "not set"):
       escapedString   a ~" opening delimiter was seen (escapes are allowed)
       escapeSeq       the last accepted item was an escape sequence
       forceStringEnd  the next " must be treated as a string end
]]
function OnStateChange(oldState, newState, token, kwgroup)
--[[ Dismiss Escape-Sequences
=========================
Currently, I couldn't find a way to preserve escape sequences without causing
stray behaviour in string. So, for the time being they are just dismissed. ]]
if newState==HL_ESC_SEQ then
if oldState==HL_STRING then
-- ESCAPE SEQUENCE FOUND INSIDE A STRING:
if escapedString~=true then
-- String is Literal (no escaping allowed)...
escapeSeq = false
if token=='\\"' then
-- rejecting a \" will cause the \ to become part of the curr. string
-- but the " will be thrown again to the parser, which will mistake it
-- for a new string start. We'll use the `forceStringEnd` var to
-- prevent this...
forceStringEnd = true
return HL_REJECT
else
-- all other escape sequences can be suppressed by assimilating
-- them to the current string...
forceStringEnd=false
return HL_REJECT --HL_STRING
end
else
-- String is Escapable...
escapeSeq = true
forceStringEnd=false
return HL_ESC_SEQ
end
-- HANDLE TWO ESCAPE SEQUENCES IN A ROW:
elseif oldState==HL_ESC_SEQ then
escapeSeq = true
escapedString = true
forceStringEnd=false
return HL_ESC_SEQ
else
-- ESCAPE SEQUENCE FOUND OUTSIDE A STRING
-- (reset every tracker and refuse the escape sequence)
escapeSeq = false
forceStringEnd=false
escapedString = false
return HL_REJECT
end
--[[ PB Escape Strings (~"...")
==========================
Keyword 4 (~") is converted to string state, so the tilde becomes part of
the actual string. The boolean var `escapedString` tracks this process. ]]
elseif newState==HL_KEYWORD and kwgroup==4 then
-- If ~" occurs inside a string, it's just a string with a tilde as last char...
if oldState==HL_STRING then
-- We use the `forceStringEnd` var trick here, like we did with
-- rejected \" escape sequences above...
forceStringEnd=true
return HL_REJECT -- HL_STRING -- HL_REJECT
-- In all other cases it's an escaped string delimiter (opening)...
else
escapedString = true
escapeSeq = false
forceStringEnd=false
return HL_STRING
end
--[[ NEW STATE IS STRING: ]]
elseif newState==HL_STRING then
--[[ Handle The " After A Rejected \" or ~" in Literal String
=======================
A rejected \" or ~" led to a spurious " being fed to the parser which can
be mistaken for a new string delimiter ]]
if forceStringEnd==true then
forceStringEnd=false
return HL_STRING_END
--[[ Sanitize String Starts
======================
Because Keyword 4 is converted to a string start, we must tell the parser to
treat the next string delimiter as a string end! ]]
elseif escapedString==true then
escapedString = false
return HL_STRING_END
--[[ Sanitize String After Escape Sequence
=====================================
Ensure that a " immediately following an escape sequence is treated as
a string-end delimiter. ]]
elseif token=='"' and escapeSeq==true then
escapeSeq = false
forceStringEnd=false
escapedString = false
return HL_STRING_END
else
-- Ordinary string start: clear the escape trackers and accept it.
escapeSeq = false
forceStringEnd=false
return HL_STRING
end
--[[ FOR ALL OTHER SYNTAX ITEMS:]]
elseif oldState~=HL_STRING and oldState~=HL_ESC_SEQ then
-- Reset all Trackers' States: This is required to avoid some edge-cases
-- of strings corruption in complex source code...
escapeSeq = false
escapedString = false
forceStringEnd=false
return newState
end
end
--[[==========================================================================
CHANGELOG
==============================================================================
v1.7.1 - 2017/11/18 (PureBASIC v5.61)
- Syntax checked against new PureBASIC v5.61 (no changes detected)
v1.7 - 2017/10/02 (PureBASIC v5.60)
- IMPROVEMENTS: Escape sequences are now correctly parsed and highlighted.
v1.6 - 2017/09/30 (PureBASIC v5.60)
- IMPROVEMENTS: Added numbers definition (hex, binary, floats and decimals)
- BUG-FIX: String concatenations didn't always parse correctly; now this
was fixed (at the expense of escaped sequences).
- ABROGATED: parsing of escape sequences is now disabled because it caused
too many problems with strings.
v1.5 - 2017/09/28 (PureBASIC v5.60)
- IMPROVEMENTS:
- Escaped-String Prefix (~) is no longer handled as a keyword (ID=4/kwd)
but it's recognized as a valid string delimiter.
- Escape Sequences are further sanitized so that they can occur only
inside strings with the ~" opening delimiter.
- ABROGATED:
- The lang definition no longer uses Keyword 4 (Escaped-String Prefix).
v1.4 - 2017/09/27 (PureBASIC v5.60)
- BUG-FIX: Added sanitize function to avoid false positive Escape Sequences
in structured vars (eg: "\v" in "SomeStructure\var1").
(Thanks to André Simon -- see Issue #23:)
-- https://github.com/andre-simon/highlight/issues/23
v1.3 - 2017/05/20 (PureBASIC v5.60)
- fixed single line comment delimiter
v1.2 - 2017/05/11 (PureBASIC v5.60)
- Added ASM keywords and support for inline ASM (via "!" syntax).
- BUG-FIXES:
- Repaired missing or misspelled PB Keywords (something went wrong in the
keywords list of the v1.1 of this lang definition, some tokens were
lost, other fused into a single token -- sorry about that).
v1.1 - 2017/04/30 (PureBASIC v5.60)
- Keywords list now built by extracting them from the PureBASIC SDK's
"SyntaxHilighting.dll" (from each PureBASIC version). Tokens from each
version are added to the list, and renamed or removed tokens are kept
for the sake of covering all versions of the language from PureBASIC
v5.00 upward. (NOTE: currently, there are no renamed or deprecated
tokens in the keywords list). For more info, see:
-- http://www.purebasic.fr/english/viewtopic.php?&p=506269
-- https://github.com/tajmone/purebasic-archives/tree/master/syntax-highlighting/guidelines
v1.0 - 2016/10/27 (PureBASIC v5.50)
- First release. Keywords list taken and adapted from GuShH's PureBasic
language file for GeSHi:
-- https://github.com/easybook/geshi/blob/master/geshi/purebasic.php
--]]