ReactOS 0.4.15-dev-7788-g1ad9096
asmpp.cpp
Go to the documentation of this file.
/*
 * PROJECT: ReactOS host tools
 * LICENSE: MIT (https://spdx.org/licenses/MIT)
 * PURPOSE: ASM preprocessor
 * COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
 */
7
8// Optimize even on debug builds, because otherwise it's ridiculously slow
9#ifdef _MSC_VER
10#pragma optimize("gst", on)
11#pragma auto_inline(on)
12#else
13#pragma GCC optimize("O3,inline")
14#endif
15
16#include "tokenizer.hpp"
17#include <cstdlib>
18#include <cstdio>
19#include <sstream>
20#include <ctime>
21
22#define PROFILING_ENABLED 0
23
24using namespace std;
25
27
// Token type identifiers, referenced as TOKEN_TYPE::<name> by the token table
// and by the translate_* functions in this file.
// NOTE(review): in this listing the "enum TOKEN_TYPE" header line and every
// enumerator except Invalid are missing (only bare line numbers remain below);
// restore them from the original asmpp.cpp.
29{
30 Invalid = -1,
38
46
61
65
77
80
93
96};
97
// Drop-in replacement for printf that produces no output.
// It can be switched in through the (commented-out) "#define printf fake_printf"
// below it to silence the translator.
//
// format  printf-style format string (ignored, as are the variadic arguments).
// Returns 0 in all cases.
int fake_printf(const char* format, ...)
{
    static_cast<void>(format);
    return 0;
}
102
103//#define printf fake_printf
104
105// Use a look-ahead for following characters, not included into the match
106//#define FOLLOWED_BY(x) R"((?=)" x R"())"
// Active definition: the trailing pattern x is appended unchanged, so it is
// matched as part of the regex (after the capture group) rather than as a
// look-ahead.
107#define FOLLOWED_BY(x) x
108
// Reusable regex fragments, written as raw string literals.
// ANY_CHAR: any character or a newline.
109#define ANY_CHAR R"((?:.|\n))"
// WHITESPACE: a run of spaces/tabs.
// NOTE(review): "++" reads like a possessive quantifier -- confirm the regex
// engine used by tokenizer.hpp accepts it.
110#define WHITESPACE R"((?:[ \t]++))"
// NEWLINE: a single line feed.
111#define NEWLINE R"([\n])"
// WS_OR_NL: one or more WHITESPACE or NEWLINE matches.
112#define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)"
// SEPARATOR: one character that may directly follow an identifier -- white
// space or one of , = + - * / : ~ [ ]
113#define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])"
114
// Regex alternation of x86 instruction mnemonics. The token table wraps it in
// a capture group to recognise TOKEN_TYPE::Instruction tokens.
// NOTE(review): entries such as CMOVcc, FCMOVcc, Jcc, LOOPcc, SETcc and
// PREFETCHh look like manual-index placeholders rather than literal
// mnemonics -- confirm whether concrete forms (e.g. jz, sete) are meant to be
// matched by this list.
115#define INSTRUCTION \
116 "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \
117 "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \
118 "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \
119 "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \
120 "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \
121 "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \
122 "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \
123 "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \
124 "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \
125 "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \
126 "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \
127 "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \
128 "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \
129 "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \
130 "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \
131 "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \
132 "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \
133 "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \
134 "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \
135 "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \
136 "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \
137 "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \
138 "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \
139 "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \
140 "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \
141 "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \
142 "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \
143 "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \
144 "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \
145 "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \
146 "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \
147 "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \
148 "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \
149 "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \
150 "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \
151 "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \
152 "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \
153 "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \
154 "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \
155 "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \
156 "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \
157 "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \
158 "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \
159 "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \
160 "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \
161 "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \
162 "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \
163 "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \
164 "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \
165 "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \
166 "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \
167 "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \
168 "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \
169 "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \
170 "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \
171 "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \
172 "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \
173 "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \
174 "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \
175 "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \
176 "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \
177 "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \
178 "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \
179 "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \
180 "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \
181 "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \
182 "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \
183 "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \
184 "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \
185 "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \
186 "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \
187 "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \
188 "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \
189 "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \
190 "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \
191 "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \
192 "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \
193 "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \
194 "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \
195 "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \
196 "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \
197 "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \
198 "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \
199 "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \
200 "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \
201 "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \
202 "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \
203 "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \
204 "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \
205 "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \
206 "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \
207 "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \
208 "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \
209 "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \
210 "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \
211 "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \
212 "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \
213 "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \
214 "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \
215 "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \
216 "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \
217 "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \
218 "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \
219 "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \
220 "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \
221 "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \
222 "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \
223 "XSAVES|XSETBV|XTEST"
224
226{
227 //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" },
228 { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" },
229 { TOKEN_TYPE::NewLine, R"((\n))" },
230 { TOKEN_TYPE::Comment, R"((;.*\n))" },
231 { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\‍(\)])") },
232 { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\‍(\)])") },
233 { TOKEN_TYPE::String, R"((\".*\"))" },
234
235 { TOKEN_TYPE::BraceOpen, R"((\‍())"},
236 { TOKEN_TYPE::BraceClose, R"((\)))"},
237 { TOKEN_TYPE::MemRefStart, R"((\[))"},
238 { TOKEN_TYPE::MemRefEnd, R"((\]))"},
239 { TOKEN_TYPE::Colon, R"((\:))"},
240 { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"},
241 { TOKEN_TYPE::StringDef, R"((<.+>))" },
242
243 { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") },
244 { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") },
245 { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") },
246 { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") },
247 { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") },
248 { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") },
249 { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") },
250 { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") },
251 { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") },
252 { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") },
253 { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") },
254 { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")},
255 { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")},
256 { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")},
257 { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")},
258
259 { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") },
260 { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") },
261 { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") },
262
263 { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") },
264 { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") },
265 { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") },
266 { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") },
267 { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") },
268 { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") },
269 { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") },
270 { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") },
271 { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") },
272 { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") },
273 { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") },
274 { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")},
275 { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")},
276
277 { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") },
278 { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") },
279 { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") },
280 { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") },
281 { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") },
282 { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") },
283 { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") },
284 { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
285 { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
286 { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
287 { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
288 { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
289
290 { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)},
291 { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
292 { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
293
294};
295
296// FIXME: use context?
// Number printed as the label name when an "@@:" label is translated
// (see the "%u:" output in the identifier-construct translation).
297unsigned int g_label_number = 0;
298
// True while translate_instruction is processing the operands of a jump/call
// instruction; it is reset to false when that instruction's line has been handled.
299bool g_processing_jmp = false;
300
// Classification of a known identifier; stored per identifier and compared
// against IDTYPE::Memory by is_mem_id.
// NOTE(review): source line 308 is absent from this listing, so one
// enumerator between Macro and String may be missing -- check the original file.
301enum class IDTYPE
302{
303 Memory,
304 Register,
305 Label,
306 Constant,
307 Macro,
309 String,
310 Unknown
311};
312
// Entry of the global identifier table: the identifier text plus its kind.
// NOTE(review): the "struct IDENTIFIER" header line and the second member are
// missing from this listing. Other code reads a member called Type and
// compares it with IDTYPE values, and builds entries as
// IDENTIFIER{ name, type } -- restore both lines from the original file.
314{
// Identifier text as returned by Token::str()
315 string Name;
317};
318
320
321static
322void
324{
325 g_identifiers.push_back(IDENTIFIER{ tok.str(), type });
326 //fprintf(stderr, "Added id: '%s'\n", tok.str().c_str());
327}
328
// Declared as "void add_mem_id(Token &tok)" in the cross-reference list at the
// end of this listing; callers pass the name token of data definitions and
// EXTERN symbols.
// NOTE(review): the declarator line and the single body statement are missing
// here. Presumably the body records the token as an IDTYPE::Memory identifier
// via add_identifier -- confirm against the original file.
329void
331{
333}
334
335bool
337{
338 for (IDENTIFIER& identifier : g_identifiers)
339 {
340 if (identifier.Name == tok.str())
341 {
342 return identifier.Type == IDTYPE::Memory;
343 }
344 }
345
346 return true;
347}
348
// Case-insensitive equality test for two strings.
//
// a, b  Strings to compare.
//
// Returns true when both have the same length and every character pair is
// equal after tolower(); false otherwise.
bool
iequals(const std::string &a, const std::string &b)
{
    const std::size_t length = a.size();
    if (b.size() != length)
        return false;

    for (std::size_t i = 0; i < length; ++i)
    {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF), so convert before calling it.
        const unsigned char ca = static_cast<unsigned char>(a[i]);
        const unsigned char cb = static_cast<unsigned char>(b[i]);
        if (tolower(ca) != tolower(cb))
            return false;
    }

    return true;
}
360
361Token
363{
364 if (tok.type() != type)
365 {
366 throw "Not white space after identifier!\n";
367 }
368
369 return tok;
370}
371
// Body of get_ws: returns the token when it is white space, otherwise throws
// a const char* message.
// NOTE(review): the declarator line is missing from this listing. Callers use
// it as "Token t = get_ws(tokens[i]);", so it presumably mirrors
// get_ws_or_nl(Token &&tok) -- restore it from the original file.
373{
374 int type = tok.type();
// Anything other than white space is rejected
375 if (type != TOKEN_TYPE::WhiteSpace)
376 {
377 throw "Not white space after identifier!\n";
378 }
379
380 return tok;
381}
382
384{
385 int type = tok.type();
386 if ((type != TOKEN_TYPE::WhiteSpace) &&
387 (type != TOKEN_TYPE::NewLine))
388 {
389 throw "Not white space after identifier!\n";
390 }
391
392 return tok;
393}
394
// Tells whether a string occurs in a list of strings (exact, case-sensitive match).
// The declarator line was lost in this listing; the cross-reference entry is
// "bool is_string_in_list(vector< string > list, string str)". Both parameters
// are taken by const reference here, which accepts the same arguments but
// avoids copying the whole list and the string on every call.
//
// list  Strings to search.
// str   String to look for.
//
// Returns true when some element of list equals str, false otherwise
// (including for an empty list).
bool
is_string_in_list(const std::vector<std::string>& list, const std::string& str)
{
    for (const std::string& entry : list)
    {
        if (entry == str)
        {
            return true;
        }
    }

    return false;
}
407
408size_t
409translate_token(TokenList& tokens, size_t index, const vector<string> &macro_params)
410{
411 Token tok = tokens[index];
412 switch (tok.type())
413 {
414 case TOKEN_TYPE::Comment:
415 printf("//%s", tok.str().c_str() + 1);
416 break;
417
418 case TOKEN_TYPE::DecNumber:
419 {
420 unsigned long long num = stoull(tok.str(), nullptr, 10);
421 printf("%llu", num);
422 break;
423 }
424
425 case TOKEN_TYPE::HexNumber:
426 {
427 string number = tok.str();
428 printf("0x%s", number.substr(0, number.size() - 1).c_str());
429 break;
430 }
431
432 case TOKEN_TYPE::Identifier:
433 if (is_string_in_list(macro_params, tok.str()))
434 {
435 printf("\\");
436 }
437 printf("%s", tok.str().c_str());
438 break;
439
440 // We migt want to improve these
441 case TOKEN_TYPE::BYTE_PTR:
442 case TOKEN_TYPE::WORD_PTR:
443 case TOKEN_TYPE::DWORD_PTR:
444 case TOKEN_TYPE::QWORD_PTR:
445 case TOKEN_TYPE::XMMWORD_PTR:
446
447 // Check these. valid only in instructions?
448 case TOKEN_TYPE::Reg8:
449 case TOKEN_TYPE::Reg16:
450 case TOKEN_TYPE::Reg32:
451 case TOKEN_TYPE::Reg64:
452 case TOKEN_TYPE::RegXmm:
453 case TOKEN_TYPE::Instruction:
454
455 case TOKEN_TYPE::WhiteSpace:
456 case TOKEN_TYPE::NewLine:
457 case TOKEN_TYPE::Operator:
458 printf("%s", tok.str().c_str());
459 break;
460
461 default:
462 printf("%s", tok.str().c_str());
463 break;
464 }
465
466 return index + 1;
467}
468
469size_t complete_line(TokenList &tokens, size_t index, const vector<string> &macro_params)
470{
471 while (index < tokens.size())
472 {
473 Token tok = tokens[index];
474 index = translate_token(tokens, index, macro_params);
475 if ((tok.type() == TOKEN_TYPE::NewLine) ||
476 (tok.type() == TOKEN_TYPE::Comment))
477 {
478 break;
479 }
480 }
481
482 return index;
483}
484
485size_t
486translate_expression(TokenList &tokens, size_t index, const vector<string> &macro_params)
487{
488 while (index < tokens.size())
489 {
490 Token tok = tokens[index];
491 switch (tok.type())
492 {
493 case TOKEN_TYPE::NewLine:
494 case TOKEN_TYPE::Comment:
495 return index;
496
497 case TOKEN_TYPE::KW_MASK:
498 printf("MASK_");
499 index += 2;
500 break;
501
502 case TOKEN_TYPE::Instruction:
503 if (iequals(tok.str(), "and"))
504 {
505 printf("&");
506 index += 1;
507 }
508 else if (iequals(tok.str(), "or"))
509 {
510 printf("|");
511 index += 1;
512 }
513 else if (iequals(tok.str(), "shl"))
514 {
515 printf("<<");
516 index += 1;
517 }
518 else if (iequals(tok.str(), "not"))
519 {
520 printf("!");
521 index += 1;
522 }
523 else
524 {
525 throw "Invalid expression";
526 }
527 break;
528
529 case TOKEN_TYPE::Operator:
530 if (tok.str() == ",")
531 {
532 return index;
533 }
534 case TOKEN_TYPE::WhiteSpace:
535 case TOKEN_TYPE::BraceOpen:
536 case TOKEN_TYPE::BraceClose:
537 case TOKEN_TYPE::DecNumber:
538 case TOKEN_TYPE::HexNumber:
539 case TOKEN_TYPE::Identifier:
540 index = translate_token(tokens, index, macro_params);
541 break;
542
543 default:
544 index = translate_token(tokens, index, macro_params);
545 }
546 }
547
548 return index;
549}
550
551size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params)
552{
553 unsigned int offset = 0;
554
555 Token tok = tokens[index];
556
557 if ((tok.type() == TOKEN_TYPE::DecNumber) ||
558 (tok.type() == TOKEN_TYPE::HexNumber))
559 {
560 offset = stoi(tok.str(), nullptr, 0);
561 index += 2;
562 }
563
564 index = translate_token(tokens, index, macro_params);
565
566 while (index < tokens.size())
567 {
568 Token tok = tokens[index];
569 index = translate_token(tokens, index, macro_params);
570 if (tok.type() == TOKEN_TYPE::MemRefEnd)
571 {
572 if (offset != 0)
573 {
574 printf(" + %u", offset);
575 }
576 return index;
577 }
578 }
579
580 throw "Failed to translate memory ref";
581 return index;
582}
583
// Translates one operand of an instruction.
//
// tokens        Token list being translated.
// index         Position of the first operand token.
// macro_params  Parameter names of the enclosing macro, if any.
//
// Returns the position where operand translation stopped: after a memory
// reference or a non-comma operator, or at an unconsumed NewLine, Comment or
// "," token, or tokens.size().
//
// NOTE(review): one line of the Identifier condition (source line 623) is
// missing from this listing, so the "&&" chain below is incomplete as shown.
584size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params)
585{
// An optional size prefix (e.g. BYTE PTR) is emitted first
586 switch (tokens[index].type())
587 {
588 case TOKEN_TYPE::BYTE_PTR:
589 case TOKEN_TYPE::WORD_PTR:
590 case TOKEN_TYPE::DWORD_PTR:
591 case TOKEN_TYPE::QWORD_PTR:
592 case TOKEN_TYPE::XMMWORD_PTR:
593 index = translate_token(tokens, index, macro_params);
594
595 // Optional whitespace
596 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
597 {
598 index = translate_token(tokens, index, macro_params);
599 }
600 }
601
602 while (index < tokens.size())
603 {
604 Token tok = tokens[index];
605 switch (tok.type())
606 {
// A bracketed memory reference completes the operand
607 case TOKEN_TYPE::MemRefStart:
608 return translate_mem_ref(tokens, index, macro_params);
609
// The end of the line is left for the caller
610 case TOKEN_TYPE::NewLine:
611 case TOKEN_TYPE::Comment:
612 return index;
613
// A comma ends the operand without being consumed; other operators are emitted
614 case TOKEN_TYPE::Operator:
615 if (tok.str() == ",")
616 return index;
617 return translate_token(tokens, index, macro_params);
618
// Identifiers are emitted, and "[rip]" is appended when the (partly missing)
// condition below holds
619 case TOKEN_TYPE::Identifier:
620 index = translate_token(tokens, index, macro_params);
621 if (is_mem_id(tok) &&
622 !is_string_in_list(macro_params, tok.str()) &&
624 {
625 printf("[rip]");
626 }
627 break;
628
// Anything else is handled as an expression
629 default:
630 index = translate_expression(tokens, index, macro_params);
631 }
632 }
633
634 return index;
635}
636
637static
638bool
640{
641 const char* inst_list[] = {
642 "jmp", "call", "ja", "jae", "jb", "jbe", "jc", "jcxz", "je", "jecxz", "jg", "jge",
643 "jl", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl", "jnle",
644 "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "jrcxz", "js", "jz", "loop", "loope",
645 "loopne", "loopnz", "loopz"
646 };
647
648 for (const char* inst : inst_list)
649 {
650 if (iequals(tok.str(), inst))
651 {
652 return true;
653 }
654 }
655
656 return false;
657}
658
659size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params)
660{
661 // Check for jump/call instructions
662 if (is_jmp_or_call(tokens[index]))
663 {
664 g_processing_jmp = true;
665 }
666
667 // Translate the instruction itself
668 index = translate_token(tokens, index, macro_params);
669
670 // Handle instruction parameters
671 while (index < tokens.size())
672 {
673 // Optional whitespace
674 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
675 {
676 index = translate_token(tokens, index, macro_params);
677 }
678
679 // Check for parameters
680 Token tok = tokens[index];
681 switch (tok.type())
682 {
683 case TOKEN_TYPE::Comment:
684 case TOKEN_TYPE::NewLine:
685 g_processing_jmp = false;
686 return index;
687
688 case TOKEN_TYPE::WhiteSpace:
689 case TOKEN_TYPE::Operator:
690 index = translate_token(tokens, index, macro_params);
691 break;
692
693 default:
694 index = translate_instruction_param(tokens, index, macro_params);
695 break;
696 }
697 }
698
699 g_processing_jmp = false;
700 return index;
701}
702
703size_t translate_item(TokenList& tokens, size_t index, const vector<string> &macro_params)
704{
705 switch (tokens[index].type())
706 {
707 case TOKEN_TYPE::DecNumber:
708 case TOKEN_TYPE::HexNumber:
709 case TOKEN_TYPE::String:
710 case TOKEN_TYPE::WhiteSpace:
711 return translate_token(tokens, index, macro_params);
712 }
713
714 throw "Failed to translate item";
715 return -1;
716}
717
718size_t translate_list(TokenList& tokens, size_t index, const vector<string> &macro_params)
719{
720 while (index < tokens.size())
721 {
722 // The item itself
723 index = translate_item(tokens, index, macro_params);
724
725 // Optional white space
726 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
727 {
728 index = translate_token(tokens, index, macro_params);
729 }
730
731 // End of list?
732 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
733 (tokens[index].type() == TOKEN_TYPE::NewLine))
734 {
735 return index;
736 }
737
738 // We expect a comma here
739 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
740 (tokens[index].str() != ","))
741 {
742 throw "Unexpected end of list";
743 }
744
745 index = translate_token(tokens, index, macro_params);
746 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
747 {
748 index = translate_token(tokens, index, macro_params);
749 }
750 }
751
752 throw "Failed to translate list";
753 return -1;
754}
755
756size_t
757translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params)
758{
759 Token tok = tokens[index];
760 Token tok1 = get_ws(tokens[index + 1]);
761 string directive, need, have ="";
762
763 switch (tok.type())
764 {
765 case TOKEN_TYPE::KW_DB:
766 directive = ".byte";
767 break;
768
769 case TOKEN_TYPE::KW_DW:
770 directive = ".short";
771 break;
772
773 case TOKEN_TYPE::KW_DD:
774 directive = ".long";
775 break;
776
777 case TOKEN_TYPE::KW_DQ:
778 directive = ".quad";
779 break;
780 }
781
782 index += 2;
783
784 while (index < tokens.size())
785 {
786 // Check if we need '.ascii' for ASCII strings
787 if (tokens[index].str()[0] == '\"')
788 {
789 need = ".ascii";
790 }
791 else
792 {
793 need = directive;
794 }
795
796 // Output the directive we need (or a comma)
797 if (have == "")
798 {
799 printf("%s ", need.c_str());
800 }
801 else if (have != need)
802 {
803 printf("\n%s ", need.c_str());
804 }
805 else
806 {
807 printf(", ");
808 }
809
810 have = need;
811
812 // The item itself
813 index = translate_item(tokens, index, macro_params);
814
815 // Optional white space
816 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
817 {
818 index = translate_token(tokens, index, macro_params);
819 }
820
821 // End of list?
822 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
823 (tokens[index].type() == TOKEN_TYPE::NewLine))
824 {
825 return index;
826 }
827
828 // We expect a comma here
829 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
830 (tokens[index].str() != ","))
831 {
832 throw "Unexpected end of list";
833 }
834
835 // Skip comma and optional white-space
836 index++;
837 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
838 {
839 index++;
840 }
841 }
842
843 throw "Failed to translate list";
844 return -1;
845}
846
847size_t
848translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params)
849{
850 // The next token should be white space
851 Token tok1 = get_ws(tokens[index + 1]);
852
853 printf("%s%s", translated.c_str(), tok1.str().c_str());
854 return translate_expression(tokens, index + 2, macro_params);
855}
856
857size_t
858translate_record(TokenList &tokens, size_t index, const vector<string> &macro_params)
859{
860 unsigned int bits, bitpos = 0;
861 unsigned long long oldmask = 0, mask = 0;
862
863 Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier);
864 index += 4;
865 while (index < tokens.size())
866 {
867 Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier);
868
869 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
870 {
871 index++;
872 }
873
874 if (tokens[index++].str() != ":")
875 {
876 throw "Unexpected token";
877 }
878
879 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
880 {
881 index++;
882 }
883
884 Token tok_bits = tokens[index++];
885 if ((tok_bits.type() != TOKEN_TYPE::DecNumber) &&
886 (tok_bits.type() != TOKEN_TYPE::HexNumber))
887 {
888 throw "Unexpected token";
889 }
890
891 bits = stoi(tok_bits.str(), nullptr, 0);
892
893 printf("%s = %u\n", tok_member.str().c_str(), bitpos);
894
895 oldmask = (1ULL << bitpos) - 1;
896 bitpos += bits;
897 mask = (1ULL << bitpos) - 1 - oldmask;
898 printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask);
899
900 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
901 {
902 index++;
903 }
904
905 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
906 (tokens[index].type() == TOKEN_TYPE::Comment))
907 {
908 break;
909 }
910
911 if (tokens[index].str() != ",")
912 {
913 throw "unexpected token";
914 }
915
916 index++;
917 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
918 {
919 index++;
920 }
921
922 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
923 (tokens[index].type() == TOKEN_TYPE::Comment))
924 {
925 index++;
926 }
927
928 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
929 {
930 index++;
931 }
932 }
933
934 return index;
935}
936
// Translates a construct that starts with an identifier: a label, a data
// definition, an EQU/TEXTEQU definition, a PROC/ENDP pair, a RECORD, or
// anything else, which is treated like an instruction.
//
// Callers invoke it as translate_identifier_construct(tokens, index,
// macro_params), with index at the identifier.
// Returns the position at which translation of the construct stopped.
// Throws a const char* message for a nested MACRO definition.
//
// NOTE(review): this listing is missing the declarator line and source lines
// 947, 954, 976, 986 and 1006 of the body -- restore them from the original
// file before relying on the logic shown here.
937size_t
939{
940 Token tok = tokens[index];
941 Token tok1 = tokens[index + 1];
942
// "name:" is a label; "@@:" is printed as a number taken from g_label_number
943 if (tok1.type() == TOKEN_TYPE::Colon)
944 {
945 if (tok.str() == "@@")
946 {
948 printf("%u:", g_label_number);
949 }
950 else
951 {
952 printf("%s:", tok.str().c_str());
953 }
955 return index + 2;
956 }
957
// Otherwise the token two positions after the identifier selects the construct
958 Token tok2 = tokens[index + 2];
959
960 switch (tok2.type())
961 {
962 case TOKEN_TYPE::KW_MACRO:
963 throw "Cannot have a nested macro!";
964
// Named data: emit "name:", record the name via add_mem_id, then translate the data
965 case TOKEN_TYPE::KW_DB:
966 case TOKEN_TYPE::KW_DW:
967 case TOKEN_TYPE::KW_DD:
968 case TOKEN_TYPE::KW_DQ:
969 printf("%s:%s", tok.str().c_str(), tok1.str().c_str());
970 add_mem_id(tok);
971 return translate_data_def(tokens, index + 2, macro_params);
972
// "name EQU expr" becomes "#define name expr"
973 case TOKEN_TYPE::KW_EQU:
974 //printf("%s%s", tok.str().c_str(), tok1.str().c_str());
975 printf("#define %s ", tok.str().c_str());
977 return translate_expression(tokens, index + 3, macro_params);
978
// "name TEXTEQU <text>" becomes "#define name text" (first and last character of the StringDef token dropped)
979 case TOKEN_TYPE::KW_TEXTEQU:
980 {
981 Token tok3 = get_ws(tokens[index + 3]);
982 Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef);
983
984 string textdef = tok4.str();
985 printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str());
987 return index + 5;
988 }
989
// "name PROC [FRAME]": function start plus a label; FRAME adds an unwind directive
990 case TOKEN_TYPE::KW_PROC:
991 {
992 printf(".func %s\n", tok.str().c_str());
993 printf("%s:", tok.str().c_str());
994 index += 3;
995
996 if ((tokens[index].type() == TOKEN_TYPE::WhiteSpace) &&
997 (tokens[index + 1].type() == TOKEN_TYPE::KW_FRAME))
998 {
999#ifdef TARGET_amd64
1000 printf("\n.seh_proc %s\n", tok.str().c_str());
1001#else
1002 printf("\n.cfi_startproc\n");
1003#endif
1004 index += 2;
1005 }
1007 break;
1008 }
1009
// NOTE(review): ".seh_endproc" is printed regardless of target and of whether
// FRAME was used on the matching PROC -- confirm this is intended.
1010 case TOKEN_TYPE::KW_ENDP:
1011 {
1012 printf(".seh_endproc\n.endfunc");
1013 index += 3;
1014 break;
1015 }
1016
1017 case TOKEN_TYPE::KW_RECORD:
1018 index = translate_record(tokens, index, macro_params);
1019 break;
1020
1021 default:
1022 // We don't know what it is, assume it's a macro and treat it like an instruction
1023 index = translate_instruction(tokens, index, macro_params);
1024 break;
1025 }
1026
1027 return index;
1028}
1029
1030size_t
1031translate_construct(TokenList& tokens, size_t index, const vector<string> &macro_params)
1032{
1033 Token tok = tokens[index];
1034
1035 switch (tok.type())
1036 {
1037 case TOKEN_TYPE::WhiteSpace:
1038 case TOKEN_TYPE::NewLine:
1039 case TOKEN_TYPE::Comment:
1040 return translate_token(tokens, index, macro_params);
1041
1042 case TOKEN_TYPE::Identifier:
1043 return translate_identifier_construct(tokens, index, macro_params);
1044
1045 case TOKEN_TYPE::KW_ALIGN:
1046 index = translate_construct_one_param(".align", tokens, index, macro_params);
1047 break;
1048
1049 case TOKEN_TYPE::KW_allocstack:
1050 index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params);
1051 break;
1052
1053 case TOKEN_TYPE::KW_code:
1054#ifdef TARGET_amd64
1055 printf(".code64");
1056#else
1057 printf(".code");
1058#endif
1059 printf(" .intel_syntax noprefix");
1060 index++;
1061 break;
1062
1063 case TOKEN_TYPE::KW_const:
1064 printf(".section .rdata");
1065 index++;
1066 break;
1067
1068 case TOKEN_TYPE::KW_DB:
1069 case TOKEN_TYPE::KW_DW:
1070 case TOKEN_TYPE::KW_DD:
1071 case TOKEN_TYPE::KW_DQ:
1072 return translate_data_def(tokens, index, macro_params);
1073
1074 case TOKEN_TYPE::KW_END:
1075 printf("// END\n");
1076 return tokens.size();
1077
1078 case TOKEN_TYPE::KW_endprolog:
1079 printf(".seh_endprologue");
1080 index++;
1081 break;
1082
1083 case TOKEN_TYPE::KW_EXTERN:
1084 {
1085 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1086 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier);
1087 add_mem_id(tok2);
1088 printf("//");
1089 return complete_line(tokens, index, macro_params);
1090 }
1091
1092 case TOKEN_TYPE::KW_if:
1093 case TOKEN_TYPE::KW_ifdef:
1094 case TOKEN_TYPE::KW_ifndef:
1095 case TOKEN_TYPE::KW_else:
1096 case TOKEN_TYPE::KW_endif:
1097 // TODO: handle parameter differences between "if" and ".if" etc.
1098 printf(".");
1099 return complete_line(tokens, index, macro_params);
1100
1101 case TOKEN_TYPE::KW_include:
1102 {
1103 // The next token should be white space
1104 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1105 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename);
1106 printf("#include \"%s.h\"", tok2.str().c_str());
1107 index += 3;
1108 break;
1109 }
1110
1111 case TOKEN_TYPE::KW_PUBLIC:
1112 index = translate_construct_one_param(".global", tokens, index, macro_params);
1113 break;
1114
1115 case TOKEN_TYPE::KW_savereg:
1116 printf(".seh_savereg");
1117 return complete_line(tokens, index + 1, macro_params);
1118
1119 case TOKEN_TYPE::KW_savexmm128:
1120 printf(".seh_savexmm");
1121 return complete_line(tokens, index + 1, macro_params);
1122
1123 case TOKEN_TYPE::Instruction:
1124 index = translate_instruction(tokens, index, macro_params);
1125 break;
1126
1127 case TOKEN_TYPE::KW_ERRDEF:
1128 printf("//");
1129 return complete_line(tokens, index, macro_params);
1130
1131 default:
1132 throw "failed to translate construct";
1133 }
1134
1135 // Skip optional white-space
1136 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
1137 {
1138 index++;
1139 }
1140
1141 // Line should end here!
1142 Token end = tokens[index];
1143 if ((end.type() != TOKEN_TYPE::Comment) &&
1144 (end.type() != TOKEN_TYPE::NewLine))
1145 {
1146 throw "unexpected tokens";
1147 }
1148
1149 return index;
1150}
1151
// Translates a "name MACRO params ... ENDM" definition into a
// ".macro name params ... .endm" block.
//
// Called as translate_macro(tokens, index), with index at the macro name.
// Returns the position after the ENDM token.
// Throws a const char* message when the token list ends before ENDM.
//
// NOTE(review): the declarator line is missing from this listing; from the
// call site and the body it presumably is
// "translate_macro(TokenList& tokens, size_t index)" -- restore it from the
// original file.
1152size_t
1154{
// Parameter names collected from the MACRO line
1155 vector<string> macro_params;
1156
1157 printf(".macro %s", tokens[index].str().c_str());
1158
1159 // Parse parameters
// Skip the macro name, the white space and the MACRO keyword
1160 index += 3;
1161 while (index < tokens.size())
1162 {
1163 Token tok = tokens[index];
1164 switch (tok.type())
1165 {
// The end of the line ends the parameter list: emit it and leave the loop
1166 case TOKEN_TYPE::NewLine:
1167 case TOKEN_TYPE::Comment:
1168 index = translate_token(tokens, index, macro_params);
1169 break;
1170
// Each identifier is recorded as a parameter name and echoed
1171 case TOKEN_TYPE::Identifier:
1172 macro_params.push_back(tok.str());
1173 printf("%s", tok.str().c_str());
1174 index++;
1175 continue;
1176
1177 case TOKEN_TYPE::WhiteSpace:
1178 case TOKEN_TYPE::Operator:
1179 index = translate_token(tokens, index, macro_params);
1180 continue;
1181 }
1182
// Reached for line ends and for any other token type
1183 break;
1184 }
1185
1186 // Parse content
1187 while (index < tokens.size())
1188 {
1189 Token tok = tokens[index];
1190 switch (tok.type())
1191 {
1192 case TOKEN_TYPE::KW_ENDM:
1193 printf(".endm");
1194 return index + 1;
1195
// Body lines are translated with the collected parameter names
1196 default:
1197 index = translate_construct(tokens, index, macro_params);
1198 }
1199 }
1200
1201 throw "Failed to translate macro";
1202 return -1;
1203}
1204
1205void
1207{
1208 size_t index = 0;
1209 size_t size = tokens.size();
1210 vector<string> empty_macro_params;
1211
1212 while (index < size)
1213 {
1214 // Macros are special
1215 if ((tokens[index].type() == TOKEN_TYPE::Identifier) &&
1216 (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) &&
1217 (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO))
1218 {
1219 index = translate_macro(tokens, index);
1220 }
1221 else
1222 {
1223 index = translate_construct(tokens, index, empty_macro_params);
1224 }
1225 }
1226}
1227
// Program entry point: reads the assembly file named by argv[1], tokenizes it
// with g_TokenList and writes the translation to stdout.
//
// Returns 0 on success, -1 when no file name was given, and -2 when a
// const char* exception was thrown during processing.
//
// NOTE(review): source lines 1244-1245 are missing from this listing; they
// presumably declare the input stream "file" and the string stream "buffer"
// used below -- restore them from the original file.
// NOTE(review): only const char* exceptions are caught here; an exception
// thrown by the standard library (e.g. by stoull in translate_token) would not
// be handled by this block.
1228int main(int argc, char* argv[])
1229{
1230 if (argc < 2)
1231 {
1232 fprintf(stderr, "Invalid parameter!\n");
1233 return -1;
1234 }
1235
1236#if PROFILING_ENABLED
1237 time_t start_time = time(NULL);
1238#endif
1239
1240 try
1241 {
1242 // Open and read the input file
1243 string filename(argv[1]);
1246 buffer << file.rdbuf();
1247 string text = buffer.str();
1248
1249 // Create the tokenizer
1250 Tokenizer tokenizer(g_TokenList);
1251
1252 // Get a token list
1253 TokenList toklist(tokenizer, text);
1254
1255 // Now translate the tokens
1256 translate(toklist);
1257 }
1258 catch (const char* message)
1259 {
1260 fprintf(stderr, "Exception caught: '%s'\n", message);
1261 return -2;
1262 }
1263
// Timing output, compiled in only when PROFILING_ENABLED is non-zero
1264#if PROFILING_ENABLED
1265 time_t total_time = time(NULL) + 1 - start_time;
1266 fprintf(stderr, "total_time = %llu\n", total_time);
1267 fprintf(stderr, "search_time = %llu\n", search_time);
1268 fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time);
1269#endif
1270
1271 return 0;
1272}
static int argc
Definition: ServiceArgs.c:12
int tolower(int c)
Definition: utclib.c:902
void add_mem_id(Token &tok)
Definition: asmpp.cpp:330
size_t translate_list(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:718
size_t translate_data_def(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:757
size_t translate_construct(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:1031
static void add_identifier(Token &tok, IDTYPE type)
Definition: asmpp.cpp:323
size_t translate_item(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:703
vector< TOKEN_DEF > g_TokenList
Definition: asmpp.cpp:225
void translate(TokenList &tokens)
Definition: asmpp.cpp:1206
size_t translate_token(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:409
Token get_ws_or_nl(Token &&tok)
Definition: asmpp.cpp:383
#define INSTRUCTION
Definition: asmpp.cpp:115
static bool is_jmp_or_call(const Token &tok)
Definition: asmpp.cpp:639
bool is_mem_id(Token &tok)
Definition: asmpp.cpp:336
int fake_printf(const char *format,...)
Definition: asmpp.cpp:98
size_t complete_line(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:469
unsigned int g_label_number
Definition: asmpp.cpp:297
bool is_string_in_list(vector< string > list, string str)
Definition: asmpp.cpp:395
time_t search_time
Definition: asmpp.cpp:26
size_t translate_expression(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:486
#define FOLLOWED_BY(x)
Definition: asmpp.cpp:107
Token get_expected_token(Token &&tok, TOKEN_TYPE type)
Definition: asmpp.cpp:362
vector< IDENTIFIER > g_identifiers
Definition: asmpp.cpp:319
bool iequals(const string &a, const string &b)
Definition: asmpp.cpp:350
TOKEN_TYPE
Definition: asmpp.cpp:29
@ KW_allocstack
Definition: asmpp.cpp:62
@ Reg32
Definition: asmpp.cpp:85
@ KW_FRAME
Definition: asmpp.cpp:74
@ KW_savereg
Definition: asmpp.cpp:63
@ KW_EXTERN
Definition: asmpp.cpp:52
@ KW_PROC
Definition: asmpp.cpp:73
@ StringDef
Definition: asmpp.cpp:45
@ MemRefEnd
Definition: asmpp.cpp:42
@ KW_ENDM
Definition: asmpp.cpp:54
@ BraceClose
Definition: asmpp.cpp:40
@ Comment
Definition: asmpp.cpp:34
@ KW_else
Definition: asmpp.cpp:59
@ Instruction
Definition: asmpp.cpp:82
@ KW_END
Definition: asmpp.cpp:55
@ KW_code
Definition: asmpp.cpp:49
@ KW_endprolog
Definition: asmpp.cpp:50
@ DecNumber
Definition: asmpp.cpp:35
@ Identifier
Definition: asmpp.cpp:95
@ KW_DW
Definition: asmpp.cpp:67
@ BraceOpen
Definition: asmpp.cpp:39
@ KW_RECORD
Definition: asmpp.cpp:76
@ Colon
Definition: asmpp.cpp:43
@ KW_ENDP
Definition: asmpp.cpp:75
@ Filename
Definition: asmpp.cpp:81
@ KW_EQU
Definition: asmpp.cpp:70
@ BYTE_PTR
Definition: asmpp.cpp:88
@ RegXmm
Definition: asmpp.cpp:87
@ WhiteSpace
Definition: asmpp.cpp:32
@ KW_DB
Definition: asmpp.cpp:66
@ MemRefStart
Definition: asmpp.cpp:41
@ QWORD_PTR
Definition: asmpp.cpp:91
@ KW_include
Definition: asmpp.cpp:47
@ KW_MACRO
Definition: asmpp.cpp:72
@ HexNumber
Definition: asmpp.cpp:36
@ KW_DD
Definition: asmpp.cpp:68
@ LabelName
Definition: asmpp.cpp:94
@ KW_const
Definition: asmpp.cpp:48
@ KW_TEXTEQU
Definition: asmpp.cpp:71
@ Reg8
Definition: asmpp.cpp:83
@ NewLine
Definition: asmpp.cpp:33
@ KW_ifndef
Definition: asmpp.cpp:58
@ KW_ifdef
Definition: asmpp.cpp:57
@ XMMWORD_PTR
Definition: asmpp.cpp:92
@ KW_PUBLIC
Definition: asmpp.cpp:53
@ Operator
Definition: asmpp.cpp:44
@ KW_DQ
Definition: asmpp.cpp:69
@ Eof
Definition: asmpp.cpp:31
@ KW_ALIGN
Definition: asmpp.cpp:51
@ KW_savexmm128
Definition: asmpp.cpp:64
@ DWORD_PTR
Definition: asmpp.cpp:90
@ String
Definition: asmpp.cpp:37
@ KW_endif
Definition: asmpp.cpp:60
@ KW_MASK
Definition: asmpp.cpp:78
@ KW_if
Definition: asmpp.cpp:56
@ WORD_PTR
Definition: asmpp.cpp:89
@ Invalid
Definition: asmpp.cpp:30
@ Reg16
Definition: asmpp.cpp:84
@ Reg64
Definition: asmpp.cpp:86
@ KW_ERRDEF
Definition: asmpp.cpp:79
Token get_ws(Token &&tok)
Definition: asmpp.cpp:372
IDTYPE
Definition: asmpp.cpp:302
size_t translate_identifier_construct(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:938
size_t translate_construct_one_param(string translated, TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:848
size_t translate_record(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:858
size_t translate_macro(TokenList &tokens, size_t index)
Definition: asmpp.cpp:1153
size_t translate_instruction(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:659
bool g_processing_jmp
Definition: asmpp.cpp:299
size_t translate_mem_ref(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:551
size_t translate_instruction_param(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:584
#define SEPARATOR
Definition: asmpp.cpp:113
#define index(s, c)
Definition: various.h:29
size_t size() const
Definition: tokenizer.hpp:239
int type() const
Definition: tokenizer.hpp:55
std::string str() const
Definition: tokenizer.hpp:50
_Self substr(size_type __pos=0, size_type __n=npos) const
Definition: _string.h:1022
const _CharT * c_str() const
Definition: _string.h:949
size_type size() const
Definition: _string.h:400
Definition: list.h:37
#define NULL
Definition: types.h:112
const WCHAR * text
Definition: package.c:1799
int main()
Definition: test.c:6
__kernel_time_t time_t
Definition: linux.h:252
#define printf
Definition: freeldr.h:93
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLdouble s
Definition: gl.h:2039
GLuint GLuint end
Definition: gl.h:1545
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
Definition: gl.h:1546
GLsizeiptr size
Definition: glext.h:5919
GLuint buffer
Definition: glext.h:5915
GLuint index
Definition: glext.h:6031
GLenum GLint GLuint mask
Definition: glext.h:6028
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
Definition: glext.h:10929
GLuint GLuint num
Definition: glext.h:9618
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLintptr offset
Definition: glext.h:5920
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define stderr
Definition: stdio.h:100
_Check_return_opt_ _CRTIMP int __cdecl fprintf(_Inout_ FILE *_File, _In_z_ _Printf_format_string_ const char *_Format,...)
#define bits
Definition: infblock.c:15
const char * filename
Definition: ioapi.h:137
__u16 time
Definition: mkdosfs.c:8
static unsigned int number
Definition: dsound.c:1479
static UINT PSTR DWORD UINT * need
Definition: parser.c:36
#define argv
Definition: mplay32.c:18
Definition: features.h:417
const WCHAR * str
IDTYPE Type
Definition: asmpp.cpp:316
string Name
Definition: asmpp.cpp:315
Definition: fci.c:127
Definition: tftpd.h:60
void push_back(const _Tp &__x=_STLP_DEFAULT_CONSTRUCTED(_Tp))
Definition: _vector.h:379