ReactOS 0.4.17-dev-243-g1369312
asmpp.cpp
Go to the documentation of this file.
1/*
2 * PROJECT: ReactOS host tools
3 * LICENSE: MIT (https://spdx.org/licenses/MIT)
4 * PURPOSE: ASM preprocessor
5 * COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
6 */
7
8// Optimize even on debug builds, because otherwise it's ridiculously slow
9#ifdef _MSC_VER
10#pragma optimize("gst", on)
11#pragma auto_inline(on)
12#else
13#pragma GCC optimize("O3,inline")
14#endif
15
16#include "tokenizer.hpp"
17#include <cstdlib>
18#include <cstdio>
19#include <sstream>
20#include <ctime>
21
22#define PROFILING_ENABLED 0
23
24using namespace std;
25
27
29{
30 Invalid = -1,
38
46
61
65
77
80
93
96};
97
// Drop-in replacement for printf that discards its arguments: nothing is
// written anywhere and the reported character count is always zero.
int fake_printf(const char* format, ...)
{
    // The format string (and any variadic arguments) are intentionally ignored
    static_cast<void>(format);

    const int chars_written = 0;
    return chars_written;
}
102
103//#define printf fake_printf
104
105// Use a look-ahead for following characters, not included into the match
106//#define FOLLOWED_BY(x) R"((?=)" x R"())"
107#define FOLLOWED_BY(x) x
108
109#define ANY_CHAR R"((?:.|\n))"
110#define WHITESPACE R"((?:[ \t]++))"
111#define NEWLINE R"([\n])"
112#define WS_OR_NL R"((?:)" WHITESPACE "|" NEWLINE R"()+)"
113#define SEPARATOR R"([\s,\=\+\-\*\/\:\~\[\]])"
114
115#define INSTRUCTION \
116 "AAA|AAD|AAM|AAS|ADC|ADCX|ADD|ADDPD|ADDPS|ADDSD|ADDSS|ADDSUBPD|ADDSUBPS|" \
117 "ADOX|AESDEC|AESDECLAST|AESENC|AESENCLAST|AESIMC|AESKEYGENASSIST|AND|ANDN|" \
118 "ANDNPD|ANDNPS|ANDPD|ANDPS|ARPL|BEXTR|BLENDPD|BLENDPS|BLENDVPD|BLENDVPS|" \
119 "BLSI|BLSMSK|BLSR|BNDCL|BNDCN|BNDCU|BNDLDX|BNDMK|BNDMOV|BNDSTX|BOUND|BSF|" \
120 "BSR|BSWAP|BT|BTC|BTR|BTS|BZHI|CALL|CBW|CDQ|CDQE|CLAC|CLC|CLD|CLDEMOTE|" \
121 "CLFLUSH|CLFLUSHOPT|CLI|CLTS|CLWB|CMC|CMOVcc|CMP|CMPPD|CMPPS|CMPS|CMPSB|" \
122 "CMPSD|CMPSQ|CMPSS|CMPSW|CMPXCHG|CMPXCHG16B|CMPXCHG8B|COMISD|COMISS|CPUID|" \
123 "CQO|CRC32|CVTDQ2PD|CVTDQ2PS|CVTPD2DQ|CVTPD2PI|CVTPD2PS|CVTPI2PD|CVTPI2PS|" \
124 "CVTPS2DQ|CVTPS2PD|CVTPS2PI|CVTSD2SI|CVTSD2SS|CVTSI2SD|CVTSI2SS|CVTSS2SD|" \
125 "CVTSS2SI|CVTTPD2DQ|CVTTPD2PI|CVTTPS2DQ|CVTTPS2PI|CVTTSD2SI|CVTTSS2SI|CWD|" \
126 "CWDE|DAA|DAS|DEC|DIV|DIVPD|DIVPS|DIVSD|DIVSS|DPPD|DPPS|EMMS|ENTER|" \
127 "EXTRACTPS|F2XM1|FABS|FADD|FADDP|FBLD|FBSTP|FCHS|FCLEX|FCMOVcc|FCOM|FCOMI|" \
128 "FCOMIP|FCOMP|FCOMPP|FCOS|FDECSTP|FDIV|FDIVP|FDIVR|FDIVRP|FFREE|FIADD|" \
129 "FICOM|FICOMP|FIDIV|FIDIVR|FILD|FIMUL|FINCSTP|FINIT|FIST|FISTP|FISTTP|" \
130 "FISUB|FISUBR|FLD|FLD1|FLDCW|FLDENV|FLDL2E|FLDL2T|FLDLG2|FLDLN2|FLDPI|" \
131 "FLDZ|FMUL|FMULP|FNCLEX|FNINIT|FNOP|FNSAVE|FNSTCW|FNSTENV|FNSTSW|FPATAN|" \
132 "FPREM|FPREM1|FPTAN|FRNDINT|FRSTOR|FSAVE|FSCALE|FSIN|FSINCOS|FSQRT|FST|" \
133 "FSTCW|FSTENV|FSTP|FSTSW|FSUB|FSUBP|FSUBR|FSUBRP|FTST|FUCOM|FUCOMI|" \
134 "FUCOMIP|FUCOMP|FUCOMPP|FWAIT|FXAM|FXCH|FXRSTOR|FXSAVE|FXTRACT|FYL2X|" \
135 "FYL2XP1|GF2P8AFFINEINVQB|GF2P8AFFINEQB|GF2P8MULB|HADDPD|HADDPS|HLT|" \
136 "HSUBPD|HSUBPS|IDIV|IMUL|IN|INC|INS|INSB|INSD|INSERTPS|INSW|INT|INT1|INT3|" \
137 "INTO|INVD|INVLPG|INVPCID|IRET|IRETD|JMP|Jcc|KADDB|KADDD|KADDQ|KADDW|" \
138 "KANDB|KANDD|KANDNB|KANDND|KANDNQ|KANDNW|KANDQ|KANDW|KMOVB|KMOVD|KMOVQ|" \
139 "KMOVW|KNOTB|KNOTD|KNOTQ|KNOTW|KORB|KORD|KORQ|KORTESTB|KORTESTD|KORTESTQ|" \
140 "KORTESTW|KORW|KSHIFTLB|KSHIFTLD|KSHIFTLQ|KSHIFTLW|KSHIFTRB|KSHIFTRD|" \
141 "KSHIFTRQ|KSHIFTRW|KTESTB|KTESTD|KTESTQ|KTESTW|KUNPCKBW|KUNPCKDQ|KUNPCKWD|" \
142 "KXNORB|KXNORD|KXNORQ|KXNORW|KXORB|KXORD|KXORQ|KXORW|LAHF|LAR|LDDQU|" \
143 "LDMXCSR|LDS|LEA|LEAVE|LES|LFENCE|LFS|LGDT|LGS|LIDT|LLDT|LMSW|LOCK|LODS|" \
144 "LODSB|LODSD|LODSQ|LODSW|LOOP|LOOPcc|LSL|LSS|LTR|LZCNT|MASKMOVDQU|MASKMOVQ|" \
145 "MAXPD|MAXPS|MAXSD|MAXSS|MFENCE|MINPD|MINPS|MINSD|MINSS|MONITOR|MOV|MOVAPD|" \
146 "MOVAPS|MOVBE|MOVD|MOVDDUP|MOVDIR64B|MOVDIRI|MOVDQ2Q|MOVDQA|MOVDQU|MOVHLPS|" \
147 "MOVHPD|MOVHPS|MOVLHPS|MOVLPD|MOVLPS|MOVMSKPD|MOVMSKPS|MOVNTDQ|MOVNTDQA|" \
148 "MOVNTI|MOVNTPD|MOVNTPS|MOVNTQ|MOVQ|MOVQ2DQ|MOVS|MOVSB|MOVSD|MOVSHDUP|" \
149 "MOVSLDUP|MOVSQ|MOVSS|MOVSW|MOVSX|MOVSXD|MOVUPD|MOVUPS|MOVZX|MPSADBW|MUL|" \
150 "MULPD|MULPS|MULSD|MULSS|MULX|MWAIT|NEG|NOP|NOT|OR|ORPD|ORPS|OUT|OUTS|" \
151 "OUTSB|OUTSD|OUTSW|PABSB|PABSD|PABSQ|PABSW|PACKSSDW|PACKSSWB|PACKUSDW|" \
152 "PACKUSWB|PADDB|PADDD|PADDQ|PADDSB|PADDSW|PADDUSB|PADDUSW|PADDW|PALIGNR|" \
153 "PAND|PANDN|PAUSE|PAVGB|PAVGW|PBLENDVB|PBLENDW|PCLMULQDQ|PCMPEQB|PCMPEQD|" \
154 "PCMPEQQ|PCMPEQW|PCMPESTRI|PCMPESTRM|PCMPGTB|PCMPGTD|PCMPGTQ|PCMPGTW|" \
155 "PCMPISTRI|PCMPISTRM|PDEP|PEXT|PEXTRB|PEXTRD|PEXTRQ|PEXTRW|PHADDD|PHADDSW|" \
156 "PHADDW|PHMINPOSUW|PHSUBD|PHSUBSW|PHSUBW|PINSRB|PINSRD|PINSRQ|PINSRW|" \
157 "PMADDUBSW|PMADDWD|PMAXSB|PMAXSD|PMAXSQ|PMAXSW|PMAXUB|PMAXUD|PMAXUQ|PMAXUW|" \
158 "PMINSB|PMINSD|PMINSQ|PMINSW|PMINUB|PMINUD|PMINUQ|PMINUW|PMOVMSKB|PMOVSX|" \
159 "PMOVZX|PMULDQ|PMULHRSW|PMULHUW|PMULHW|PMULLD|PMULLQ|PMULLW|PMULUDQ|POP|" \
160 "POPA|POPAD|POPCNT|POPF|POPFD|POPFQ|POR|PREFETCHW|PREFETCHh|PSADBW|PSHUFB|" \
161 "PSHUFD|PSHUFHW|PSHUFLW|PSHUFW|PSIGNB|PSIGND|PSIGNW|PSLLD|PSLLDQ|PSLLQ|" \
162 "PSLLW|PSRAD|PSRAQ|PSRAW|PSRLD|PSRLDQ|PSRLQ|PSRLW|PSUBB|PSUBD|PSUBQ|PSUBSB|" \
163 "PSUBSW|PSUBUSB|PSUBUSW|PSUBW|PTEST|PTWRITE|PUNPCKHBW|PUNPCKHDQ|PUNPCKHQDQ|" \
164 "PUNPCKHWD|PUNPCKLBW|PUNPCKLDQ|PUNPCKLQDQ|PUNPCKLWD|PUSH|PUSHA|PUSHAD|" \
165 "PUSHF|PUSHFD|PUSHFQ|PXOR|RCL|RCPPS|RCPSS|RCR|RDFSBASE|RDGSBASE|RDMSR|" \
166 "RDPID|RDPKRU|RDPMC|RDRAND|RDSEED|RDTSC|RDTSCP|REP|REPE|REPNE|REPNZ|REPZ|" \
167 "RET|ROL|ROR|RORX|ROUNDPD|ROUNDPS|ROUNDSD|ROUNDSS|RSM|RSQRTPS|RSQRTSS|SAHF|" \
168 "SAL|SAR|SARX|SBB|SCAS|SCASB|SCASD|SCASW|SETcc|SFENCE|SGDT|SHA1MSG1|" \
169 "SHA1MSG2|SHA1NEXTE|SHA1RNDS4|SHA256MSG1|SHA256MSG2|SHA256RNDS2|SHL|SHLD|" \
170 "SHLX|SHR|SHRD|SHRX|SHUFPD|SHUFPS|SIDT|SLDT|SMSW|SQRTPD|SQRTPS|SQRTSD|" \
171 "SQRTSS|STAC|STC|STD|STI|STMXCSR|STOS|STOSB|STOSD|STOSQ|STOSW|STR|SUB|" \
172 "SUBPD|SUBPS|SUBSD|SUBSS|SWAPGS|SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|" \
173 "TPAUSE|TZCNT|UCOMISD|UCOMISS|UD|UMONITOR|UMWAIT|UNPCKHPD|UNPCKHPS|" \
174 "UNPCKLPD|UNPCKLPS|VALIGND|VALIGNQ|VBLENDMPD|VBLENDMPS|VBROADCAST|" \
175 "VCOMPRESSPD|VCOMPRESSPS|VCVTPD2QQ|VCVTPD2UDQ|VCVTPD2UQQ|VCVTPH2PS|" \
176 "VCVTPS2PH|VCVTPS2QQ|VCVTPS2UDQ|VCVTPS2UQQ|VCVTQQ2PD|VCVTQQ2PS|VCVTSD2USI|" \
177 "VCVTSS2USI|VCVTTPD2QQ|VCVTTPD2UDQ|VCVTTPD2UQQ|VCVTTPS2QQ|VCVTTPS2UDQ|" \
178 "VCVTTPS2UQQ|VCVTTSD2USI|VCVTTSS2USI|VCVTUDQ2PD|VCVTUDQ2PS|VCVTUQQ2PD|" \
179 "VCVTUQQ2PS|VCVTUSI2SD|VCVTUSI2SS|VDBPSADBW|VERR|VERW|VEXPANDPD|VEXPANDPS|" \
180 "VEXTRACTF128|VEXTRACTF32x4|VEXTRACTF32x8|VEXTRACTF64x2|VEXTRACTF64x4|" \
181 "VEXTRACTI128|VEXTRACTI32x4|VEXTRACTI32x8|VEXTRACTI64x2|VEXTRACTI64x4|" \
182 "VFIXUPIMMPD|VFIXUPIMMPS|VFIXUPIMMSD|VFIXUPIMMSS|VFMADD132PD|VFMADD132PS|" \
183 "VFMADD132SD|VFMADD132SS|VFMADD213PD|VFMADD213PS|VFMADD213SD|VFMADD213SS|" \
184 "VFMADD231PD|VFMADD231PS|VFMADD231SD|VFMADD231SS|VFMADDSUB132PD|" \
185 "VFMADDSUB132PS|VFMADDSUB213PD|VFMADDSUB213PS|VFMADDSUB231PD|" \
186 "VFMADDSUB231PS|VFMSUB132PD|VFMSUB132PS|VFMSUB132SD|VFMSUB132SS|" \
187 "VFMSUB213PD|VFMSUB213PS|VFMSUB213SD|VFMSUB213SS|VFMSUB231PD|VFMSUB231PS|" \
188 "VFMSUB231SD|VFMSUB231SS|VFMSUBADD132PD|VFMSUBADD132PS|VFMSUBADD213PD|" \
189 "VFMSUBADD213PS|VFMSUBADD231PD|VFMSUBADD231PS|VFNMADD132PD|VFNMADD132PS|" \
190 "VFNMADD132SD|VFNMADD132SS|VFNMADD213PD|VFNMADD213PS|VFNMADD213SD|" \
191 "VFNMADD213SS|VFNMADD231PD|VFNMADD231PS|VFNMADD231SD|VFNMADD231SS|" \
192 "VFNMSUB132PD|VFNMSUB132PS|VFNMSUB132SD|VFNMSUB132SS|VFNMSUB213PD|" \
193 "VFNMSUB213PS|VFNMSUB213SD|VFNMSUB213SS|VFNMSUB231PD|VFNMSUB231PS|" \
194 "VFNMSUB231SD|VFNMSUB231SS|VFPCLASSPD|VFPCLASSPS|VFPCLASSSD|VFPCLASSSS|" \
195 "VGATHERDPD|VGATHERDPS|VGATHERQPD|VGATHERQPS|VGETEXPPD|VGETEXPPS|VGETEXPSD|" \
196 "VGETEXPSS|VGETMANTPD|VGETMANTPS|VGETMANTSD|VGETMANTSS|VINSERTF128|" \
197 "VINSERTF32x4|VINSERTF32x8|VINSERTF64x2|VINSERTF64x4|VINSERTI128|" \
198 "VINSERTI32x4|VINSERTI32x8|VINSERTI64x2|VINSERTI64x4|VMASKMOV|VMOVDQA32|" \
199 "VMOVDQA64|VMOVDQU16|VMOVDQU32|VMOVDQU64|VMOVDQU8|VPBLENDD|VPBLENDMB|" \
200 "VPBLENDMD|VPBLENDMQ|VPBLENDMW|VPBROADCAST|VPBROADCASTB|VPBROADCASTD|" \
201 "VPBROADCASTM|VPBROADCASTQ|VPBROADCASTW|VPCMPB|VPCMPD|VPCMPQ|VPCMPUB|" \
202 "VPCMPUD|VPCMPUQ|VPCMPUW|VPCMPW|VPCOMPRESSD|VPCOMPRESSQ|VPCONFLICTD|" \
203 "VPCONFLICTQ|VPERM2F128|VPERM2I128|VPERMB|VPERMD|VPERMI2B|VPERMI2D|" \
204 "VPERMI2PD|VPERMI2PS|VPERMI2Q|VPERMI2W|VPERMILPD|VPERMILPS|VPERMPD|VPERMPS|" \
205 "VPERMQ|VPERMT2B|VPERMT2D|VPERMT2PD|VPERMT2PS|VPERMT2Q|VPERMT2W|VPERMW|" \
206 "VPEXPANDD|VPEXPANDQ|VPGATHERDD|VPGATHERDQ|VPGATHERQD|VPGATHERQQ|VPLZCNTD|" \
207 "VPLZCNTQ|VPMADD52HUQ|VPMADD52LUQ|VPMASKMOV|VPMOVB2M|VPMOVD2M|VPMOVDB|" \
208 "VPMOVDW|VPMOVM2B|VPMOVM2D|VPMOVM2Q|VPMOVM2W|VPMOVQ2M|VPMOVQB|VPMOVQD|" \
209 "VPMOVQW|VPMOVSDB|VPMOVSDW|VPMOVSQB|VPMOVSQD|VPMOVSQW|VPMOVSWB|VPMOVUSDB|" \
210 "VPMOVUSDW|VPMOVUSQB|VPMOVUSQD|VPMOVUSQW|VPMOVUSWB|VPMOVW2M|VPMOVWB|" \
211 "VPMULTISHIFTQB|VPROLD|VPROLQ|VPROLVD|VPROLVQ|VPRORD|VPRORQ|VPRORVD|" \
212 "VPRORVQ|VPSCATTERDD|VPSCATTERDQ|VPSCATTERQD|VPSCATTERQQ|VPSLLVD|VPSLLVQ|" \
213 "VPSLLVW|VPSRAVD|VPSRAVQ|VPSRAVW|VPSRLVD|VPSRLVQ|VPSRLVW|VPTERNLOGD|" \
214 "VPTERNLOGQ|VPTESTMB|VPTESTMD|VPTESTMQ|VPTESTMW|VPTESTNMB|VPTESTNMD|" \
215 "VPTESTNMQ|VPTESTNMW|VRANGEPD|VRANGEPS|VRANGESD|VRANGESS|VRCP14PD|VRCP14PS|" \
216 "VRCP14SD|VRCP14SS|VREDUCEPD|VREDUCEPS|VREDUCESD|VREDUCESS|VRNDSCALEPD|" \
217 "VRNDSCALEPS|VRNDSCALESD|VRNDSCALESS|VRSQRT14PD|VRSQRT14PS|VRSQRT14SD|" \
218 "VRSQRT14SS|VSCALEFPD|VSCALEFPS|VSCALEFSD|VSCALEFSS|VSCATTERDPD|" \
219 "VSCATTERDPS|VSCATTERQPD|VSCATTERQPS|VSHUFF32x4|VSHUFF64x2|VSHUFI32x4|" \
220 "VSHUFI64x2|VTESTPD|VTESTPS|VZEROALL|VZEROUPPER|WAIT|WBINVD|WRFSBASE|" \
221 "WRGSBASE|WRMSR|WRPKRU|XABORT|XACQUIRE|XADD|XBEGIN|XCHG|XEND|XGETBV|XLAT|" \
222 "XLATB|XOR|XORPD|XORPS|XRELEASE|XRSTOR|XRSTORS|XSAVE|XSAVEC|XSAVEOPT|" \
223 "XSAVES|XSETBV|XTEST"
224
226{
227 //{ TOKEN_TYPE::WhiteSpace, R"((\s+))" },
228 { TOKEN_TYPE::WhiteSpace, R"(([ \t]+))" },
229 { TOKEN_TYPE::NewLine, R"((\n))" },
230 { TOKEN_TYPE::Comment, R"((;.*\n))" },
231 { TOKEN_TYPE::HexNumber, R"(([0-9][0-9a-f]*h))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\‍(\)])") },
232 { TOKEN_TYPE::DecNumber, R"(([0-9]+))" FOLLOWED_BY(R"([\s\n\+\-\*\/,=!\]\‍(\)])") },
233 { TOKEN_TYPE::String, R"((\".*\"))" },
234
235 { TOKEN_TYPE::BraceOpen, R"((\‍())"},
236 { TOKEN_TYPE::BraceClose, R"((\)))"},
237 { TOKEN_TYPE::MemRefStart, R"((\[))"},
238 { TOKEN_TYPE::MemRefEnd, R"((\]))"},
239 { TOKEN_TYPE::Colon, R"((\:))"},
240 { TOKEN_TYPE::Operator, R"(([,\+\-\*\/\:]))"},
241 { TOKEN_TYPE::StringDef, R"((<.+>))" },
242
243 { TOKEN_TYPE::KW_include, R"((include))" FOLLOWED_BY(R"([\s])") },
244 { TOKEN_TYPE::KW_const, R"((\.const))" FOLLOWED_BY(R"([\s])") },
245 { TOKEN_TYPE::KW_code, R"((\.code))" FOLLOWED_BY(R"([\s])") },
246 { TOKEN_TYPE::KW_endprolog, R"((\.endprolog))" FOLLOWED_BY(R"([\s])") },
247 { TOKEN_TYPE::KW_ALIGN, R"((ALIGN))" FOLLOWED_BY(R"([\s])") },
248 { TOKEN_TYPE::KW_EXTERN, R"((EXTERN))" FOLLOWED_BY(R"([\s])") },
249 { TOKEN_TYPE::KW_EXTERN, R"((EXTRN))" FOLLOWED_BY(R"([\s])") },
250 { TOKEN_TYPE::KW_PUBLIC, R"((PUBLIC))" FOLLOWED_BY(R"([\s])") },
251 { TOKEN_TYPE::KW_ENDM, R"((ENDM))" FOLLOWED_BY(R"([\s\;])") },
252 { TOKEN_TYPE::KW_END, R"((END))" FOLLOWED_BY(R"([\s])") },
253 { TOKEN_TYPE::KW_if, R"((if))" FOLLOWED_BY(R"([\s])") },
254 { TOKEN_TYPE::KW_ifdef, R"((ifdef))" FOLLOWED_BY(R"([\s])")},
255 { TOKEN_TYPE::KW_ifndef, R"((ifndef))" FOLLOWED_BY(R"([\s])")},
256 { TOKEN_TYPE::KW_else, R"((else))" FOLLOWED_BY(R"([\s])")},
257 { TOKEN_TYPE::KW_endif, R"((endif))" FOLLOWED_BY(R"([\s])")},
258
259 { TOKEN_TYPE::KW_allocstack, R"((.allocstack))" FOLLOWED_BY(R"([\s])") },
260 { TOKEN_TYPE::KW_savereg, R"((.savereg))" FOLLOWED_BY(R"([\s])") },
261 { TOKEN_TYPE::KW_savexmm128, R"((.savexmm128))" FOLLOWED_BY(R"([\s])") },
262
263 { TOKEN_TYPE::KW_DB, R"((DB))" FOLLOWED_BY(R"([\s])") },
264 { TOKEN_TYPE::KW_DW, R"((DW))" FOLLOWED_BY(R"([\s])") },
265 { TOKEN_TYPE::KW_DD, R"((DD))" FOLLOWED_BY(R"([\s])") },
266 { TOKEN_TYPE::KW_DQ, R"((DQ))" FOLLOWED_BY(R"([\s])") },
267 { TOKEN_TYPE::KW_EQU, R"((EQU))" FOLLOWED_BY(R"([\s])") },
268 { TOKEN_TYPE::KW_TEXTEQU, R"((TEXTEQU))" FOLLOWED_BY(R"([\s])") },
269 { TOKEN_TYPE::KW_MACRO, R"((MACRO))" FOLLOWED_BY(R"([\s\;])") },
270 { TOKEN_TYPE::KW_PROC, R"((PROC))" FOLLOWED_BY(R"([\s\;])") },
271 { TOKEN_TYPE::KW_FRAME, R"((FRAME))" FOLLOWED_BY(R"([\s\;])") },
272 { TOKEN_TYPE::KW_ENDP, R"((ENDP))" FOLLOWED_BY(R"([\s\;])") },
273 { TOKEN_TYPE::KW_RECORD, R"((RECORD))" FOLLOWED_BY(R"([\s\;])") },
274 { TOKEN_TYPE::KW_MASK, R"((MASK))" FOLLOWED_BY(R"([\s\;])")},
275 { TOKEN_TYPE::KW_ERRDEF, R"((\.ERRDEF))" FOLLOWED_BY(R"([\s\;])")},
276
277 { TOKEN_TYPE::Filename, R"(([a-z_][a-z0-9_]*\.inc))" FOLLOWED_BY(R"([\s])") },
278 { TOKEN_TYPE::Instruction, "(" INSTRUCTION ")" FOLLOWED_BY(R"([\s])") },
279 { TOKEN_TYPE::Reg8, R"((al|ah|bl|bh|cl|ch|dl|dh|sil|dil|bpl|spl|r8b|r9b|r10b|r11b|r12b|r13b|r14b|r15b))" FOLLOWED_BY(R"([\s\,])") },
280 { TOKEN_TYPE::Reg16, R"((ax|bx|cx|dx|si|di|bp|sp|r8w|r9w|r10w|r11w|r12w|r13w|r14w|r15w))" FOLLOWED_BY(R"([\s\,])") },
281 { TOKEN_TYPE::Reg32, R"((eax|ebx|ecx|edx|esi|edi|ebp|esp|r8d|r9d|r10d|r11d|r12d|r13d|r14d|r15d))" FOLLOWED_BY(R"([\s\,])") },
282 { TOKEN_TYPE::Reg64, R"((rax|rbx|rcx|rdx|rsi|rdi|rbp|rsp|r8|r9|r10|r11|r12|r13|r14|r15))" FOLLOWED_BY(R"([\s\,])") },
283 { TOKEN_TYPE::RegXmm, R"((xmm0|xmm1|xmm2|xmm3|xmm4|xmm5|xmm6|xmm7|xmm8|xmm9|xmm10|xmm11|xmm12|xmm13|xmm14|xmm15))" FOLLOWED_BY(R"([\s\,])") },
284 { TOKEN_TYPE::BYTE_PTR, R"((BYTE[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
285 { TOKEN_TYPE::WORD_PTR, R"((WORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
286 { TOKEN_TYPE::DWORD_PTR, R"((DWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
287 { TOKEN_TYPE::QWORD_PTR, R"((QWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
288 { TOKEN_TYPE::XMMWORD_PTR, R"((XMMWORD[\s]+PTR))" FOLLOWED_BY(R"([\s\[])") },
289
290 { TOKEN_TYPE::Identifier, R"((@@))" FOLLOWED_BY(SEPARATOR)},
291 { TOKEN_TYPE::Identifier, R"((@[a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
292 { TOKEN_TYPE::Identifier, R"(([a-z_][a-z0-9_]*))" FOLLOWED_BY(SEPARATOR)},
293
294};
295
296// FIXME: use context?
297unsigned int g_label_number = 0;
298
299bool g_processing_jmp = false;
300
302
303enum class IDTYPE
304{
305 Memory,
306 Register,
307 Label,
308 Constant,
309 Macro,
311 String,
312 Unknown
313};
314
316{
317 string Name;
319};
320
322
323static
324void
326{
327 g_identifiers.push_back(IDENTIFIER{ tok.str(), type });
328 //fprintf(stderr, "Added id: '%s'\n", tok.str().c_str());
329}
330
331void
333{
335}
336
337bool
339{
340 for (IDENTIFIER& identifier : g_identifiers)
341 {
342 if (identifier.Name == tok.str())
343 {
344 return identifier.Type == IDTYPE::Memory;
345 }
346 }
347
348 return true;
349}
350
// Case-insensitive string equality.
//
// Returns true when a and b have the same length and every pair of
// characters at the same position compares equal after tolower().
// Two empty strings compare equal.
bool
iequals(const std::string &a, const std::string &b)
{
    const size_t length = a.size();
    if (b.size() != length)
        return false;

    for (size_t i = 0; i < length; ++i)
    {
        // tolower() is only defined for arguments representable as
        // unsigned char (or EOF), so convert before calling it; passing a
        // negative plain char is undefined behavior.
        const int lhs = tolower(static_cast<unsigned char>(a[i]));
        const int rhs = tolower(static_cast<unsigned char>(b[i]));
        if (lhs != rhs)
            return false;
    }

    return true;
}
362
363const char*
365{
366 const struct
367 {
368 const char* masm;
369 const char* gas;
370 } operators[] = {
371 {"and", "&"}, {"or", "|"}, {"shl", "<<"}, {"shr", ">>"}, {"not", "~"},
372 {"eq", "=="}, {"ne", "!="}, {"lt", "<"}, {"le", "<="}, {"gt", ">"}, {"ge", ">="},
373 };
374
375 for (const auto& entry : operators)
376 {
377 if (iequals(op, entry.masm))
378 return entry.gas;
379 }
380
381 return nullptr;
382}
383
384Token
386{
387 if (tok.type() != type)
388 {
389 throw "Not white space after identifier!\n";
390 }
391
392 return tok;
393}
394
396{
397 int type = tok.type();
398 if (type != TOKEN_TYPE::WhiteSpace)
399 {
400 throw "Not white space after identifier!\n";
401 }
402
403 return tok;
404}
405
407{
408 int type = tok.type();
409 if ((type != TOKEN_TYPE::WhiteSpace) &&
410 (type != TOKEN_TYPE::NewLine))
411 {
412 throw "Not white space after identifier!\n";
413 }
414
415 return tok;
416}
417
419{
420 for (string &s : list)
421 {
422 if (s == str)
423 {
424 return true;
425 }
426 }
427
428 return false;
429}
430
431size_t
432translate_token(TokenList& tokens, size_t index, const vector<string> &macro_params)
433{
434 Token tok = tokens[index];
435 switch (tok.type())
436 {
437 case TOKEN_TYPE::Comment:
438 printf("//%s", tok.str().c_str() + 1);
439 break;
440
441 case TOKEN_TYPE::DecNumber:
442 {
443 unsigned long long num = stoull(tok.str(), nullptr, 10);
444 printf("%llu", num);
445 break;
446 }
447
448 case TOKEN_TYPE::HexNumber:
449 {
450 string number = tok.str();
451 printf("0x%s", number.substr(0, number.size() - 1).c_str());
452 break;
453 }
454
455 case TOKEN_TYPE::Identifier:
456 if (is_string_in_list(macro_params, tok.str()))
457 {
458 printf("\\");
459 }
460 printf("%s", tok.str().c_str());
461 break;
462
463 // We migt want to improve these
464 case TOKEN_TYPE::BYTE_PTR:
465 case TOKEN_TYPE::WORD_PTR:
466 case TOKEN_TYPE::DWORD_PTR:
467 case TOKEN_TYPE::QWORD_PTR:
468 case TOKEN_TYPE::XMMWORD_PTR:
469
470 // Check these. valid only in instructions?
471 case TOKEN_TYPE::Reg8:
472 case TOKEN_TYPE::Reg16:
473 case TOKEN_TYPE::Reg32:
474 case TOKEN_TYPE::Reg64:
475 case TOKEN_TYPE::RegXmm:
476 case TOKEN_TYPE::Instruction:
477
478 case TOKEN_TYPE::WhiteSpace:
479 case TOKEN_TYPE::NewLine:
480 case TOKEN_TYPE::Operator:
481 printf("%s", tok.str().c_str());
482 break;
483
484 default:
485 printf("%s", tok.str().c_str());
486 break;
487 }
488
489 return index + 1;
490}
491
492size_t complete_line(TokenList &tokens, size_t index, const vector<string> &macro_params)
493{
494 while (index < tokens.size())
495 {
496 Token tok = tokens[index];
497 index = translate_token(tokens, index, macro_params);
498 if ((tok.type() == TOKEN_TYPE::NewLine) ||
499 (tok.type() == TOKEN_TYPE::Comment))
500 {
501 break;
502 }
503 }
504
505 return index;
506}
507
508size_t
509translate_expression(TokenList &tokens, size_t index, const vector<string> &macro_params)
510{
511 while (index < tokens.size())
512 {
513 Token tok = tokens[index];
514 switch (tok.type())
515 {
516 case TOKEN_TYPE::NewLine:
517 case TOKEN_TYPE::Comment:
518 return index;
519
520 case TOKEN_TYPE::KW_MASK:
521 printf("MASK_");
522 index += 2;
523 break;
524
525 case TOKEN_TYPE::Instruction:
526 {
527 const char* op = get_expression_operator(tok.str());
528 if (!op)
529 {
530 throw "Invalid expression";
531 }
532 printf("%s", op);
533 index += 1;
534 break;
535 }
536
537 case TOKEN_TYPE::Operator:
538 if (tok.str() == ",")
539 {
540 return index;
541 }
542 index = translate_token(tokens, index, macro_params);
543 break;
544
545 case TOKEN_TYPE::Identifier:
546 {
547 const char* op = get_expression_operator(tok.str());
548 if (op)
549 {
550 printf("%s", op);
551 index += 1;
552 }
553 else
554 {
555 index = translate_token(tokens, index, macro_params);
556 }
557 break;
558 }
559
560 case TOKEN_TYPE::WhiteSpace:
561 case TOKEN_TYPE::BraceOpen:
562 case TOKEN_TYPE::BraceClose:
563 case TOKEN_TYPE::DecNumber:
564 case TOKEN_TYPE::HexNumber:
565 index = translate_token(tokens, index, macro_params);
566 break;
567
568 default:
569 index = translate_token(tokens, index, macro_params);
570 }
571 }
572
573 return index;
574}
575
576size_t translate_mem_ref(TokenList& tokens, size_t index, const vector<string>& macro_params)
577{
578 unsigned int offset = 0;
579
580 Token tok = tokens[index];
581
582 if ((tok.type() == TOKEN_TYPE::DecNumber) ||
583 (tok.type() == TOKEN_TYPE::HexNumber))
584 {
585 offset = stoi(tok.str(), nullptr, 0);
586 index += 2;
587 }
588
589 index = translate_token(tokens, index, macro_params);
590
591 while (index < tokens.size())
592 {
593 Token tok = tokens[index];
594 index = translate_token(tokens, index, macro_params);
595 if (tok.type() == TOKEN_TYPE::MemRefEnd)
596 {
597 if (offset != 0)
598 {
599 printf(" + %u", offset);
600 }
601 return index;
602 }
603 }
604
605 throw "Failed to translate memory ref";
606 return index;
607}
608
609static
610bool
612{
613 return ((tok.type() == TOKEN_TYPE::DecNumber) ||
614 (tok.type() == TOKEN_TYPE::HexNumber));
615}
616
617static
618size_t
620{
621 if ((index < tokens.size()) &&
622 (tokens[index].type() == TOKEN_TYPE::WhiteSpace))
623 {
624 index++;
625 }
626
627 return index;
628}
629
630static
631size_t
632translate_rip_relative_offset(TokenList& tokens, size_t index, const vector<string>& macro_params)
633{
634 size_t operatorIndex = skip_whitespace(tokens, index);
635 if ((operatorIndex == tokens.size()) ||
636 (tokens[operatorIndex].type() != TOKEN_TYPE::Operator) ||
637 ((tokens[operatorIndex].str() != "+") &&
638 (tokens[operatorIndex].str() != "-")))
639 {
640 return index;
641 }
642
643 size_t numberIndex = skip_whitespace(tokens, operatorIndex + 1);
644 if ((numberIndex == tokens.size()) ||
645 !is_number_token(tokens[numberIndex]))
646 {
647 return index;
648 }
649
650 while (index <= numberIndex)
651 {
652 index = translate_token(tokens, index, macro_params);
653 }
654
655 return index;
656}
657
658size_t translate_instruction_param(TokenList& tokens, size_t index, const vector<string>& macro_params)
659{
660 switch (tokens[index].type())
661 {
662 case TOKEN_TYPE::BYTE_PTR:
663 case TOKEN_TYPE::WORD_PTR:
664 case TOKEN_TYPE::DWORD_PTR:
665 case TOKEN_TYPE::QWORD_PTR:
666 case TOKEN_TYPE::XMMWORD_PTR:
667 index = translate_token(tokens, index, macro_params);
668
669 // Optional whitespace
670 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
671 {
672 index = translate_token(tokens, index, macro_params);
673 }
674 }
675
676 while (index < tokens.size())
677 {
678 Token tok = tokens[index];
679 switch (tok.type())
680 {
681 case TOKEN_TYPE::MemRefStart:
682 return translate_mem_ref(tokens, index, macro_params);
683
684 case TOKEN_TYPE::NewLine:
685 case TOKEN_TYPE::Comment:
686 return index;
687
688 case TOKEN_TYPE::Operator:
689 if (tok.str() == ",")
690 return index;
691 return translate_token(tokens, index, macro_params);
692
693 case TOKEN_TYPE::Identifier:
694 index = translate_token(tokens, index, macro_params);
695 if (is_mem_id(tok) &&
696 !is_string_in_list(macro_params, tok.str()) &&
698 {
699 index = translate_rip_relative_offset(tokens, index, macro_params);
700 printf("[rip]");
701 }
702 break;
703
704 default:
705 index = translate_expression(tokens, index, macro_params);
706 }
707 }
708
709 return index;
710}
711
712static
713bool
715{
716 const char* inst_list[] = {
717 "jmp", "call", "ja", "jae", "jb", "jbe", "jc", "jcxz", "je", "jecxz", "jg", "jge",
718 "jl", "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl", "jnle",
719 "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "jrcxz", "js", "jz", "loop", "loope",
720 "loopne", "loopnz", "loopz"
721 };
722
723 for (const char* inst : inst_list)
724 {
725 if (iequals(tok.str(), inst))
726 {
727 return true;
728 }
729 }
730
731 return false;
732}
733
734size_t translate_instruction(TokenList& tokens, size_t index, const vector<string>& macro_params)
735{
736 // Check for jump/call instructions
737 if (is_jmp_or_call(tokens[index]))
738 {
739 g_processing_jmp = true;
740 }
741
742 // Translate the instruction itself
743 index = translate_token(tokens, index, macro_params);
744
745 // Handle instruction parameters
746 while (index < tokens.size())
747 {
748 // Optional whitespace
749 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
750 {
751 index = translate_token(tokens, index, macro_params);
752 }
753
754 // Check for parameters
755 Token tok = tokens[index];
756 switch (tok.type())
757 {
758 case TOKEN_TYPE::Comment:
759 case TOKEN_TYPE::NewLine:
760 g_processing_jmp = false;
761 return index;
762
763 case TOKEN_TYPE::WhiteSpace:
764 case TOKEN_TYPE::Operator:
765 index = translate_token(tokens, index, macro_params);
766 break;
767
768 default:
769 index = translate_instruction_param(tokens, index, macro_params);
770 break;
771 }
772 }
773
774 g_processing_jmp = false;
775 return index;
776}
777
778size_t translate_item(TokenList& tokens, size_t index, const vector<string> &macro_params)
779{
780 switch (tokens[index].type())
781 {
782 case TOKEN_TYPE::DecNumber:
783 case TOKEN_TYPE::HexNumber:
784 case TOKEN_TYPE::String:
785 case TOKEN_TYPE::WhiteSpace:
786 return translate_token(tokens, index, macro_params);
787 }
788
789 throw "Failed to translate item";
790 return -1;
791}
792
793size_t translate_list(TokenList& tokens, size_t index, const vector<string> &macro_params)
794{
795 while (index < tokens.size())
796 {
797 // The item itself
798 index = translate_item(tokens, index, macro_params);
799
800 // Optional white space
801 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
802 {
803 index = translate_token(tokens, index, macro_params);
804 }
805
806 // End of list?
807 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
808 (tokens[index].type() == TOKEN_TYPE::NewLine))
809 {
810 return index;
811 }
812
813 // We expect a comma here
814 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
815 (tokens[index].str() != ","))
816 {
817 throw "Unexpected end of list";
818 }
819
820 index = translate_token(tokens, index, macro_params);
821 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
822 {
823 index = translate_token(tokens, index, macro_params);
824 }
825 }
826
827 throw "Failed to translate list";
828 return -1;
829}
830
831size_t
832translate_data_def(TokenList& tokens, size_t index, const vector<string>& macro_params)
833{
834 Token tok = tokens[index];
835 Token tok1 = get_ws(tokens[index + 1]);
836 string directive, need, have ="";
837
838 switch (tok.type())
839 {
840 case TOKEN_TYPE::KW_DB:
841 directive = ".byte";
842 break;
843
844 case TOKEN_TYPE::KW_DW:
845 directive = ".short";
846 break;
847
848 case TOKEN_TYPE::KW_DD:
849 directive = ".long";
850 break;
851
852 case TOKEN_TYPE::KW_DQ:
853 directive = ".quad";
854 break;
855 }
856
857 index += 2;
858
859 while (index < tokens.size())
860 {
861 // Check if we need '.ascii' for ASCII strings
862 if (tokens[index].str()[0] == '\"')
863 {
864 need = ".ascii";
865 }
866 else
867 {
868 need = directive;
869 }
870
871 // Output the directive we need (or a comma)
872 if (have == "")
873 {
874 printf("%s ", need.c_str());
875 }
876 else if (have != need)
877 {
878 printf("\n%s ", need.c_str());
879 }
880 else
881 {
882 printf(", ");
883 }
884
885 have = need;
886
887 // The item itself
888 index = translate_item(tokens, index, macro_params);
889
890 // Optional white space
891 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
892 {
893 index = translate_token(tokens, index, macro_params);
894 }
895
896 // End of list?
897 if ((tokens[index].type() == TOKEN_TYPE::Comment) ||
898 (tokens[index].type() == TOKEN_TYPE::NewLine))
899 {
900 return index;
901 }
902
903 // We expect a comma here
904 if ((tokens[index].type() != TOKEN_TYPE::Operator) ||
905 (tokens[index].str() != ","))
906 {
907 throw "Unexpected end of list";
908 }
909
910 // Skip comma and optional white-space
911 index++;
912 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
913 {
914 index++;
915 }
916 }
917
918 throw "Failed to translate list";
919 return -1;
920}
921
922size_t
923translate_construct_one_param(string translated, TokenList& tokens, size_t index, const vector<string>& macro_params)
924{
925 // The next token should be white space
926 Token tok1 = get_ws(tokens[index + 1]);
927
928 printf("%s%s", translated.c_str(), tok1.str().c_str());
929 return translate_expression(tokens, index + 2, macro_params);
930}
931
932size_t
933translate_record(TokenList &tokens, size_t index, const vector<string> &macro_params)
934{
935 unsigned int bits, bitpos = 0;
936 unsigned long long oldmask = 0, mask = 0;
937
938 Token tok_name = get_expected_token(tokens[index], TOKEN_TYPE::Identifier);
939 index += 4;
940 while (index < tokens.size())
941 {
942 Token tok_member = get_expected_token(tokens[index++], TOKEN_TYPE::Identifier);
943
944 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
945 {
946 index++;
947 }
948
949 if (tokens[index++].str() != ":")
950 {
951 throw "Unexpected token";
952 }
953
954 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
955 {
956 index++;
957 }
958
959 Token tok_bits = tokens[index++];
960 if ((tok_bits.type() != TOKEN_TYPE::DecNumber) &&
961 (tok_bits.type() != TOKEN_TYPE::HexNumber))
962 {
963 throw "Unexpected token";
964 }
965
966 bits = stoi(tok_bits.str(), nullptr, 0);
967
968 printf("%s = %u\n", tok_member.str().c_str(), bitpos);
969
970 oldmask = (1ULL << bitpos) - 1;
971 bitpos += bits;
972 mask = (1ULL << bitpos) - 1 - oldmask;
973 printf("MASK_%s = 0x%llx\n", tok_member.str().c_str(), mask);
974
975 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
976 {
977 index++;
978 }
979
980 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
981 (tokens[index].type() == TOKEN_TYPE::Comment))
982 {
983 break;
984 }
985
986 if (tokens[index].str() != ",")
987 {
988 throw "unexpected token";
989 }
990
991 index++;
992 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
993 {
994 index++;
995 }
996
997 if ((tokens[index].type() == TOKEN_TYPE::NewLine) ||
998 (tokens[index].type() == TOKEN_TYPE::Comment))
999 {
1000 index++;
1001 }
1002
1003 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
1004 {
1005 index++;
1006 }
1007 }
1008
1009 return index;
1010}
1011
1012static
1013bool
1014find_proc_frame(TokenList& tokens, size_t index, size_t& frameEndIndex)
1015{
1016 while (index < tokens.size())
1017 {
1018 Token tok = tokens[index];
1019 if ((tok.type() == TOKEN_TYPE::NewLine) ||
1020 (tok.type() == TOKEN_TYPE::Comment))
1021 {
1022 return false;
1023 }
1024
1025 if (tok.type() == TOKEN_TYPE::KW_FRAME)
1026 {
1027 frameEndIndex = index + 1;
1028 return true;
1029 }
1030
1031 index++;
1032 }
1033
1034 return false;
1035}
1036
1037size_t
1039{
1040 Token tok = tokens[index];
1041 Token tok1 = tokens[index + 1];
1042
1043 if (tok1.type() == TOKEN_TYPE::Colon)
1044 {
1045 if (tok.str() == "@@")
1046 {
1048 printf("%u:", g_label_number);
1049 }
1050 else
1051 {
1052 printf("%s:", tok.str().c_str());
1053 }
1055 return index + 2;
1056 }
1057
1058 Token tok2 = tokens[index + 2];
1059
1060 switch (tok2.type())
1061 {
1062 case TOKEN_TYPE::KW_MACRO:
1063 throw "Cannot have a nested macro!";
1064
1065 case TOKEN_TYPE::KW_DB:
1066 case TOKEN_TYPE::KW_DW:
1067 case TOKEN_TYPE::KW_DD:
1068 case TOKEN_TYPE::KW_DQ:
1069 printf("%s:%s", tok.str().c_str(), tok1.str().c_str());
1070 add_mem_id(tok);
1071 return translate_data_def(tokens, index + 2, macro_params);
1072
1073 case TOKEN_TYPE::KW_EQU:
1074 //printf("%s%s", tok.str().c_str(), tok1.str().c_str());
1075 printf("#define %s ", tok.str().c_str());
1077 return translate_expression(tokens, index + 3, macro_params);
1078
1079 case TOKEN_TYPE::KW_TEXTEQU:
1080 {
1081 Token tok3 = get_ws(tokens[index + 3]);
1082 Token tok4 = get_expected_token(tokens[index + 4], TOKEN_TYPE::StringDef);
1083
1084 string textdef = tok4.str();
1085 printf("#define %s %s", tok.str().c_str(), textdef.substr(1, textdef.size() - 2).c_str());
1087 return index + 5;
1088 }
1089
1090 case TOKEN_TYPE::KW_PROC:
1091 {
1092 printf("%s:", tok.str().c_str());
1093 index += 3;
1094
1095 size_t frameEndIndex;
1096 bool hasFrame = find_proc_frame(tokens, index, frameEndIndex);
1097 g_proc_frame_stack.push_back(hasFrame);
1098
1099 if (hasFrame)
1100 {
1101#ifdef TARGET_amd64
1102 printf("\n.seh_proc %s\n", tok.str().c_str());
1103#else
1104 printf("\n.cfi_startproc\n");
1105#endif
1106 index = frameEndIndex;
1107 }
1109 break;
1110 }
1111
1112 case TOKEN_TYPE::KW_ENDP:
1113 {
1114 bool hasFrame = false;
1115 if (!g_proc_frame_stack.empty())
1116 {
1117 hasFrame = g_proc_frame_stack.back();
1118 g_proc_frame_stack.pop_back();
1119 }
1120
1121 if (hasFrame)
1122 {
1123#ifdef TARGET_amd64
1124 printf(".seh_endproc");
1125#else
1126 printf(".cfi_endproc");
1127#endif
1128 }
1129 index += 3;
1130 break;
1131 }
1132
1133 case TOKEN_TYPE::KW_RECORD:
1134 index = translate_record(tokens, index, macro_params);
1135 break;
1136
1137 default:
1138 // We don't know what it is, assume it's a macro and treat it like an instruction
1139 index = translate_instruction(tokens, index, macro_params);
1140 break;
1141 }
1142
1143 return index;
1144}
1145
1146static bool g_intel_syntax_emitted = false;
1147
1148size_t
1149translate_construct(TokenList& tokens, size_t index, const vector<string> &macro_params)
1150{
1151 Token tok = tokens[index];
1152
1153 switch (tok.type())
1154 {
1155 case TOKEN_TYPE::WhiteSpace:
1156 case TOKEN_TYPE::NewLine:
1157 case TOKEN_TYPE::Comment:
1158 return translate_token(tokens, index, macro_params);
1159
1160 case TOKEN_TYPE::Identifier:
1161 return translate_identifier_construct(tokens, index, macro_params);
1162
1163 case TOKEN_TYPE::KW_ALIGN:
1164 index = translate_construct_one_param(".align", tokens, index, macro_params);
1165 break;
1166
1167 case TOKEN_TYPE::KW_allocstack:
1168 index = translate_construct_one_param(".seh_stackalloc", tokens, index, macro_params);
1169 break;
1170
1171 case TOKEN_TYPE::KW_code:
1172 {
1173 printf(".text\n");
1174#ifdef TARGET_amd64
1175 printf(".code64\n");
1176#else
1177 printf(".code32\n");
1178#endif
1180 {
1181 printf(".intel_syntax noprefix\n");
1183 }
1184 index++;
1185 break;
1186 }
1187
1188 case TOKEN_TYPE::KW_const:
1189 printf(".section .rdata");
1190 index++;
1191 break;
1192
1193 case TOKEN_TYPE::KW_DB:
1194 case TOKEN_TYPE::KW_DW:
1195 case TOKEN_TYPE::KW_DD:
1196 case TOKEN_TYPE::KW_DQ:
1197 return translate_data_def(tokens, index, macro_params);
1198
1199 case TOKEN_TYPE::KW_END:
1200 printf("// END\n");
1201 return tokens.size();
1202
1203 case TOKEN_TYPE::KW_endprolog:
1204 printf(".seh_endprologue");
1205 index++;
1206 break;
1207
1208 case TOKEN_TYPE::KW_EXTERN:
1209 {
1210 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1211 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Identifier);
1212 add_mem_id(tok2);
1213 printf("//");
1214 return complete_line(tokens, index, macro_params);
1215 }
1216
1217 case TOKEN_TYPE::KW_if:
1218 printf(".");
1219 return translate_expression(tokens, index, macro_params);
1220
1221 case TOKEN_TYPE::KW_ifdef:
1222 case TOKEN_TYPE::KW_ifndef:
1223 case TOKEN_TYPE::KW_else:
1224 case TOKEN_TYPE::KW_endif:
1225 printf(".");
1226 return complete_line(tokens, index, macro_params);
1227
1228 case TOKEN_TYPE::KW_include:
1229 {
1230 // The next token should be white space
1231 Token tok1 = get_ws_or_nl(tokens[index + 1]);
1232 Token tok2 = get_expected_token(tokens[index + 2], TOKEN_TYPE::Filename);
1233 printf("#include \"%s.h\"", tok2.str().c_str());
1234 index += 3;
1235 break;
1236 }
1237
1238 case TOKEN_TYPE::KW_PUBLIC:
1239 index = translate_construct_one_param(".global", tokens, index, macro_params);
1240 break;
1241
1242 case TOKEN_TYPE::KW_savereg:
1243 printf(".seh_savereg");
1244 return complete_line(tokens, index + 1, macro_params);
1245
1246 case TOKEN_TYPE::KW_savexmm128:
1247 printf(".seh_savexmm");
1248 return complete_line(tokens, index + 1, macro_params);
1249
1250 case TOKEN_TYPE::Instruction:
1251 index = translate_instruction(tokens, index, macro_params);
1252 break;
1253
1254 case TOKEN_TYPE::KW_ERRDEF:
1255 printf("//");
1256 return complete_line(tokens, index, macro_params);
1257
1258 default:
1259 throw "failed to translate construct";
1260 }
1261
1262 // Skip optional white-space
1263 if (tokens[index].type() == TOKEN_TYPE::WhiteSpace)
1264 {
1265 index++;
1266 }
1267
1268 // Line should end here!
1269 Token end = tokens[index];
1270 if ((end.type() != TOKEN_TYPE::Comment) &&
1271 (end.type() != TOKEN_TYPE::NewLine))
1272 {
1273 throw "unexpected tokens";
1274 }
1275
1276 return index;
1277}
1278
1279size_t
1281{
1282 vector<string> macro_params;
1283
1284 printf(".macro %s", tokens[index].str().c_str());
1285
1286 // Parse marameters
1287 index += 3;
1288 while (index < tokens.size())
1289 {
1290 Token tok = tokens[index];
1291 switch (tok.type())
1292 {
1293 case TOKEN_TYPE::NewLine:
1294 case TOKEN_TYPE::Comment:
1295 index = translate_token(tokens, index, macro_params);
1296 break;
1297
1298 case TOKEN_TYPE::Identifier:
1299 macro_params.push_back(tok.str());
1300 printf("%s", tok.str().c_str());
1301 index++;
1302 continue;
1303
1304 case TOKEN_TYPE::WhiteSpace:
1305 case TOKEN_TYPE::Operator:
1306 index = translate_token(tokens, index, macro_params);
1307 continue;
1308 }
1309
1310 break;
1311 }
1312
1313 // Parse content
1314 while (index < tokens.size())
1315 {
1316 Token tok = tokens[index];
1317 switch (tok.type())
1318 {
1319 case TOKEN_TYPE::KW_ENDM:
1320 printf(".endm");
1321 return index + 1;
1322
1323 default:
1324 index = translate_construct(tokens, index, macro_params);
1325 }
1326 }
1327
1328 throw "Failed to translate macro";
1329 return -1;
1330}
1331
1332void
1334{
1335 size_t index = 0;
1336 size_t size = tokens.size();
1337 vector<string> empty_macro_params;
1338
1339 while (index < size)
1340 {
1341 // Macros are special
1342 if ((tokens[index].type() == TOKEN_TYPE::Identifier) &&
1343 (tokens[index + 1].type() == TOKEN_TYPE::WhiteSpace) &&
1344 (tokens[index + 2].type() == TOKEN_TYPE::KW_MACRO))
1345 {
1346 index = translate_macro(tokens, index);
1347 }
1348 else
1349 {
1350 index = translate_construct(tokens, index, empty_macro_params);
1351 }
1352 }
1353}
1354
1355int main(int argc, char* argv[])
1356{
1357 if (argc < 2)
1358 {
1359 fprintf(stderr, "Invalid parameter!\n");
1360 return -1;
1361 }
1362
1363#if PROFILING_ENABLED
1364 time_t start_time = time(NULL);
1365#endif
1366
1367 try
1368 {
1369 // Open and read the input file
1370 string filename(argv[1]);
1373 buffer << file.rdbuf();
1374 string text = buffer.str();
1375
1376 // Create the tokenizer
1377 Tokenizer tokenizer(g_TokenList);
1378
1379 // Get a token list
1380 TokenList toklist(tokenizer, text);
1381
1382 // Now translate the tokens
1383 translate(toklist);
1384 }
1385 catch (const char* message)
1386 {
1387 fprintf(stderr, "Exception caught: '%s'\n", message);
1388 return -2;
1389 }
1390
1391#if PROFILING_ENABLED
1392 time_t total_time = time(NULL) + 1 - start_time;
1393 fprintf(stderr, "total_time = %llu\n", total_time);
1394 fprintf(stderr, "search_time = %llu\n", search_time);
1395 fprintf(stderr, "search: %llu %%\n", search_time * 100 / total_time);
1396#endif
1397
1398 return 0;
1399}
void add_mem_id(Token &tok)
Definition: asmpp.cpp:332
size_t translate_list(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:793
size_t translate_data_def(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:832
size_t translate_construct(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:1149
static void add_identifier(Token &tok, IDTYPE type)
Definition: asmpp.cpp:325
size_t translate_item(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:778
vector< TOKEN_DEF > g_TokenList
Definition: asmpp.cpp:225
void translate(TokenList &tokens)
Definition: asmpp.cpp:1333
size_t translate_token(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:432
Token get_ws_or_nl(Token &&tok)
Definition: asmpp.cpp:406
static bool find_proc_frame(TokenList &tokens, size_t index, size_t &frameEndIndex)
Definition: asmpp.cpp:1014
static bool is_number_token(const Token &tok)
Definition: asmpp.cpp:611
#define INSTRUCTION
Definition: asmpp.cpp:115
static size_t translate_rip_relative_offset(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:632
static bool is_jmp_or_call(const Token &tok)
Definition: asmpp.cpp:714
bool is_mem_id(Token &tok)
Definition: asmpp.cpp:338
vector< bool > g_proc_frame_stack
Definition: asmpp.cpp:301
int fake_printf(const char *format,...)
Definition: asmpp.cpp:98
size_t complete_line(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:492
unsigned int g_label_number
Definition: asmpp.cpp:297
bool is_string_in_list(vector< string > list, string str)
Definition: asmpp.cpp:418
static bool g_intel_syntax_emitted
Definition: asmpp.cpp:1146
time_t search_time
Definition: asmpp.cpp:26
size_t translate_expression(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:509
#define FOLLOWED_BY(x)
Definition: asmpp.cpp:107
Token get_expected_token(Token &&tok, TOKEN_TYPE type)
Definition: asmpp.cpp:385
vector< IDENTIFIER > g_identifiers
Definition: asmpp.cpp:321
bool iequals(const string &a, const string &b)
Definition: asmpp.cpp:352
TOKEN_TYPE
Definition: asmpp.cpp:29
@ KW_allocstack
Definition: asmpp.cpp:62
@ Reg32
Definition: asmpp.cpp:85
@ KW_FRAME
Definition: asmpp.cpp:74
@ KW_savereg
Definition: asmpp.cpp:63
@ KW_EXTERN
Definition: asmpp.cpp:52
@ KW_PROC
Definition: asmpp.cpp:73
@ StringDef
Definition: asmpp.cpp:45
@ MemRefEnd
Definition: asmpp.cpp:42
@ KW_ENDM
Definition: asmpp.cpp:54
@ BraceClose
Definition: asmpp.cpp:40
@ Comment
Definition: asmpp.cpp:34
@ KW_else
Definition: asmpp.cpp:59
@ Instruction
Definition: asmpp.cpp:82
@ KW_END
Definition: asmpp.cpp:55
@ KW_code
Definition: asmpp.cpp:49
@ KW_endprolog
Definition: asmpp.cpp:50
@ DecNumber
Definition: asmpp.cpp:35
@ Identifier
Definition: asmpp.cpp:95
@ KW_DW
Definition: asmpp.cpp:67
@ BraceOpen
Definition: asmpp.cpp:39
@ KW_RECORD
Definition: asmpp.cpp:76
@ Colon
Definition: asmpp.cpp:43
@ KW_ENDP
Definition: asmpp.cpp:75
@ Filename
Definition: asmpp.cpp:81
@ KW_EQU
Definition: asmpp.cpp:70
@ BYTE_PTR
Definition: asmpp.cpp:88
@ RegXmm
Definition: asmpp.cpp:87
@ WhiteSpace
Definition: asmpp.cpp:32
@ KW_DB
Definition: asmpp.cpp:66
@ MemRefStart
Definition: asmpp.cpp:41
@ QWORD_PTR
Definition: asmpp.cpp:91
@ KW_include
Definition: asmpp.cpp:47
@ KW_MACRO
Definition: asmpp.cpp:72
@ HexNumber
Definition: asmpp.cpp:36
@ KW_DD
Definition: asmpp.cpp:68
@ LabelName
Definition: asmpp.cpp:94
@ KW_const
Definition: asmpp.cpp:48
@ KW_TEXTEQU
Definition: asmpp.cpp:71
@ Reg8
Definition: asmpp.cpp:83
@ NewLine
Definition: asmpp.cpp:33
@ KW_ifndef
Definition: asmpp.cpp:58
@ KW_ifdef
Definition: asmpp.cpp:57
@ XMMWORD_PTR
Definition: asmpp.cpp:92
@ KW_PUBLIC
Definition: asmpp.cpp:53
@ Operator
Definition: asmpp.cpp:44
@ KW_DQ
Definition: asmpp.cpp:69
@ Eof
Definition: asmpp.cpp:31
@ KW_ALIGN
Definition: asmpp.cpp:51
@ KW_savexmm128
Definition: asmpp.cpp:64
@ DWORD_PTR
Definition: asmpp.cpp:90
@ String
Definition: asmpp.cpp:37
@ KW_endif
Definition: asmpp.cpp:60
@ KW_MASK
Definition: asmpp.cpp:78
@ KW_if
Definition: asmpp.cpp:56
@ WORD_PTR
Definition: asmpp.cpp:89
@ Invalid
Definition: asmpp.cpp:30
@ Reg16
Definition: asmpp.cpp:84
@ Reg64
Definition: asmpp.cpp:86
@ KW_ERRDEF
Definition: asmpp.cpp:79
Token get_ws(Token &&tok)
Definition: asmpp.cpp:395
const char * get_expression_operator(const string &op)
Definition: asmpp.cpp:364
IDTYPE
Definition: asmpp.cpp:304
size_t translate_identifier_construct(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:1038
size_t translate_construct_one_param(string translated, TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:923
size_t translate_record(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:933
size_t translate_macro(TokenList &tokens, size_t index)
Definition: asmpp.cpp:1280
size_t translate_instruction(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:734
bool g_processing_jmp
Definition: asmpp.cpp:299
size_t translate_mem_ref(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:576
size_t translate_instruction_param(TokenList &tokens, size_t index, const vector< string > &macro_params)
Definition: asmpp.cpp:658
#define SEPARATOR
Definition: asmpp.cpp:113
#define index(s, c)
Definition: various.h:29
size_t size() const
Definition: tokenizer.hpp:239
int type() const
Definition: tokenizer.hpp:55
std::string str() const
Definition: tokenizer.hpp:50
_Self substr(size_type __pos=0, size_type __n=npos) const
Definition: _string.h:1022
const _CharT * c_str() const
Definition: _string.h:949
size_type size() const
Definition: _string.h:400
Definition: list.h:37
#define NULL
Definition: types.h:112
UINT op
Definition: effect.c:236
MonoAssembly int argc
Definition: metahost.c:107
const WCHAR * text
Definition: package.c:1794
int CDECL tolower(int c)
Definition: ctype.c:572
int WINAPIV fprintf(FILE *file, const char *format,...)
Definition: file.c:5549
__time32_t time_t
Definition: corecrt.h:228
#define stderr
int main()
Definition: test.c:6
#define printf
Definition: freeldr.h:103
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
GLdouble s
Definition: gl.h:2039
GLuint GLuint end
Definition: gl.h:1545
GLuint buffer
Definition: glext.h:5915
GLsizeiptr size
Definition: glext.h:5919
GLintptr offset
Definition: glext.h:5920
GLuint index
Definition: glext.h:6031
GLenum GLint GLuint mask
Definition: glext.h:6028
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
Definition: glext.h:10929
GLuint GLuint num
Definition: glext.h:9618
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define bits
Definition: infblock.c:15
const char * filename
Definition: ioapi.h:137
uint32_t entry
Definition: isohybrid.c:63
__u16 time
Definition: mkdosfs.c:8
static unsigned int number
Definition: dsound.c:1479
static UINT PSTR DWORD UINT * need
Definition: parser.c:36
#define argv
Definition: mplay32.c:18
Definition: features.h:417
static void skip_whitespace()
Definition: regtests2xml.c:188
const WCHAR * str
IDTYPE Type
Definition: asmpp.cpp:318
string Name
Definition: asmpp.cpp:317
Definition: fci.c:127
Definition: format.c:58
Definition: tftpd.h:60
void push_back(const _Tp &__x=_STLP_DEFAULT_CONSTRUCTED(_Tp))
Definition: _vector.h:379